-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtranscript-browser.html
More file actions
246 lines (223 loc) · 518 KB
/
transcript-browser.html
File metadata and controls
246 lines (223 loc) · 518 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<!-- Lay the page out at device width so the wrapping header controls stay usable on narrow screens. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>CLembench Transcript Browser — v3.0</title>
<style>
  /* Reset: border-box sizing and no default spacing on any element. */
  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

  /* Page shell: a viewport-height flex column (header on top, content below). */
  body {
    display: flex;
    flex-direction: column;
    height: 100vh;
    font-family: system-ui, sans-serif;
    color: #cdd6f4;
    background: #1e1e2e;
  }

  /* Top bar: title, selector controls and status text in one wrapping row. */
  header {
    display: flex;
    flex-wrap: wrap;
    align-items: center;
    gap: 1.5rem;
    padding: .75rem 1.25rem;
    background: #181825;
    border-bottom: 1px solid #313244;
  }
  header h1 {
    font-size: 1rem;
    font-weight: 600;
    color: #cba6f7;
    white-space: nowrap;
  }

  /* Row of selector groups; each group stacks its label above its select. */
  .controls {
    display: flex;
    flex-wrap: wrap;
    align-items: center;
    gap: .75rem;
  }
  .ctrl-group {
    display: flex;
    flex-direction: column;
    gap: .2rem;
  }
  .ctrl-group label {
    font-size: .7rem;
    /* Was #6c7086, which is below the 4.5:1 WCAG AA contrast minimum on the
       #181825 header background for text this small. */
    color: #a6adc8;
    text-transform: uppercase;
    letter-spacing: .05em;
  }

  /* Drop-downs. */
  select {
    min-width: 200px;
    padding: .35rem .6rem;
    font-size: .85rem;
    color: #cdd6f4;
    background: #313244;
    border: 1px solid #45475a;
    border-radius: .4rem;
    cursor: pointer;
  }
  select:focus { outline: 2px solid #cba6f7; }
  select:disabled { opacity: .4; cursor: default; }

  /* Status text; the auto left margin pushes it to the far end of the header row. */
  #status {
    margin-left: auto;
    font-size: .8rem;
    /* Was #6c7086; raised for the same contrast reason as the labels. */
    color: #a6adc8;
    white-space: nowrap;
  }

  /* Content area: the iframe takes all remaining height below the header. */
  #viewer {
    flex: 1;
    border: none;
    background: #fff;
  }

  /* Centered hint occupying the same flexible area as the viewer. */
  #placeholder {
    flex: 1;
    display: flex;
    align-items: center;
    justify-content: center;
    /* Was #45475a, which is close to invisible (under 2:1) on the #1e1e2e
       page background even though this is the page's main instruction text. */
    color: #a6adc8;
    font-size: 1.1rem;
  }
</style>
</head>
<body>
<header>
  <!-- Control bar: four labelled drop-downs (game, model, experiment, episode).
       Only the game select starts enabled; the other three carry `disabled`. -->
  <h1>CLembench v3.0</h1>
  <div class="controls">
    <div class="ctrl-group">
      <label for="sel-game">Game</label>
      <select id="sel-game"><option value="">— select game —</option></select>
    </div>
    <div class="ctrl-group">
      <label for="sel-model">Model</label>
      <select id="sel-model" disabled><option value="">— select model —</option></select>
    </div>
    <div class="ctrl-group">
      <label for="sel-exp">Experiment</label>
      <select id="sel-exp" disabled><option value="">— select experiment —</option></select>
    </div>
    <div class="ctrl-group">
      <label for="sel-inst">Episode</label>
      <select id="sel-inst" disabled><option value="">— select episode —</option></select>
    </div>
  </div>
  <!-- Starts empty; presumably filled in by the page script (confirm against the script).
       role="status" makes it a polite live region so such updates are announced. -->
  <span id="status" role="status"></span>
</header>
<!-- Initial state: the hint is visible and the viewer iframe is hidden by its inline style.
     NOTE(review): the page script presumably toggles that inline display value; confirm before
     replacing it with the `hidden` attribute. -->
<div id="placeholder">Select a game, model, experiment, and episode above.</div>
<!-- `title` gives the frame an accessible name for assistive technology. -->
<iframe id="viewer" title="Transcript viewer" style="display:none"></iframe>
<script>
const INDEX = {"adventuregame": {"Aya-Expanse-32B-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Llama-3.1-70B-Instruct-t0.0": 
{"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Llama-3.1-8B-Instruct-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Llama-3.3-70B-Instruct-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Olmo-3.1-32B-Instruct-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Qwen2.5-72B-Instruct-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "claude-sonnet-4-5-20250929-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "claude-sonnet-4-5-azure-high-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "claude-sonnet-4-5-azure-low-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "deepseek-v3.2-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "deepseek-v3.2-without-reasoning-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gemini-3-flash-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "glm-4.6-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "glm-4.7-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "glm-5-without-reasoning-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gpt-5.2-2025-12-11-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], 
"new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gpt-5.2-azure-high-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gpt-5.2-azure-medium-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gpt-5.2-azure-minimal-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gpt-oss-120b-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "gpt-oss-20b-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "kimi-k2-thinking-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "kimi-k2.5-without-reasoning-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "minimax-m2.5-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "mistral-3-large-2512-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}, "qwen3-max-t1.0": {"home_deliver_three_basic_hard_invlimittwo": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_created_basic_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_easy_basic_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "new-words_home-delivery_medium_basic_medium": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"], "potion_brewing_basic_undefined": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007"]}}, "clean_up": 
{"Aya-Expanse-32B-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Llama-3.1-70B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", 
"instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Llama-3.1-8B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Llama-3.3-70B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", 
"instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Olmo-3.1-32B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Qwen2.5-72B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], 
"1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "claude-sonnet-4-5-20250929-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": 
["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "claude-sonnet-4-5-azure-high-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "claude-sonnet-4-5-azure-low-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "deepseek-v3.2-t1.0": {"0_easy_3obj_en": ["instance_00000", 
"instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "deepseek-v3.2-without-reasoning-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gemini-3-flash-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": 
["instance_00000", "instance_00001", "instance_00002"]}, "glm-4.6-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "glm-4.7-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "glm-5-without-reasoning-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], 
"2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gpt-5.2-2025-12-11-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gpt-5.2-azure-high-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gpt-5.2-azure-medium-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", 
"instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gpt-5.2-azure-minimal-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gpt-oss-120b-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "gpt-oss-20b-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": 
["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "kimi-k2-thinking-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "kimi-k2.5-without-reasoning-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "minimax-m2.5-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", 
"instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "mistral-3-large-2512-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}, "qwen3-max-t1.0": {"0_easy_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "0_easy_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "1_medium_7obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_3obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_5obj_en": ["instance_00000", "instance_00001", "instance_00002"], "2_hard_7obj_en": ["instance_00000", "instance_00001", "instance_00002"]}}, "codenames": {"Aya-Expanse-32B-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.3-70B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"abstract": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-high-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-medium-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00006", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00006", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-120b-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"abstract": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "ambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "association_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "concrete": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "frequency_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_difficult": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_easy": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "opponent_none": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_high": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "risk_low": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "unambiguous": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}}, "dond": {"Aya-Expanse-32B-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", 
"instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.1-70B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.1-8B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.3-70B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Olmo-3.1-32B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", 
"instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen2.5-72B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", 
"instance_00018", "instance_00019"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-20250929-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", 
"instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-azure-high-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-azure-low-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "deepseek-v3.2-t1.0": {"coop_en": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "deepseek-v3.2-without-reasoning-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gemini-3-flash-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-4.6-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-4.7-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-5-without-reasoning-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-2025-12-11-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-high-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", 
"instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-medium-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-minimal-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-oss-120b-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", 
"instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-oss-20b-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "kimi-k2-thinking-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", 
"instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "kimi-k2.5-without-reasoning-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "minimax-m2.5-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "mistral-3-large-2512-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", 
"instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "qwen3-max-t1.0": {"coop_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "semi_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}}, "guesswhat": {"Aya-Expanse-32B-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"]}, "Llama-3.3-70B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-high-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-medium-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-120b-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"Abs_Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Abs_Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"Level_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "Level_3": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}}, "hot_air_balloon": {"Aya-Expanse-32B-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Llama-3.1-70B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Llama-3.1-8B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], 
"air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Llama-3.3-70B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Olmo-3.1-32B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Qwen2.5-72B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00003", "instance_00004"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00001", "instance_00003", "instance_00004", "instance_00005"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "claude-sonnet-4-5-20250929-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning 
off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "claude-sonnet-4-5-azure-high-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "claude-sonnet-4-5-azure-low-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005"]}, "deepseek-v3.2-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "deepseek-v3.2-without-reasoning-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gemini-3-flash-t1.0": 
{"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "glm-4.6-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "glm-4.7-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "glm-5-without-reasoning-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gpt-5.2-2025-12-11-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], 
"air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gpt-5.2-azure-high-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gpt-5.2-azure-medium-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gpt-5.2-azure-minimal-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gpt-oss-120b-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], 
"air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "gpt-oss-20b-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "kimi-k2-thinking-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "kimi-k2.5-without-reasoning-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "minimax-m2.5-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], 
"air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "mistral-3-large-2512-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}, "qwen3-max-t1.0": {"air_balloon_survival_en_complexity_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_complexity_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_negotiation_hard": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_easy": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"], "air_balloon_survival_en_reasoning off_hard": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005"]}}, "imagegame": {"Aya-Expanse-32B-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, 
"Llama-3.1-70B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.1-8B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.3-70B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], 
"random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Olmo-3.1-32B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, 
"Qwen2.5-72B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], 
"random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-20250929-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, 
"claude-sonnet-4-5-azure-high-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-azure-low-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "deepseek-v3.2-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], 
"random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "deepseek-v3.2-without-reasoning-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gemini-3-flash-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-4.6-t1.0": {"compact_grids": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-4.7-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-5-without-reasoning-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-2025-12-11-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-high-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-medium-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-minimal-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-oss-120b-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-oss-20b-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "kimi-k2-thinking-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "kimi-k2.5-without-reasoning-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "minimax-m2.5-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "mistral-3-large-2512-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", 
"instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "qwen3-max-t1.0": {"compact_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}}, "matchit_ascii": {"Aya-Expanse-32B-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.3-70B-Instruct-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": 
{"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"different_grid": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-high-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-medium-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-120b-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"different_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"], "same_grid": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_1": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "similar_grid_2": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}}, "privateshared": {"Aya-Expanse-32B-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.3-70B-Instruct-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"job-interview": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-high-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-medium-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-120b-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], 
"things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"job-interview": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "letter-number": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "restaurant": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "things-places": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"], "travel-booking": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}}, "referencegame": {"Aya-Expanse-32B-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", 
"instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Llama-3.1-70B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Llama-3.1-8B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Llama-3.3-70B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", 
"instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Olmo-3.1-32B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Qwen2.5-72B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", 
"instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "claude-sonnet-4-5-20250929-t1.0": 
{"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "claude-sonnet-4-5-azure-high-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", 
"instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "claude-sonnet-4-5-azure-low-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "deepseek-v3.2-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "deepseek-v3.2-without-reasoning-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gemini-3-flash-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "glm-4.6-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "glm-4.7-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, 
"glm-5-without-reasoning-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gpt-5.2-2025-12-11-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", 
"instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gpt-5.2-azure-high-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gpt-5.2-azure-medium-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], 
"line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gpt-5.2-azure-minimal-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", 
"instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gpt-oss-120b-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "gpt-oss-20b-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "kimi-k2-thinking-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", 
"instance_00016", "instance_00017"]}, "kimi-k2.5-without-reasoning-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "minimax-m2.5-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", 
"instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00016", "instance_00017"]}, "mistral-3-large-2512-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}, "qwen3-max-t1.0": {"letter_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_columns": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "line_grids_rows": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "number_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"], "random_grids": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017"]}}, "taboo": {"Aya-Expanse-32B-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", 
"instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.1-70B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.1-8B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Llama-3.3-70B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", 
"instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, 
"Olmo-3.1-32B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen2.5-72B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", 
"instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-20250929-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-azure-high-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "claude-sonnet-4-5-azure-low-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "deepseek-v3.2-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", 
"instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "deepseek-v3.2-without-reasoning-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gemini-3-flash-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-4.6-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-4.7-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "glm-5-without-reasoning-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", 
"instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-2025-12-11-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-high-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-medium-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-5.2-azure-minimal-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", 
"instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-oss-120b-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "gpt-oss-20b-t1.0": {"high_en": ["instance_00000", 
"instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "kimi-k2-thinking-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "kimi-k2.5-without-reasoning-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "minimax-m2.5-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", 
"instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "mistral-3-large-2512-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}, "qwen3-max-t1.0": {"high_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "low_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_en": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"]}}, "textmapworld": {"Aya-Expanse-32B-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", 
"instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", 
"instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.3-70B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"large": ["instance_00020", 
"instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"large": 
["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, 
"Qwen3-Next-80B-A3B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", 
"instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-high-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-medium-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-120b-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "large_cycle": ["instance_00040", "instance_00041", "instance_00042", "instance_00043", "instance_00044", "instance_00045", "instance_00046", "instance_00047", "instance_00048", "instance_00049"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "medium_cycle": ["instance_00030", "instance_00031", "instance_00032", "instance_00033", "instance_00034", "instance_00035", "instance_00036", "instance_00037", "instance_00038", "instance_00039"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}}, "textmapworld_graphreasoning": {"Aya-Expanse-32B-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", 
"instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.3-70B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": 
["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", 
"instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", 
"instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00012", "instance_00013", "instance_00015", "instance_00018"], "small": ["instance_00001", "instance_00002", "instance_00003", "instance_00005", "instance_00006"]}, "gpt-oss-120b-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"large": ["instance_00020", "instance_00021", 
"instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", 
"instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"large": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "medium": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "small": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}}, "textmapworld_specificroom": {"Aya-Expanse-32B-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": 
["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-70B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Llama-3.1-8B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, 
"Llama-3.3-70B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Olmo-3.1-32B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-72B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", 
"instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", 
"instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-20250929-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-high-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "claude-sonnet-4-5-azure-low-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "deepseek-v3.2-without-reasoning-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gemini-3-flash-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.6-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-4.7-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "glm-5-without-reasoning-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-2025-12-11-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", 
"instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-high-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-medium-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-5.2-azure-minimal-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-120b-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "gpt-oss-20b-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2-thinking-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "kimi-k2.5-without-reasoning-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", 
"instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "minimax-m2.5-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "mistral-3-large-2512-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", "instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}, "qwen3-max-t1.0": {"close": ["instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019"], "far": ["instance_00020", "instance_00021", "instance_00022", "instance_00023", "instance_00024", 
"instance_00025", "instance_00026", "instance_00027", "instance_00028", "instance_00029"], "on": ["instance_00000", "instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009"]}}, "wordle": {"Aya-Expanse-32B-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.1-70B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.1-8B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.3-70B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Olmo-3.1-32B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen2.5-72B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-20250929-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-azure-high-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-azure-low-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "deepseek-v3.2-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "deepseek-v3.2-without-reasoning-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gemini-3-flash-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-4.6-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-4.7-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-5-without-reasoning-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-2025-12-11-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", 
"instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-high-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-medium-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-minimal-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", 
"instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-oss-120b-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-oss-20b-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "kimi-k2-thinking-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", 
"instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "kimi-k2.5-without-reasoning-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "minimax-m2.5-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "mistral-3-large-2512-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", 
"instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "qwen3-max-t1.0": {"high_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_no_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}}, "wordle_withclue": {"Aya-Expanse-32B-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "EuroLLM-22B-Instruct-2512-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", 
"instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.1-70B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.1-8B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.3-70B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", 
"instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Olmo-3.1-32B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen2.5-72B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", 
"instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Teuken-7B-Instruct-v0.4-t0.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", 
"instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-20250929-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-azure-high-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-azure-low-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", 
"instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "deepseek-v3.2-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "deepseek-v3.2-without-reasoning-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gemini-3-flash-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", 
"instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-4.6-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-4.7-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-5-without-reasoning-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", 
"instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-2025-12-11-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-high-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-medium-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", 
"instance_00020"]}, "gpt-5.2-azure-minimal-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-oss-120b-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-oss-20b-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "kimi-k2-thinking-t1.0": 
{"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "kimi-k2.5-without-reasoning-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "minimax-m2.5-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "mistral-3-large-2512-t1.0": 
{"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "qwen3-max-t1.0": {"high_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_no_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}}, "wordle_withcritic": {"Aya-Expanse-32B-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "EuroLLM-22B-Instruct-2512-t0.0": 
{"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.1-70B-Instruct-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.1-8B-Instruct-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Llama-3.3-70B-Instruct-t0.0": 
{"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Ministral-3-14B-Reasoning-2512-nothink-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Olmo-3.1-32B-Instruct-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen2.5-72B-Instruct-t0.0": 
{"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen2.5-Coder-32B-Instruct-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Qwen3-Next-80B-A3B-Instruct-t0.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "Teuken-7B-Instruct-v0.4-t0.0": 
{"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-20250929-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-azure-high-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "claude-sonnet-4-5-azure-low-t1.0": 
{"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "deepseek-v3.2-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "deepseek-v3.2-without-reasoning-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gemini-3-flash-t1.0": 
{"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-4.6-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-4.7-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "glm-5-without-reasoning-t1.0": {"high_frequency_words_clue_with_critic": 
["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-2025-12-11-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-high-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-medium-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", 
"instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-5.2-azure-minimal-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-oss-120b-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "gpt-oss-20b-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", 
"instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "kimi-k2-thinking-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "kimi-k2.5-without-reasoning-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "minimax-m2.5-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", 
"instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "mistral-3-large-2512-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}, "qwen3-max-t1.0": {"high_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010"], "medium_frequency_words_clue_with_critic": ["instance_00001", "instance_00002", "instance_00003", "instance_00004", "instance_00005", "instance_00006", "instance_00007", "instance_00008", "instance_00009", "instance_00010", "instance_00011", "instance_00012", "instance_00013", "instance_00014", "instance_00015", "instance_00016", "instance_00017", "instance_00018", "instance_00019", "instance_00020"]}}};
// References to the page controls, resolved once when the script runs.
// The four sel-* elements are the cascading pickers (game → model →
// experiment → episode); the code below sets their options, value and
// disabled state.
const selGame = document.querySelector('#sel-game');
const selModel = document.querySelector('#sel-model');
const selExp = document.querySelector('#sel-exp');
const selInst = document.querySelector('#sel-inst');

// Element that displays the loaded transcript (its srcdoc is assigned below).
const viewer = document.querySelector('#viewer');

// Empty-state element shown (display: flex) whenever no transcript is visible.
const placeholder = document.querySelector('#placeholder');

// Text element that shows the repository path of the displayed transcript.
const status = document.querySelector('#status');
/**
 * Replace the options of a dropdown.
 *
 * The first option is always an empty-valued entry showing `placeholder`,
 * followed by one option per entry of `values`, in the given order. Each
 * entry is used both as the option's value and as its visible label.
 *
 * @param {HTMLSelectElement} sel - Dropdown whose options are rebuilt.
 * @param {string[]} values - Option values/labels to append.
 * @param {string} placeholder - Label of the leading empty option. It is
 *   inserted as markup via innerHTML, so callers must pass trusted text.
 *   (Inside this function the name shadows the file-level `placeholder`
 *   element reference.)
 */
function populate(sel, values, placeholder) {
  // Assigning innerHTML discards whatever options were there before.
  sel.innerHTML = `<option value="">${placeholder}</option>`;

  for (const entry of values) {
    const option = document.createElement('option');
    // textContent keeps the entry from being interpreted as markup.
    option.textContent = entry;
    option.value = entry;
    sel.appendChild(option);
  }
}
/**
 * Put every given dropdown back into its "nothing to choose" state:
 * a single empty-valued "—" option, with the control disabled.
 *
 * @param {...HTMLSelectElement} sels - Dropdowns to clear and disable.
 */
function reset(...sels) {
  for (const dropdown of sels) {
    dropdown.innerHTML = `<option value="">—</option>`;
    dropdown.disabled = true;
  }
}
// Base URL for raw file access to the clembench-runs repository (main branch).
// showTranscript() appends the per-episode transcript path to this prefix.
const REPO_RAW = 'https://raw.githubusercontent.com/clembench/clembench-runs/main';
// Sequence number of the most recent showTranscript() call. Every call bumps
// it, so a response that arrives after the user has already picked something
// else can be recognised as stale and dropped instead of overwriting the
// newer transcript.
let transcriptRequestSeq = 0;

/**
 * Load the transcript for the current game / model / experiment / episode
 * selection into the viewer.
 *
 * Reads the four dropdown values; takes no arguments (when used directly as
 * an event listener, the event object is ignored) and returns nothing.
 *
 * - If any of the four values is empty (for example the episode dropdown was
 *   set back to its placeholder entry), the viewer is hidden, the placeholder
 *   is shown and the status line is cleared.
 * - Otherwise the status line shows the repository-relative path, the viewer
 *   shows a "Loading…" note, and the transcript HTML is fetched and assigned
 *   to the viewer's srcdoc. A failed request (network error or non-2xx
 *   response) shows an error note in the viewer instead.
 *
 * Only the most recent call is allowed to update the viewer once its fetch
 * settles; results of superseded calls are discarded.
 */
function showTranscript() {
  const game = selGame.value;
  const model = selModel.value;
  const exp = selExp.value;
  const inst = selInst.value;

  // Taking a new id invalidates every request that is still in flight,
  // including when this call ends up showing the empty state.
  const requestId = ++transcriptRequestSeq;
  const isCurrent = () => requestId === transcriptRequestSeq;

  if (!game || !model || !exp || !inst) {
    // Incomplete selection: fall back to the empty state rather than leaving
    // a transcript on screen that no longer matches the dropdowns.
    viewer.style.display = 'none';
    placeholder.style.display = 'flex';
    status.textContent = '';
    return;
  }

  const path = `v3.0/${model}/${game}/${exp}/${inst}/transcript.html`;
  const url = `${REPO_RAW}/${path}`;

  status.textContent = path;
  placeholder.style.display = 'none';
  viewer.style.display = 'block';
  viewer.srcdoc = '<p style="font-family:sans-serif;padding:2rem;color:#888">Loading…</p>';

  fetch(url)
    .then(r => {
      // fetch() only rejects on network failure; turn HTTP errors into
      // rejections too so they reach the catch handler below.
      if (!r.ok) throw new Error(r.status);
      return r.text();
    })
    .then(html => {
      if (isCurrent()) viewer.srcdoc = html;
    })
    .catch(err => {
      if (!isCurrent()) return;
      viewer.srcdoc = `<p style="font-family:sans-serif;padding:2rem;color:red">Failed to load transcript: ${err}</p>`;
    });
}
// Fill the game dropdown as soon as the script runs.
populate(selGame, Object.keys(INDEX).sort(), '— select game —');

// Game changed: everything downstream of the game is no longer valid, so
// clear the model / experiment / episode dropdowns and return to the empty
// state, then offer the models recorded for the chosen game.
selGame.addEventListener('change', () => {
  reset(selModel, selExp, selInst);
  viewer.style.display = 'none';
  placeholder.style.display = 'flex';
  status.textContent = '';

  const game = selGame.value;
  if (game) {
    const modelNames = Object.keys(INDEX[game] || {}).sort();
    populate(selModel, modelNames, '— select model —');
    selModel.disabled = false;
  }
});
// Model changed: rebuild the experiment dropdown for the new model and, where
// possible, carry the previously selected experiment and episode over so the
// same episode can be compared across models without re-selecting it.
selModel.addEventListener('change', () => {
  const game = selGame.value;
  const model = selModel.value;
  // Remember the old choices before reset() wipes the dropdowns.
  const prevExp = selExp.value;
  const prevInst = selInst.value;

  reset(selExp, selInst);
  status.textContent = '';

  const showEmptyState = () => {
    viewer.style.display = 'none';
    placeholder.style.display = 'flex';
  };

  if (!model) {
    showEmptyState();
    return;
  }

  const experiments = (INDEX[game] || {})[model] || {};
  const exps = Object.keys(experiments).sort();
  populate(selExp, exps, '— select experiment —');
  selExp.disabled = false;

  // The previous experiment does not exist under the new model: stop here
  // and let the user pick one.
  const expCarriesOver = Boolean(prevExp) && exps.includes(prevExp);
  if (!expCarriesOver) {
    showEmptyState();
    return;
  }

  selExp.value = prevExp;
  const insts = experiments[prevExp] || [];
  populate(selInst, insts, '— select episode —');
  selInst.disabled = false;

  // Same idea one level down: keep the episode only if it still exists.
  const instCarriesOver = Boolean(prevInst) && insts.includes(prevInst);
  if (!instCarriesOver) {
    showEmptyState();
    return;
  }

  selInst.value = prevInst;
  showTranscript();
});
// Experiment changed: rebuild the episode dropdown and, if the previously
// selected episode also exists in the new experiment, keep it selected and
// load its transcript straight away.
selExp.addEventListener('change', () => {
  const game = selGame.value;
  const model = selModel.value;
  const exp = selExp.value;
  // Remember the old episode before reset() wipes the dropdown.
  const prevInst = selInst.value;

  reset(selInst);
  status.textContent = '';

  const showEmptyState = () => {
    viewer.style.display = 'none';
    placeholder.style.display = 'flex';
  };

  if (!exp) {
    showEmptyState();
    return;
  }

  const episodes = ((INDEX[game] || {})[model] || {})[exp] || [];
  populate(selInst, episodes, '— select episode —');
  selInst.disabled = false;

  const episodeCarriesOver = Boolean(prevInst) && episodes.includes(prevInst);
  if (episodeCarriesOver) {
    selInst.value = prevInst;
    showTranscript();
  } else {
    showEmptyState();
  }
});
// Episode changed: this is the last dropdown in the chain, so hand over to
// showTranscript(), which reads all four dropdown values itself.
selInst.addEventListener('change', showTranscript);
</script>
</body>
</html>