Skip to content

Commit 27aec5d

Browse files
committed
Optimize webhooks JSON with compact structure and string interning
1 parent 4429c41 commit 27aec5d

File tree

13 files changed

+34305
-49177
lines changed

13 files changed

+34305
-49177
lines changed

.github/workflows/buildtest.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,31 @@ jobs:
6464
echo ""
6565
exit 1
6666
fi
67+
68+
validate-webhooks:
69+
runs-on: ubuntu-latest
70+
name: Validate webhook optimization
71+
72+
steps:
73+
- uses: actions/checkout@v4
74+
- name: Use Node.js 22.x
75+
uses: actions/setup-node@v4
76+
with:
77+
node-version: 22.x
78+
cache: 'npm'
79+
registry-url: 'https://npm.pkg.github.com'
80+
- run: npm ci
81+
env:
82+
NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
83+
- name: Generate full webhooks file
84+
run: cd languageservice && npm run update-webhooks
85+
- name: Run optimization validation tests
86+
run: cd languageservice && npm test -- --testPathPattern=eventPayloads
87+
- name: Verify validation tests ran
88+
run: |
89+
if [ ! -f languageservice/src/context-providers/events/webhooks.full.validation-complete ]; then
90+
echo "ERROR: Validation tests did not run!"
91+
echo "The webhooks.full.validation-complete marker file was not created."
92+
exit 1
93+
fi
94+
echo "Validation tests completed at: $(cat languageservice/src/context-providers/events/webhooks.full.validation-complete)"

.gitignore

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ node_modules
77
# Minified JSON (generated at build time)
88
*.min.json
99

10-
# Intermediate JSON for size comparison (generated by update-webhooks --all)
11-
*.all.json
12-
*.drop.json
13-
*.strip.json
10+
# Full webhooks source (generated by update-webhooks, used for validation tests)
11+
*.full.json
12+
13+
# Validation marker (generated by tests)
14+
*.validation-complete

docs/json-data-files.md

Lines changed: 105 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@ This document describes the JSON data files used by the language service package
66

77
The language service uses several JSON files containing schema definitions, webhook payloads, and other metadata. To reduce bundle size, these files are:
88

9-
1. **Optimized at generation time** — unused events are dropped, unused fields are stripped
10-
2. **Minified at build time** — whitespace is removed to produce `.min.json` files
9+
1. **Optimized at generation time** — unused events are dropped, unused fields are stripped, shared objects are deduplicated, and property names are interned
10+
2. **Compacted using a space-efficient format** — params are stored as arrays whose layout is distinguished by element type, instead of as objects
11+
3. **Minified at build time** — whitespace is removed to produce `.min.json` files
1112

1213
The source `.json` files are human-readable and checked into the repository. The `.min.json` files are generated during build and gitignored.
1314

@@ -19,6 +20,7 @@ The source `.json` files are human-readable and checked into the repository. The
1920
|------|-------------|
2021
| `src/context-providers/events/webhooks.json` | Webhook event payload schemas for autocompletion |
2122
| `src/context-providers/events/objects.json` | Deduplicated shared object definitions referenced by webhooks |
23+
| `src/context-providers/events/strings.json` | Interned property names shared by webhooks and objects |
2224
| `src/context-providers/events/schedule.json` | Schedule event context data |
2325
| `src/context-providers/events/workflow_call.json` | Reusable workflow call context data |
2426
| `src/context-providers/descriptions.json` | Context variable descriptions for hover |
@@ -33,7 +35,7 @@ The source `.json` files are human-readable and checked into the repository. The
3335

3436
### Webhooks and Objects
3537

36-
The `webhooks.json` and `objects.json` files are generated from the [GitHub REST API description](https://github.com/github/rest-api-description):
38+
The `webhooks.json`, `objects.json`, and `strings.json` files are generated from the [GitHub REST API description](https://github.com/github/rest-api-description):
3739

3840
```bash
3941
cd languageservice
@@ -44,9 +46,11 @@ This script:
4446
1. Fetches webhook schemas from the GitHub API description
4547
2. **Validates** all events are categorized (fails if new events are found)
4648
3. **Drops** events that aren't valid workflow triggers (see [Dropped Events](#dropped-events))
47-
4. **Strips** unused fields like `description` and `summary` (see [Stripped Fields](#stripped-fields))
48-
5. **Deduplicates** shared object definitions into `objects.json`
49-
6. Writes the optimized, pretty-printed JSON files
49+
4. **Strips** unused fields like `type`, `in`, `isRequired` (see [Stripped Fields](#stripped-fields))
50+
5. **Compacts** params into a space-efficient array format (see [Compact Format](#compact-format))
51+
6. **Deduplicates** shared object definitions into `objects.json`
52+
7. **Interns** duplicate property names into `strings.json` (see [String Interning](#string-interning))
53+
8. Writes the optimized, pretty-printed JSON files
5054

5155
### Handling New Webhook Events
5256

@@ -101,13 +105,15 @@ The code imports the minified versions:
101105

102106
```ts
103107
import webhooks from "./events/webhooks.min.json"
108+
import objects from "./events/objects.min.json"
109+
import strings from "./events/strings.min.json"
104110
```
105111

106112
## CI Verification
107113

108114
CI verifies that generated source files are up-to-date:
109115

110-
1. Runs `npm run update-webhooks` to regenerate webhooks.json and objects.json
116+
1. Runs `npm run update-webhooks` to regenerate webhooks.json, objects.json, and strings.json
111117
2. Checks for uncommitted changes with `git diff --exit-code`
112118

113119
The `.min.json` files are generated at build time and are not committed to the repository.
@@ -147,4 +153,95 @@ Only `name`, `description`, and `childParamsGroups` are kept — these are used
147153

148154
To compare all fields vs stripped, run `npm run update-webhooks -- --all` and diff the `.all.json` files against the regular ones.
149155

150-
See `EVENT_ACTION_FIELDS` and `BODY_PARAM_FIELDS` in `script/webhooks/index.ts` to modify what gets stripped.
156+
See `EVENT_ACTION_FIELDS` and `BODY_PARAM_FIELDS` in `script/webhooks/update-webhooks.ts` to modify what gets stripped.
157+
158+
## Compact Format
159+
160+
After stripping, params are further compacted from objects into arrays using type-based dispatch:
161+
162+
| Format | Meaning |
163+
|--------|---------|
164+
| `"name"` | Name only (no description, no children) |
165+
| `[name, desc]` | Name + description (arr[1] is a string) |
166+
| `[name, children]` | Name + children (arr[1] is an array) |
167+
| `[name, desc, children]` | Name + description + children |
168+
169+
The reader inspects `arr[1]` to determine the format: if `typeof arr[1]` is `"string"`, it's a description; otherwise it's an array of children (note that `typeof` alone reports arrays as `"object"`).
170+
171+
**Example:**
172+
173+
```json
174+
// Before (object format)
175+
{
176+
"name": "issue",
177+
"description": "The issue itself.",
178+
"childParamsGroups": [
179+
{ "name": "id" },
180+
{ "name": "title", "description": "Issue title" }
181+
]
182+
}
183+
184+
// After (compact format)
185+
["issue", "The issue itself.", [
186+
"id",
187+
["title", "Issue title"]
188+
]]
189+
```
190+
191+
## String Interning
192+
193+
Property names that appear 2+ times are "interned" into a shared string table (`strings.json`). In the compact arrays, these names are replaced with numeric indices:
194+
195+
```json
196+
// strings.json
197+
["url", "id", "name", ...] // Index 0 = "url", 1 = "id", 2 = "name"
198+
199+
// webhooks.json - uses indices instead of strings
200+
["push", [
201+
[0, "The URL..."], // 0 = "url" from string table
202+
[1, "Unique ID"], // 1 = "id"
203+
2 // 2 = "name" (name-only, no description)
204+
]]
205+
```
206+
207+
**How to distinguish indices from other values:**
208+
209+
- **Top-level numbers in `p` arrays** → Object indices (references into `objects.json`)
210+
- **Nested numbers inside compact arrays** → String indices (references into `strings.json`)
211+
- **Literal strings in the name position** → Singletons (names appearing only once, not interned)
212+
213+
Singletons are kept as literal strings for readability and to avoid the overhead of adding rarely-used names to the string table.
214+
215+
## Deduplication
216+
217+
Shared object definitions are extracted into `objects.json` and referenced by index:
218+
219+
```json
220+
// objects.json
221+
[
222+
["url", "The URL"], // Index 0
223+
["id", "Unique identifier"], // Index 1
224+
[...]
225+
]
226+
227+
// webhooks.json - top-level numbers reference objects
228+
{
229+
"push": {
230+
"default": {
231+
"p": [0, 1, ["ref", "The git ref"]] // 0 and 1 are object indices
232+
}
233+
}
234+
}
235+
```
236+
237+
This reduces duplication when the same object structure appears in multiple events (e.g., `repository`, `sender`, `organization`).
238+
239+
## Size Reduction
240+
241+
The optimizations reduce the minified file size by approximately 97% (about 93% after gzip):
242+
243+
| Stage | Minified | Gzip |
244+
|-------|----------|------|
245+
| Original (webhooks.full.json) | 6.7 MB | 310 KB |
246+
| After optimization | 209 KB | 22 KB |
247+
| **Reduction** | **97%** | **93%** |

languageservice/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@
3737
"format-check": "prettier --check '**/*.ts'",
3838
"lint": "eslint 'src/**/*.ts'",
3939
"lint-fix": "eslint --fix 'src/**/*.ts'",
40-
"minify-json": "node ../script/minify-json.js src/context-providers/descriptions.json src/context-providers/events/webhooks.json src/context-providers/events/objects.json src/context-providers/events/schedule.json src/context-providers/events/workflow_call.json",
40+
"minify-json": "node ../script/minify-json.js src/context-providers/descriptions.json src/context-providers/events/webhooks.json src/context-providers/events/objects.json src/context-providers/events/strings.json src/context-providers/events/schedule.json src/context-providers/events/workflow_call.json",
4141
"prebuild": "npm run minify-json",
4242
"prepublishOnly": "npm run build && npm run test",
4343
"pretest": "npm run minify-json",
4444
"test": "NODE_OPTIONS=\"--experimental-vm-modules\" jest",
4545
"test-watch": "NODE_OPTIONS=\"--experimental-vm-modules\" jest --watch",
46-
"update-webhooks": "npx tsx script/webhooks/index.ts",
46+
"update-webhooks": "npx tsx script/webhooks/update-webhooks.ts",
4747
"watch": "tsc --build tsconfig.build.json --watch"
4848
},
4949
"dependencies": {

languageservice/script/webhooks/deduplicate.ts

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,38 @@
11
import Webhook from "./webhook";
22

3+
/**
4+
* Get the name from a param.
5+
* Formats: "name" (string), [name, ...] (array), or { name, ... } (object)
6+
*/
7+
function getParamName(param: any): string {
8+
if (typeof param === "string") {
9+
return param;
10+
}
11+
if (Array.isArray(param)) {
12+
return param[0];
13+
}
14+
return param.name;
15+
}
16+
17+
/**
18+
* Get params from a webhook action.
19+
* Uses 'p' (short key) if present, falls back to 'bodyParameters', then to an empty array
20+
*/
21+
function getParams(webhook: any): any[] {
22+
return webhook.p || webhook.bodyParameters || [];
23+
}
24+
25+
/**
26+
* Set params on a webhook action using the short key 'p'
27+
*/
28+
function setParams(webhook: any, params: any[]): void {
29+
if (webhook.p !== undefined) {
30+
webhook.p = params;
31+
} else {
32+
webhook.bodyParameters = params;
33+
}
34+
}
35+
336
// Store any repeated body parameters in an array
437
// and replace them in the webhook with an index in the array
538
export function deduplicateWebhooks(webhooks: Record<string, Record<string, Webhook>>): any[] {
@@ -10,10 +43,11 @@ export function deduplicateWebhooks(webhooks: Record<string, Record<string, Webh
1043
const objectCount: Record<string, number> = {};
1144

1245
for (const webhook of iterateWebhooks(webhooks)) {
13-
for (const param of webhook.bodyParameters) {
14-
objectsByName[param.name] ||= [];
15-
const index = findOrAdd(param, objectsByName[param.name]);
16-
const key = `${param.name}:${index}`;
46+
for (const param of getParams(webhook)) {
47+
const name = getParamName(param);
48+
objectsByName[name] ||= [];
49+
const index = findOrAdd(param, objectsByName[name]);
50+
const key = `${name}:${index}`;
1751
objectCount[key] ||= 0;
1852
objectCount[key]++;
1953
}
@@ -27,18 +61,19 @@ export function deduplicateWebhooks(webhooks: Record<string, Record<string, Webh
2761

2862
for (const webhook of iterateWebhooks(webhooks)) {
2963
const newParams: any[] = [];
30-
for (const param of webhook.bodyParameters) {
31-
const index = find(param, objectsByName[param.name]);
32-
const key = `${param.name}:${index}`;
64+
for (const param of getParams(webhook)) {
65+
const name = getParamName(param);
66+
const index = find(param, objectsByName[name]);
67+
const key = `${name}:${index}`;
3368
if (objectCount[key] > 1) {
34-
newParams.push(indexForParam(param, index, bodyParamIndexMap, duplicatedBodyParams));
69+
newParams.push(indexForParam(param, name, index, bodyParamIndexMap, duplicatedBodyParams));
3570
} else {
3671
// If an object is only used once, keep it inline
3772
newParams.push(param);
3873
}
3974
}
4075

41-
webhook.bodyParameters = newParams;
76+
setParams(webhook, newParams);
4277
}
4378

4479
return duplicatedBodyParams;
@@ -74,11 +109,12 @@ function find(param: any, objects: any[]): number {
74109

75110
function indexForParam(
76111
param: any,
112+
paramName: string,
77113
paramNameIndex: number,
78114
objectIndexMap: Record<string, number>,
79115
duplicatedBodyParams: any[]
80116
): number {
81-
const key = `${param.name}:${paramNameIndex}`;
117+
const key = `${paramName}:${paramNameIndex}`;
82118

83119
const existingIndex = objectIndexMap[key];
84120
if (existingIndex !== undefined) {

0 commit comments

Comments
 (0)