From 19c7a05be69f4304dcdf10e9a07195136e9729fa Mon Sep 17 00:00:00 2001 From: babywolf Date: Thu, 23 Apr 2026 17:57:37 +0100 Subject: [PATCH 1/5] v2.0.0-alpha MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 🐞 fix code directive number being spaced out - ⚠️ experimental LSP Signed-off-by: babywolf --- .github/workflows/test.yml | 56 +++ .gitignore | 2 + .vscodeignore | 11 +- README.rst | 1 + docs/diagnostics.rst | 191 ++++++++++ docs/gaslsp.toml | 70 ++++ extension.js | 70 ++++ package-lock.json | 99 ++++- package.json | 219 ++++++++--- snippets/gas.code-snippets | 2 +- src/.editorconfig | 8 + src/.gitattributes | 8 + src/.gitignore | 27 ++ src/config.v | 277 ++++++++++++++ src/diagnostics.v | 715 +++++++++++++++++++++++++++++++++++ src/handlers.v | 381 +++++++++++++++++++ src/indexer.v | 205 ++++++++++ src/main.v | 199 ++++++++++ src/parser.v | 408 ++++++++++++++++++++ src/rpc.v | 200 ++++++++++ src/tables/instrs.csv | 106 ++++++ src/tables/regs.csv | 124 ++++++ src/v.mod | 7 + syntaxes/gas.tmLanguage.json | 2 +- tests/d001_missing_suffix.s | 4 + tests/d012_pushb.s | 7 + tests/d020_todo.s | 4 + tests/integration.sh | 37 ++ tests/new_opcodes.s | 33 ++ tests/ring0.s | 13 + tests/test_diags.sh | 118 ++++++ 31 files changed, 3527 insertions(+), 77 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 docs/diagnostics.rst create mode 100644 docs/gaslsp.toml create mode 100644 extension.js create mode 100644 src/.editorconfig create mode 100644 src/.gitattributes create mode 100644 src/.gitignore create mode 100644 src/config.v create mode 100644 src/diagnostics.v create mode 100644 src/handlers.v create mode 100644 src/indexer.v create mode 100644 src/main.v create mode 100644 src/parser.v create mode 100644 src/rpc.v create mode 100644 src/tables/instrs.csv create mode 100644 src/tables/regs.csv create mode 100644 src/v.mod create mode 100644 tests/d001_missing_suffix.s create mode 100644 
tests/d012_pushb.s create mode 100644 tests/d020_todo.s create mode 100755 tests/integration.sh create mode 100644 tests/new_opcodes.s create mode 100644 tests/ring0.s create mode 100755 tests/test_diags.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..a653ff5 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,56 @@ +name: Test + +on: + push: + branches: [main, dev] + pull_request: + branches: [main, dev] + workflow_dispatch: + +jobs: + test-v: + name: V Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install V + uses: vlang/setup-v@v1.4 + + - name: Get V version + run: v --version + + - name: Build gaslsp + run: | + cd src + v -o gaslsp . + + - name: Run integration tests + run: | + mkdir -p ~/.local/bin + cp src/gaslsp ~/.local/bin/gaslsp + cp -r src/tables ~/.local/bin/ + chmod +x tests/integration.sh tests/test_diags.sh # both scripts live under tests/; a bare test_diags.sh does not exist and made chmod fail the step + tests/integration.sh + + test-diags: + name: Diagnostic Tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install V + uses: vlang/setup-v@v1.4 + + - name: Build gaslsp + run: | + cd src + v -o gaslsp . 
+ + - name: Install and test diagnostics + run: | + mkdir -p ~/.local/bin + cp src/gaslsp ~/.local/bin/gaslsp + cp -r src/tables ~/.local/bin/ + chmod +x tests/test_diags.sh + tests/test_diags.sh \ No newline at end of file diff --git a/.gitignore b/.gitignore index aeee732..cb53200 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ node_modules *.vsix + +gaslsp diff --git a/.vscodeignore b/.vscodeignore index f369b5e..5e607bf 100644 --- a/.vscodeignore +++ b/.vscodeignore @@ -1,4 +1,9 @@ -.vscode/** -.vscode-test/** +.vscode/ +.github/ .gitignore -vsc-extension-quickstart.md +.gitattributes +.editorconfig + +src/ +docs/ +tests/ diff --git a/README.rst b/README.rst index 62e5e48..a4f2f53 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,4 @@ ============================================= Yet another AT&T/GAS assembly utils extension ============================================= + diff --git a/docs/diagnostics.rst b/docs/diagnostics.rst new file mode 100644 index 0000000..6de3495 --- /dev/null +++ b/docs/diagnostics.rst @@ -0,0 +1,191 @@ +Diagnostic Codes +================= + +This document lists all diagnostic codes produced by gaslsp. + +Severity +-------- + +- **Error**: Compilation will fail +- **Warning**: May cause unexpected behavior + +Size Diagnostics (D001-D003) +---------------------------- + +.. list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D001 + - Error + - Missing size suffix with no operand to infer size from (e.g., ``push $42``) + + * - D002 + - Warning + - No size suffix given; size inferred from operand register + + * - D003 + - Error + - Operand size mismatch: suffix size doesn't match register size + +Operand Diagnostics (D004-D005, D009-D010, D018) +------------------------------------------------ + +.. 
list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D004 + - Warning + - Immediate value doesn't fit in operand size (e.g., ``$256`` for 8-bit) + + * - D005 + - Error + - High-byte register (``%ah``, ``%bh``, ``%ch``, ``%dh``) conflicts with REX prefix + + * - D009 + - Error + - 32-bit base register (``%ebp``, ``%ebx``, etc.) in 64-bit memory operand + + * - D010 + - Warning + - Source and destination registers are the same (no-op) + + * - D018 + - Error + - Incomplete label: identifier without colon that isn't an instruction + +Encoding Diagnostics (D011-D015) +-------------------------------- + +.. list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D011 + - Error + - ``div``/``idiv`` with immediate operand (not encodable) + + * - D012 + - Error + - ``pushb`` not encodable; push only supports 16/32/64-bit + + * - D013 + - Warning + - One-operand ``imul``: high half of result in ``%rdx`` may be unexpected + + * - D014 + - Warning + - ``mul`` is unsigned; upper half in ``%rdx`` may be silently discarded + + * - D015 + - Error + - Shift count must be ``%cl`` or immediate; other registers not encodable + +ABI Diagnostics (D016-D017) +--------------------------- + +.. list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D016 + - Warning + - ``syscall`` clobbers ``%rcx`` and ``%r11`` + + * - D017 + - Warning + - ``int $0x80`` is 32-bit syscall ABI; use ``syscall`` instead + +Symbol Diagnostics (D006-D008, D019) +------------------------------------ + +.. 
list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D006 + - Error + - Undefined symbol reference + + * - D007 + - Warning + - Symbol referenced from another file but not declared ``.global`` + + * - D008 + - Error + - Duplicate symbol definition + + * - D019 + - Warning + - ``_start`` or ``main`` defined but not exported + +State Diagnostics (D034) +------------------------- + +.. list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D034 + - Warning + - Register may be read before being written (uninitialized) + +Statement Diagnostics (D020) +------------------------------ + +.. list-table:: + :header-rows: 1 + :widths: 15 15 70 + + * - Code + - Severity + - Description + + * - D020 + - Hint + - TODO/FIXME/HACK/XXX/BUG comment found + +Configuration +-------------- + +Diagnostics can be suppressed or promoted to errors in ``gaslsp.toml``: + +.. code-block:: toml + + [diagnostics] + suppress = ["D002", "D014"] # Disable specific codes + warnings_as_errors = ["D010"] # Promote warnings to errors + + [diagnostics.categories] + size = true # D001-D003 + operand = true # D004-D005, D009-D010, D018 + encoding = true # D011-D015 + abi = true # D016-D017 + symbol = true # D006-D008, D019 + state = true # D034: uninitialized register tracking + statements = true # D020: TODO/FIXME comments diff --git a/docs/gaslsp.toml b/docs/gaslsp.toml new file mode 100644 index 0000000..2ceeb48 --- /dev/null +++ b/docs/gaslsp.toml @@ -0,0 +1,70 @@ +# gaslsp.toml - GAS Assembly LSP Configuration +# +# This file configures diagnostics and behavior for gaslsp. 
+# Place it in your project root or at ~/.config/gaslsp/gaslsp.toml + +# ───────────────────────────────────────────────────────────────── +# General Settings +# ───────────────────────────────────────────────────────────────── +[general] +mode = "auto" # "auto" or "strict" +recursive = true # Recursively index subdirectories +extensions = [".s", ".S", ".asm"] # File extensions to recognize + +# ───────────────────────────────────────────────────────────────── +# Indexing +# ───────────────────────────────────────────────────────────────── +[indexing] +scope = "workspace" # "workspace", "includes", or "open" +follow_external_includes = false # Follow .include directives outside workspace +extra_dirs = [] # Additional directories to search for includes + +# ───────────────────────────────────────────────────────────────── +# Diagnostics +# ───────────────────────────────────────────────────────────────── +[diagnostics] +enabled = true + +# Suppress specific diagnostic codes (can be "D001", "D002", etc.) 
+suppress = [] + +# Promote specific warnings to errors +warnings_as_errors = [] + +# Enable/disable diagnostic categories +[diagnostics.categories] +size = true # D001-D003: size suffix issues +operand = true # D004-D005, D009-D010, D018: operand issues +encoding = true # D011-D015: encoding issues +abi = true # D016-D017: ABI issues +symbol = true # D006-D008, D019: symbol issues +state = true # D034: uninitialized register tracking +statements = true # D020: TODO/FIXME comments + +# Enable/disable severity levels +[diagnostics.levels] +error = true +warning = true +hint = true + +# ───────────────────────────────────────────────────────────────── +# Inference (size suffix inferring) +# ───────────────────────────────────────────────────────────────── +[infer] +warn_inferred_size = true # Warn when suffix is inferred from operand +warn_mode_mismatch = true # Warn when mode doesn't match + +# ───────────────────────────────────────────────────────────────── +# Symbols +# ───────────────────────────────────────────────────────────────── +[symbols] +warn_dead_exports = false # Warn when .global symbol is never referenced +warn_missing_global = true # Warn when cross-file reference lacks .global + +# ───────────────────────────────────────────────────────────────── +# ABI +# ───────────────────────────────────────────────────────────────── +[abi] +convention = "sysv" # "sysv" (Linux/Unix) or "ms" (Windows) +warn_syscall_clobber = true # Warn about syscall clobbering rcx/r11 +warn_legacy_syscall = true # Warn about int $0x80 (32-bit syscall) diff --git a/extension.js b/extension.js new file mode 100644 index 0000000..c80dd9e --- /dev/null +++ b/extension.js @@ -0,0 +1,70 @@ +const { workspace, window } = require('vscode'); +const path = require('path'); +const os = require('os'); +const fs = require('fs'); + +let client; +let outputChannel; + +function activate(context) { + outputChannel = window.createOutputChannel('GAS Assembly LSP'); + 
context.subscriptions.push(outputChannel); + + const bin = process.env.GASLSP_BIN + || path.join(os.homedir(), '.local', 'bin', 'gaslsp'); + + log(`binary path: ${bin}`); + log(`binary exists: ${fs.existsSync(bin)}`); + + if (!fs.existsSync(bin)) { + window.showErrorMessage(`gaslsp: binary not found at ${bin}`); + return; + } + + let LanguageClient, TransportKind; + try { + ({ LanguageClient, TransportKind } = require('vscode-languageclient/node')); + } catch (e) { + log(`failed to load vscode-languageclient: ${e.message}`); + window.showErrorMessage('gaslsp: run npm install'); + return; + } + + const serverOptions = { + command: bin, + transport: TransportKind.stdio, + options: { env: process.env } + }; + + const clientOptions = { + documentSelector: [ + { scheme: 'file', language: '{asm,gas}' }, + { scheme: 'file', pattern: '**/*.{s,S,asm}' }, + ], + synchronize: { + fileEvents: workspace.createFileSystemWatcher('**/*.{s,S,asm}') + }, + outputChannel: outputChannel, + }; + + try { + client = new LanguageClient('gaslsp', 'GAS Assembly LSP', serverOptions, clientOptions); + const disposable = client.start(); + context.subscriptions.push(disposable); + log('client started'); + } catch (e) { + log(`error: ${e.message}`); + window.showErrorMessage(`gaslsp error: ${e.message}`); + } +} + +function log(msg) { + const ts = new Date().toISOString().split('T')[1].slice(0, -1); + outputChannel.appendLine(`[${ts}] ${msg}`); +} + +function deactivate() { + if (client) return client.stop(); +} + +module.exports = { activate, deactivate }; diff --git a/package-lock.json b/package-lock.json index afd1ae6..c2a24ba 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,88 @@ { - "name": "assembly-utils", - "version": "1.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "assembly-utils", - "version": "1.0.0", - "engines": { - "vscode": "^1.99.0" - } - } - } + "name": "assembly-utils", + "version": "2.0.0", + "lockfileVersion": 3, + 
"requires": true, + "packages": { + "": { + "name": "assembly-utils", + "version": "2.0.0", + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "engines": { + "vscode": "^1.99.0" + } + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "node_modules/brace-expansion": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.0.tgz", + "integrity": "sha512-TN1kCZAgdgweJhWWpgKYrQaMNHcDULHkWwQIspdtjV4Y5aurRdZpjAqn6yX3FPqTA9ngHCc4hJxMAMgGfve85w==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/minimatch": { + "version": "5.1.9", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz", + "integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": 
"sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "dependencies": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + }, + "engines": { + "vscode": "^1.82.0" + } + }, + "node_modules/vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==" + } + } } diff --git a/package.json b/package.json index 6d932ff..29ef294 100644 --- a/package.json +++ b/package.json @@ -1,60 +1,161 @@ { - "name": "assembly-utils", - "displayName": "assembly-utils", - "description": "", - "version": "1.0.0", - "publisher": "babywolf", - "repository": { - "url": "https://github.com/fgsoftware1/assembly-utils-vscode" - }, - "engines": { - "vscode": "^1.99.0" - }, - "categories": [ - "Snippets", - "Programming Languages" - ], - "contributes": { - "languages": [ - { - "id": "gas", - "aliases": ["gas", "GAS", "GNU Assembler"], - "extensions": [".asm", ".s", ".S"] - } - ], - "snippets": [ - { - "language": "gas", - "path": "./snippets/gas.code-snippets" - } - ], - "grammars": [ - { - "language": "gas", - "scopeName": "source.assembly", - "path": "./syntaxes/gas.tmLanguage.json" - } - ], - "configurationDefaults": { - "editor.tokenColorCustomizations": { - "textMateRules": [ - {"scope": "comment.line.assembly", "settings": {"foreground": "#888888", "fontStyle": "italic"}}, - {"scope": 
"string.quoted.double.assembly", "settings": {"foreground": "#CE9178"}}, - {"scope": "constant.character.escape.assembly", "settings": {"foreground": "#D7BA7D"}}, - {"scope": "entity.name.directive.assembly", "settings": {"foreground": "#bf0c2f", "fontStyle": "bold"}}, - {"scope": "entity.name.gnu.directive.assembly", "settings": {"foreground": "#9966CC", "fontStyle": "bold"}}, - {"scope": "entity.name.label.assembly", "settings": {"foreground": "#FFA500", "fontStyle": "bold"}}, - {"scope": "entity.name.label.local.assembly", "settings": {"foreground": "#FF69B4", "fontStyle": "bold"}}, - {"scope": "keyword.instruction.assembly", "settings": {"foreground": "#1976D2", "fontStyle": "bold"}}, - {"scope": "variable.register.assembly", "settings": {"foreground": "#FFFF66", "fontStyle": "bold"}}, - {"scope": "variable.other.assembly", "settings": {"foreground": "#40FFFF"}}, - {"scope": "constant.numeric.hex.assembly", "settings": {"foreground": "#FF6B9D"}}, - {"scope": "constant.numeric.octal.assembly", "settings": {"foreground": "#C0FF00"}}, - {"scope": "constant.numeric.decimal.assembly", "settings": {"foreground": "#FFFF00"}}, - {"scope": "constant.numeric.immediate.assembly", "settings": {"foreground": "#FFA500"}}, - {"scope": "constant.numeric.immediate.operand.assembly", "settings": {"foreground": "#00FF00"}} - ] - } - } - } -} + "name": "assembly-utils", + "displayName": "assembly-utils", + "description": "", + "version": "2.0.0", + "publisher": "babywolf", + "repository": { + "url": "https://github.com/fgsoftware1/assembly-utils-vscode" + }, + "engines": { + "vscode": "^1.99.0" + }, + "categories": [ + "Snippets", + "Programming Languages" + ], + "activationEvents": [ + "onLanguage:asm", + "onLanguage:gas", + "workspaceContains:**/*.s", + "workspaceContains:**/*.S", + "workspaceContains:**/*.asm" + ], + "main": "./extension.js", + "contributes": { + "languages": [ + { + "id": "gas", + "aliases": [ + "gas", + "GAS", + "GNU Assembler" + ], + "extensions": [ + 
".asm", + ".s", + ".S" + ] + } + ], + "snippets": [ + { + "language": "gas", + "path": "./snippets/gas.code-snippets" + } + ], + "grammars": [ + { + "language": "gas", + "scopeName": "source.assembly", + "path": "./syntaxes/gas.tmLanguage.json" + } + ], + "configurationDefaults": { + "editor.tokenColorCustomizations": { + "textMateRules": [ + { + "scope": "comment.line.assembly", + "settings": { + "foreground": "#888888", + "fontStyle": "italic" + } + }, + { + "scope": "string.quoted.double.assembly", + "settings": { + "foreground": "#CE9178" + } + }, + { + "scope": "constant.character.escape.assembly", + "settings": { + "foreground": "#D7BA7D" + } + }, + { + "scope": "entity.name.directive.assembly", + "settings": { + "foreground": "#bf0c2f", + "fontStyle": "bold" + } + }, + { + "scope": "entity.name.gnu.directive.assembly", + "settings": { + "foreground": "#9966CC", + "fontStyle": "bold" + } + }, + { + "scope": "entity.name.label.assembly", + "settings": { + "foreground": "#FFA500", + "fontStyle": "bold" + } + }, + { + "scope": "entity.name.label.local.assembly", + "settings": { + "foreground": "#FF69B4", + "fontStyle": "bold" + } + }, + { + "scope": "keyword.instruction.assembly", + "settings": { + "foreground": "#1976D2", + "fontStyle": "bold" + } + }, + { + "scope": "variable.register.assembly", + "settings": { + "foreground": "#FFFF66", + "fontStyle": "bold" + } + }, + { + "scope": "variable.other.assembly", + "settings": { + "foreground": "#40FFFF" + } + }, + { + "scope": "constant.numeric.hex.assembly", + "settings": { + "foreground": "#FF6B9D" + } + }, + { + "scope": "constant.numeric.octal.assembly", + "settings": { + "foreground": "#C0FF00" + } + }, + { + "scope": "constant.numeric.decimal.assembly", + "settings": { + "foreground": "#FFFF00" + } + }, + { + "scope": "constant.numeric.immediate.assembly", + "settings": { + "foreground": "#FFA500" + } + }, + { + "scope": "constant.numeric.immediate.operand.assembly", + "settings": { + "foreground": 
"#00FF00" + } + } + ] + } + } + }, + "dependencies": { + "vscode-languageclient": "^9.0.1" + } +} \ No newline at end of file diff --git a/snippets/gas.code-snippets b/snippets/gas.code-snippets index 3418943..d9fa826 100644 --- a/snippets/gas.code-snippets +++ b/snippets/gas.code-snippets @@ -2,7 +2,7 @@ "bits": { "prefix": "bits", "body": [ - ".code ${1|16,32,64|}" + ".code${1|16,32,64|}" ] }, "global decl": { diff --git a/src/.editorconfig b/src/.editorconfig new file mode 100644 index 0000000..01072ca --- /dev/null +++ b/src/.editorconfig @@ -0,0 +1,8 @@ +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.v] +indent_style = tab diff --git a/src/.gitattributes b/src/.gitattributes new file mode 100644 index 0000000..9a98968 --- /dev/null +++ b/src/.gitattributes @@ -0,0 +1,8 @@ +* text=auto eol=lf +*.bat eol=crlf + +*.v linguist-language=V +*.vv linguist-language=V +*.vsh linguist-language=V +v.mod linguist-language=V +.vdocignore linguist-language=ignore diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..716de72 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,27 @@ +# Binaries for programs and plugins +main + +*.exe +*.exe~ +*.so +*.dylib +*.dll + +# Ignore binary output folders +bin/ + +# Ignore common editor/system specific metadata +.DS_Store +.idea/ +.vscode/ +*.iml + +# ENV +.env + +# vweb and database +*.db +*.js + +# Ignore installed modules through `v install --local`: +modules/ diff --git a/src/config.v b/src/config.v new file mode 100644 index 0000000..c3146e5 --- /dev/null +++ b/src/config.v @@ -0,0 +1,277 @@ +// config.v +// Parses gaslsp.toml into a Config struct. +// Minimal hand-rolled TOML parser — only handles the subset used by gaslsp.toml. +// No external dependencies. 
+ +module main + +import os + +pub struct DiagCategories { // one on/off switch per diagnostic category; filled from the [diagnostics.categories] section +pub mut: + size bool = true // D001-D003 + truncation bool = true + register bool = true + symbol bool = true // D006-D008, D019 + directive bool = true + operand bool = true // D004-D005, D009-D010, D018 + encoding bool = true // D011-D015 + abi bool = true // D016-D017 + state bool = true // D034 + statements bool = true // D020 +} + +pub struct DiagLevels { // one on/off switch per severity; filled from [diagnostics.levels] +pub mut: + error bool = true + warning bool = true + hint bool = true +} + +pub struct DiagConfig { // the [diagnostics] section +pub mut: + enabled bool = true // when false, is_suppressed() reports every code as suppressed + suppress []string // diagnostic codes to drop, e.g. "D002" + categories DiagCategories + levels DiagLevels + warnings_as_errors []string // diagnostic codes promoted to errors by is_error() +} + +pub struct InferConfig { // the [infer] section +pub mut: + warn_inferred_size bool = true + warn_mode_mismatch bool = true +} + +pub struct SymbolConfig { // the [symbols] section +pub mut: + warn_dead_exports bool // no explicit default, so it starts out false + warn_missing_global bool = true +} + +pub struct AbiConfig { // the [abi] section +pub mut: + convention string = 'sysv' + warn_syscall_clobber bool = true + warn_legacy_syscall bool = true +} + +pub enum IndexScope { // parsed from [indexing] scope; any value other than "includes"/"open" maps to workspace + workspace + includes + open +} + +pub struct IndexingConfig { // the [indexing] section +pub mut: + scope IndexScope = .workspace + follow_external_includes bool + extra_dirs []string +} + +pub struct GeneralConfig { // the [general] section +pub mut: + mode string = 'auto' + extensions []string = ['.s', '.S', '.asm'] + recursive bool = true +} + +pub struct Config { // root configuration object: one field per top-level TOML section +pub mut: + general GeneralConfig + diagnostics DiagConfig + infer InferConfig + symbols SymbolConfig + abi AbiConfig + indexing IndexingConfig +} + +// Load config from path. Falls back to defaults if file missing. 
+pub fn load(path string) Config { + mut cfg := Config{} + content := os.read_file(path) or { return cfg } + parse_toml(content, mut cfg) + return cfg +} + +// Find config: project root first, then ~/.config/gaslsp/gaslsp.toml +pub fn find_and_load(workspace string) Config { + project_cfg := os.join_path(workspace, 'gaslsp.toml') + if os.exists(project_cfg) { + return load(project_cfg) + } + home := os.home_dir() + user_cfg := os.join_path(home, '.config', 'gaslsp', 'gaslsp.toml') + if os.exists(user_cfg) { + return load(user_cfg) + } + return Config{} +} + +// Check if a diagnostic code is suppressed +pub fn (c &Config) is_suppressed(code string) bool { + if !c.diagnostics.enabled { + return true + } + if code in c.diagnostics.suppress { + return true + } + return false +} + +// Check if a diagnostic should be emitted as error (promotion) +pub fn (c &Config) is_error(code string, original_level string) bool { + if original_level == 'error' { + return true + } + if code in c.diagnostics.warnings_as_errors { + return true + } + return false +} + +// ─── parser ────────────────────────────────────────────────────────────────── +// Handles: [section], [section.subsection], key = value, key = [array], # comments (full-line or trailing) + +fn parse_toml(content string, mut cfg Config) { + mut section := '' + for raw_line in content.split_into_lines() { + line := raw_line.all_before('#').trim_space() // cut trailing comments up front so "[section] # note" is still seen as a header; like apply(), this also cuts at a '#' inside a quoted value + if line.starts_with('#') || line.len == 0 { + continue + } + + // section header + if line.starts_with('[') && line.ends_with(']') { + section = line[1..line.len - 1].trim_space() + continue + } + + // key = value + eq := line.index('=') or { continue } + key := line[..eq].trim_space() + value := line[eq + 1..].trim_space() + + apply(key, value, section, mut cfg) + } +} + +fn apply(key string, value string, section string, mut cfg Config) { + // strip inline comments from value + comment_idx := value.index('#') or { -1 } + clean_value := if comment_idx >= 0 { value[..comment_idx] } else { value }.trim_space() + 
match section { + 'indexing' { + match key { + 'scope' { + cfg.indexing.scope = match unquote(clean_value) { + 'includes' { IndexScope.includes } + 'open' { IndexScope.open } + else { IndexScope.workspace } + } + } + 'follow_external_includes' { + cfg.indexing.follow_external_includes = clean_value == 'true' + } + 'extra_dirs' { + cfg.indexing.extra_dirs = parse_str_array(clean_value) + } + else {} + } + } + 'general' { + match key { + 'mode' { cfg.general.mode = unquote(clean_value) } + 'recursive' { cfg.general.recursive = clean_value == 'true' } + 'extensions' { cfg.general.extensions = parse_str_array(clean_value) } + else {} + } + } + 'diagnostics' { + match key { + 'enabled' { cfg.diagnostics.enabled = clean_value == 'true' } + 'suppress' { cfg.diagnostics.suppress = parse_str_array(clean_value) } + 'warnings_as_errors' { cfg.diagnostics.warnings_as_errors = parse_str_array(clean_value) } + else {} + } + } + 'diagnostics.categories' { + v := clean_value == 'true' + match key { + 'size' { cfg.diagnostics.categories.size = v } + 'truncation' { cfg.diagnostics.categories.truncation = v } + 'register' { cfg.diagnostics.categories.register = v } + 'symbol' { cfg.diagnostics.categories.symbol = v } + 'directive' { cfg.diagnostics.categories.directive = v } + 'operand' { cfg.diagnostics.categories.operand = v } + 'encoding' { cfg.diagnostics.categories.encoding = v } + 'abi' { cfg.diagnostics.categories.abi = v } + 'state' { cfg.diagnostics.categories.state = v } + 'statements' { cfg.diagnostics.categories.statements = v } + else {} + } + } + 'diagnostics.levels' { + v := clean_value == 'true' + match key { + 'error' { cfg.diagnostics.levels.error = v } + 'warning' { cfg.diagnostics.levels.warning = v } + 'hint' { cfg.diagnostics.levels.hint = v } + else {} + } + } + 'infer' { + v := clean_value == 'true' + match key { + 'warn_inferred_size' { cfg.infer.warn_inferred_size = v } + 'warn_mode_mismatch' { cfg.infer.warn_mode_mismatch = v } + else {} + } + } + 'symbols' 
{ + v := clean_value == 'true' + match key { + 'warn_dead_exports' { cfg.symbols.warn_dead_exports = v } + 'warn_missing_global' { cfg.symbols.warn_missing_global = v } + else {} + } + } + 'abi' { + match key { + 'convention' { cfg.abi.convention = unquote(clean_value) } + 'warn_syscall_clobber' { cfg.abi.warn_syscall_clobber = clean_value == 'true' } + 'warn_legacy_syscall' { cfg.abi.warn_legacy_syscall = clean_value == 'true' } + else {} + } + } + else {} + } +} + +// "hello" → hello; anything not wrapped in a pair of double quotes is returned unchanged +fn unquote(s string) string { + if s.len >= 2 && s.starts_with('"') && s.ends_with('"') { // len check: a lone '"' is both prefix and suffix and would make the slice below [1..0] + return s[1..s.len - 1] + } + return s +} + +// ["D001", "D002"] → ['D001', 'D002']; input that is not a complete single-line [ ... ] yields an empty array +fn parse_str_array(s string) []string { + trimmed := s.trim_space() + if trimmed.len < 2 || !trimmed.starts_with('[') || !trimmed.ends_with(']') { // a bare '[' (multi-line array opener) used to slice [1..0]; a missing ']' used to drop the last character + return [] + } + inner := trimmed[1..trimmed.len - 1] + if inner.trim_space().len == 0 { + return [] + } + mut result := []string{} + for part in inner.split(',') { + v := unquote(part.trim_space()) + if v.len > 0 { + result << v + } + } + return result +} diff --git a/src/diagnostics.v b/src/diagnostics.v new file mode 100644 index 0000000..16bb71a --- /dev/null +++ b/src/diagnostics.v @@ -0,0 +1,715 @@ +// diagnostics.v +// Runs diagnostic checks on parsed lines and publishes results via LSP + +module main + +import os +import encoding.csv + +// ─── types ──────────────────────────────────────────────────────────────────── + +pub enum DiagSeverity { + error = 1 + warning = 2 + hint = 3 +} + +pub struct DiagRange { +pub: + line int + col_start int + col_end int +} + +pub struct Diag { +pub: + code string + severity DiagSeverity + message string + range DiagRange +} + +// ─── register tracking ───────────────────────────────────────────────────────── + +struct RegisterTracker { +mut: + initialized map[string]bool // which registers have been written +} + +fn new_register_tracker() RegisterTracker { + return RegisterTracker{} +} + +fn (mut t RegisterTracker) reset() { + t.initialized = map[string]bool{} +} + +fn (t 
&RegisterTracker) is_init(reg string) bool { + return t.initialized[reg] +} + +fn (mut t RegisterTracker) mark_init(reg string) { + t.initialized[reg] = true +} + +fn (mut t RegisterTracker) mark_clobbered(reg string) { + t.initialized[reg] = false +} + +// Clobber registers modified by an instruction +fn (mut t RegisterTracker) clobber_instr(mnemonic string) { + match mnemonic { + 'div', 'idiv' { + } + 'call' { + } + 'ret' { + } + else {} + } +} + +// Check if a register is read before being initialized +fn (t &RegisterTracker) check_uninit_read(reg string) bool { + return !t.is_init(reg) +} + +// Get full-width register name (e.g., "eax" -> "rax", "sil" -> "rsi", "r12d" -> "r12"). +// Every byte/word/dword alias maps to its 64-bit parent so state tracking and is_callee_saved() +// see one name per physical register; names that are not listed are returned unchanged. +fn full_reg(reg string) string { + return match reg { + 'al', 'ah', 'ax', 'eax', 'rax' { 'rax' } + 'bl', 'bh', 'bx', 'ebx', 'rbx' { 'rbx' } + 'cl', 'ch', 'cx', 'ecx', 'rcx' { 'rcx' } + 'dl', 'dh', 'dx', 'edx', 'rdx' { 'rdx' } + 'sil', 'si', 'esi', 'rsi' { 'rsi' } + 'dil', 'di', 'edi', 'rdi' { 'rdi' } + 'bpl', 'bp', 'ebp', 'rbp' { 'rbp' } + 'spl', 'sp', 'esp', 'rsp' { 'rsp' } + 'r8b', 'r8w', 'r8d', 'r8' { 'r8' } + 'r9b', 'r9w', 'r9d', 'r9' { 'r9' } + 'r10b', 'r10w', 'r10d', 'r10' { 'r10' } + 'r11b', 'r11w', 'r11d', 'r11' { 'r11' } + 'r12b', 'r12w', 'r12d', 'r12' { 'r12' } + 'r13b', 'r13w', 'r13d', 'r13' { 'r13' } + 'r14b', 'r14w', 'r14d', 'r14' { 'r14' } + 'r15b', 'r15w', 'r15d', 'r15' { 'r15' } + else { reg } + } +} + +// Callee-saved registers on x86-64 Linux +fn is_callee_saved(reg string) bool { + full := full_reg(reg) + return full in ['rbx', 'rbp', 'r12', 'r13', 'r14', 'r15'] +} + +// ─── diag table entry ───────────────────────────────────────────────────────── + +struct DiagDef { + code string + level string + category string + template string +} + +// ─── engine ─────────────────────────────────────────────────────────────────── + +pub struct DiagEngine { +pub mut: + cfg Config + defs []DiagDef + tables Tables + indexer ?&Indexer +} + +pub fn new_diag_engine(cfg Config, tables Tables, indexer &Indexer) DiagEngine { + data_dir := resolve_data_dir() + defs := load_diag_defs(os.join_path(data_dir, 'diagnostics.csv')) + return DiagEngine{ + cfg: cfg + defs: defs + tables: tables + indexer: indexer + } +} + +fn (eng &DiagEngine) get_indexer() &Indexer { + return eng.indexer or { panic('DiagEngine: indexer not set') } +} + +fn 
load_diag_defs(path string) []DiagDef { + raw := os.read_file(path) or { return [] } + mut r := csv.new_reader(raw) + r.read() or { return [] } + mut out := []DiagDef{} + for { + row := r.read() or { break } + if row.len < 4 { + continue + } + out << DiagDef{ + code: row[0].trim_space() + level: row[1].trim_space() + category: row[2].trim_space() + template: row[3].trim_space() + } + } + return out +} + +// ─── publish ────────────────────────────────────────────────────────────────── + +// Run all checks on a file and publish diagnostics +pub fn (mut eng DiagEngine) publish(path string) { + if !eng.cfg.diagnostics.enabled { + return + } + + lines := os.read_lines(path) or { return } + mut diags := []Diag{} + + // track .global declarations for cross-check + mut globals := map[string]bool{} + for i, raw in lines { + l := parse_line(raw, i + 1) + if l.directive == 'global' || l.directive == 'globl' { + for name in l.dir_args.split(',') { + globals[name.trim_space()] = true + } + } + } + + // register tracking for uninitialized reads + mut tracker := new_register_tracker() + mut in_function := false + + for i, raw in lines { + l := parse_line(raw, i + 1) + line_diags := eng.check_line(l, raw, globals, path) + diags << line_diags + + // Register tracking + if l.kind == .label { + // Reset tracker at function entry points + if l.label == '_start' || l.label == 'main' || l.label.ends_with(':entry') { + tracker.reset() + in_function = true + } + } + + if !in_function || (l.kind != .instruction && l.kind != .label_and_instruction) { + continue + } + + // Check for uninitialized reads + if eng.enabled('D034') && eng.cfg.diagnostics.categories.state && l.kind == .instruction { + // Source operands (all but last in AT&T syntax) + for idx := 0; idx < l.operands.len - 1; idx++ { + op := l.operands[idx] + if op.kind == .register { + full := full_reg(op.reg) + if tracker.check_uninit_read(full) { + diags << Diag{ + code: 'D034' + severity: .warning + message: "'${op.raw}' may be 
uninitialized (not written before this read)" + range: DiagRange{ line: i, col_start: 0, col_end: raw.len } + } + } + } + } + } + + // Mark registers as initialized after write instructions + if l.kind == .instruction { + dst_reg := register_written_by(l.mnemonic, l.operands) + if dst_reg.len > 0 { + tracker.mark_init(dst_reg) + // For div/imul, both EDX and EAX are used and result affects both + if l.mnemonic == 'div' || l.mnemonic == 'idiv' || l.mnemonic == 'mul' || l.mnemonic == 'imul' { + tracker.mark_init('rdx') + tracker.mark_init('rax') + } + } + // xor reg, reg marks reg as initialized (zeroing idiom) + // In AT&T: xor %eax, %eax means src=dst, so only one reg + if l.mnemonic == 'xor' && l.operands.len >= 2 { + if l.operands[0].kind == .register && l.operands[0].reg == l.operands[1].reg { + tracker.mark_init(full_reg(l.operands[0].reg)) + } + } + // mov: dst is last operand + if l.mnemonic == 'mov' || l.mnemonic == 'movq' || l.mnemonic == 'movl' || l.mnemonic == 'movw' || l.mnemonic == 'movb' { + if l.operands.len > 0 { + last := l.operands[l.operands.len - 1] + if last.kind == .register { + tracker.mark_init(full_reg(last.reg)) + } + } + } + // push doesn't init registers, pop does (AT&T: pop %dst) + if l.mnemonic == 'pop' && l.operands.len > 0 { + last := l.operands[l.operands.len - 1] + if last.kind == .register { + tracker.mark_init(full_reg(last.reg)) + } + } + // lea doesn't write registers (AT&T: lea 8(%rax), %rbx) + // Function calls clobber caller-saved registers + if l.mnemonic == 'call' { + for r in caller_saved_regs() { + tracker.mark_clobbered(r) + } + } + } + } + + // cross-file symbol checks + diags << eng.check_symbols(path, lines, globals) + + uri := path_to_uri(path) + arr := diags.map(diag_to_json(it)).join(",") + json_body := '{"uri":"' + uri + '","diagnostics":[' + arr + ']}' + send_notification("textDocument/publishDiagnostics", json_body) +} + +// Returns the register written by an instruction (for init tracking) +// In AT&T syntax: 
mov %src, %dst - dst is LAST operand +fn register_written_by(mnemonic string, operands []Operand) string { + if operands.len == 0 { + return '' + } + // Destination is LAST operand in AT&T syntax + last := operands[operands.len - 1] + if last.kind == .register { + return full_reg(last.reg) + } + // For ALU ops with memory dest, no register is written + if last.kind == .memory { + return '' + } + return '' +} + +// Returns registers READ by an instruction (source operands) +// In AT&T syntax: mov %src, %dst - src is ALL BUT LAST operand +fn registers_read(mnemonic string, operands []Operand) []string { + mut regs := []string{} + // All operands except the last one are sources (reads) + for i := 0; i < operands.len - 1; i++ { + op := operands[i] + if op.kind == .register { + regs << full_reg(op.reg) + } + } + return regs +} + +fn caller_saved_regs() []string { + return ['rax', 'rcx', 'rdx', 'rsi', 'rdi', 'r8', 'r9', 'r10', 'r11'] +} + +pub fn (mut eng DiagEngine) publish_workspace() { + ix := eng.get_indexer() + for path in ix.index.files { + eng.publish(path) + } +} + +// ─── line checks ───────────────────────────────────────────────────────────── + +fn (eng &DiagEngine) check_line(l Line, raw string, globals map[string]bool, path string) []Diag { + mut diags := []Diag{} + + // D018 incomplete label - line has label-like content but no colon + stripped := raw.trim_space() + if stripped.len > 0 && !stripped.starts_with('.') && !stripped.starts_with('#') && !stripped.contains(':') { + first_word := stripped.split(' ')[0].split(' ')[0] + if first_word.len > 0 { + if _ := eng.tables.find_instr(first_word) { + // first_word is an instruction, not a label + } else { + instr_match := stripped.index(' ') or { stripped.len } + rest := stripped[instr_match..].trim_space() + if rest.len == 0 || rest.starts_with('#') { + if eng.enabled('D018') && eng.cfg.diagnostics.categories.operand { + diags << eng.make(l, raw, 'D018', .error, "incomplete label: '${first_word}' has no 
colon") + } + } + } + } + } + + // TODO check - comments containing TODO/FIXME/HACK/XXX + if eng.enabled('D020') && eng.cfg.diagnostics.categories.statements { + todo_patterns := ['TODO', 'FIXME', 'HACK', 'XXX', 'BUG'] + upper := stripped.to_upper() + for pattern in todo_patterns { + if upper.contains(pattern) { + diags << eng.make(l, raw, 'D020', .hint, "TODO comment found: '${pattern}' - consider addressing this") + break + } + } + } + + if l.kind != .instruction && l.kind != .label_and_instruction { + return diags + } + + diags << eng.check_size(l, raw) + diags << eng.check_operands(l, raw) + diags << eng.check_encoding(l, raw) + diags << eng.check_abi(l, raw) + + return diags +} + +// D001 missing suffix, D002 inferred suffix, D003 mismatch +fn (eng &DiagEngine) check_size(l Line, raw string) []Diag { + mut diags := []Diag{} + + reg_ops := l.operands.filter(it.kind == .register) + + // check if instruction is known to accept suffixes + if instr := eng.tables.find_instr(l.mnemonic) { + if instr.suffixes.len == 0 { + return diags + } + } + + if l.suffix == 0 { + // For shift instructions with invalid count, only show D028, not D002 + is_shift := l.mnemonic in ['shl', 'shr', 'sar'] + invalid_count := is_shift && l.operands.len >= 1 && + l.operands[0].kind == .register && l.operands[0].reg != 'cl' + + if reg_ops.len == 0 { + // No register operands to infer from + if eng.enabled('D001') { + diags << eng.make(l, raw, 'D001', .error, "suffix or operands needed for '${l.mnemonic}'") + } + } else if !invalid_count { + if eng.enabled('D002') && eng.cfg.infer.warn_inferred_size { + inferred := suffix_for_width(reg_ops[0].width) + diags << eng.make(l, raw, 'D002', .warning, "no size suffix on '${l.mnemonic}', inferring ${inferred} from operand '%${reg_ops[0].reg}'") + } + } + } else { + // explicit suffix — check consistency against register operands + suffix_bits := suffix_width(l.suffix) + if suffix_bits > 0 { + mismatched := reg_ops.filter(it.width > 0 && it.width != 
suffix_bits) + if mismatched.len > 0 && eng.enabled('D003') { + op := mismatched[0] + diags << eng.make(l, raw, 'D003', .error, "operand size mismatch: '${l.mnemonic}' has suffix '${l.suffix.ascii_str()}' (${suffix_bits}-bit) but '%${op.reg}' is ${op.width}-bit") + } + } + } + + return diags +} + +// D004 truncation, D005 REX+high-byte, D009 32-bit mem in 64-bit +fn (eng &DiagEngine) check_operands(l Line, raw string) []Diag { + mut diags := []Diag{} + suffix_bits := if l.suffix != 0 { suffix_width(l.suffix) } else { 0 } + + for op in l.operands { + match op.kind { + .immediate { + if suffix_bits > 0 { + min_s := min_signed(suffix_bits) + max_s := max_signed(suffix_bits) + if op.imm < min_s || op.imm > max_s { + if eng.enabled('D004') { + diags << eng.make(l, raw, 'D004', .warning, 'immediate ${op.raw} truncated: value ${op.imm} does not fit in ${suffix_bits} bits (range ${min_s}..${max_s})') + } + } + } + } + .register { + high_byte := op.reg in ['ah', 'bh', 'ch', 'dh'] + rex_regs := l.operands.any(it.kind == .register + && it.reg in ['sil', 'dil', 'spl', 'bpl', 'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15', 'r8b', 'r9b', 'r10b', 'r11b', 'r12b', 'r13b', 'r14b', 'r15b', 'r8w', 'r9w', 'r10w', 'r11w', 'r12w', 'r13w', 'r14w', 'r15w', 'r8d', 'r9d', 'r10d', 'r11d', 'r12d', 'r13d', 'r14d', 'r15d']) + if high_byte && rex_regs && eng.enabled('D005') { + diags << eng.make(l, raw, 'D005', .error, "high-byte register (%ah, %bh, %ch, %dh) conflicts with REX prefix") + } + } + .memory { + // D009 — 32-bit base register in memory operand + // crude check: look for (%e__) pattern + if op.raw.contains('(%e') && eng.enabled('D009') { + diags << eng.make(l, raw, 'D009', .error, "memory operand '${op.raw}' uses 32-bit base register in 64-bit mode; consider using the 64-bit equivalent") + } + } + else {} + } + } + + // D010 src == dst — check once per instruction + if l.operands.len == 2 && l.mnemonic != 'xor' { + src, dst := l.operands[0], l.operands[1] + if src.kind == 
.register && dst.kind == .register && src.reg == dst.reg + && eng.enabled('D010') { + diags << eng.make(l, raw, 'D010', .warning, "'${l.mnemonic}': source and destination are the same register '%${src.reg}'; instruction has no effect") + } + } + + return diags +} + +// D011 div-by-immediate, D012 pushb, D013 one-operand imul, D014 mul unsigned, D015 shift count +fn (eng &DiagEngine) check_encoding(l Line, raw string) []Diag { + mut diags := []Diag{} + + match l.mnemonic { + 'div', 'idiv' { + // D011 — div with immediate + if l.operands.any(it.kind == .immediate) && eng.enabled('D011') { + diags << eng.make(l, raw, 'D011', .error, "'${l.mnemonic}' does not support immediate operands; load divisor into a register first") + } + } + 'imul' { + // D013 — one-operand imul + if l.operands.len == 1 && eng.enabled('D013') { + diags << eng.make(l, raw, 'D013', .warning, "'imul' one-operand form: high half of result in rdx may be unexpected; did you want the two-operand form?") + } + } + 'mul' { + // D014 — mul vs imul + if eng.enabled('D014') { + diags << eng.make(l, raw, 'D014', .warning, "'mul' is unsigned multiply; upper half stored in rdx may be silently discarded; use 'imul' if signed") + } + } + 'shl', 'shr', 'sar' { + // D015 — shift count must be imm8 or %cl + if l.operands.len >= 1 { + count := l.operands[0] + if count.kind == .register && count.reg != 'cl' && eng.enabled('D015') { + diags << eng.make(l, raw, 'D015', .error, "shift count must be %cl or an immediate; '%${count.reg}' is not encodable") + } + } + } + 'push' { + // D012 — pushb not encodable + if l.suffix == `b` && eng.enabled('D012') { + diags << eng.make(l, raw, 'D012', .error, "'pushb' is not encodable; push only supports 16/32/64-bit operands") + } + } + else {} + } + + return diags +} + +// D016 syscall clobber, D017 int $0x80 +fn (eng &DiagEngine) check_abi(l Line, raw string) []Diag { + mut diags := []Diag{} + if !eng.cfg.diagnostics.categories.abi { + return diags + } + + match l.mnemonic { + 
'syscall' { + if eng.enabled('D016') && eng.cfg.abi.warn_syscall_clobber { + diags << eng.make(l, raw, 'D016', .warning, "'syscall' clobbers %rcx and %r11; save them if their values are needed after the call") + } + } + 'int' { + if l.operands.len > 0 && l.operands[0].raw == '$0x80' { + if eng.enabled('D017') && eng.cfg.abi.warn_legacy_syscall { + diags << eng.make(l, raw, 'D017', .warning, "'int $0x80' is the 32-bit Linux syscall ABI; arguments are truncated to 32 bits in 64-bit mode; use 'syscall' instead") + } + } + } + else {} + } + + return diags +} + +// D006 undefined symbol, D007 missing .global, D008 duplicate, D019 not exported +fn (eng &DiagEngine) check_symbols(path string, lines []string, globals map[string]bool) []Diag { + mut diags := []Diag{} + + // D019: _start defined but not declared .global + mut has_start := false + mut has_global_start := false + for i, raw in lines { + l := parse_line(raw, i + 1) + if l.kind == .label && (l.label == '_start' || l.label == 'main') { + has_start = true + if l.label in globals { + has_global_start = true + } + } + } + if has_start && !has_global_start && eng.enabled('D019') { + for i, raw in lines { + l := parse_line(raw, i + 1) + if l.kind == .label && (l.label == '_start' || l.label == 'main') { + diags << Diag{ + code: 'D019' + severity: .warning + message: "'${l.label}' defined but not exported" + range: DiagRange{ line: i, col_start: 0, col_end: raw.len } + } + } + } + } + if !eng.cfg.diagnostics.categories.symbol { + return diags + } + + // collect references in this file + for i, raw in lines { + l := parse_line(raw, i + 1) + if l.kind != .instruction && l.kind != .label_and_instruction { + continue + } + for op in l.operands { + if op.kind != .label_ref { + continue + } + name := op.raw.trim_space() + if name.starts_with('.') { + continue + } + + found := eng.get_indexer().index.find(name) or { + if eng.enabled('D006') { + diags << Diag{ + code: 'D006' + severity: .error + message: "undefined symbol 
'${name}'" + range: DiagRange{ + line: i + col_start: 0 + col_end: raw.len + } + } + } + continue + } + + // D007 — referenced cross-file but not .global + if found.file != path && found.vis == .local { + if eng.enabled('D007') && eng.cfg.symbols.warn_missing_global { + diags << Diag{ + code: 'D007' + severity: .warning + message: "symbol '${name}' is defined in '${found.file}' but not declared .global" + range: DiagRange{ + line: i + col_start: 0 + col_end: raw.len + } + } + } + } + } + } + +// D014 dead exports — requires cross-file reference tracking (TODO) + +// D008 duplicate symbols + for name, _ in globals { + syms := eng.get_indexer().index.find_all(name) + if syms.len > 1 && eng.enabled('D008') { + files := syms.map(it.file).join("' and '") + for sym in syms { + if sym.file != path { + continue + } + diags << Diag{ + code: 'D008' + severity: .error + message: "duplicate symbol '${name}': defined in '${files}'" + range: DiagRange{ + line: sym.line_nr - 1 + col_start: 0 + col_end: 0 + } + } + } + } + } + + return diags +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +fn (eng &DiagEngine) enabled(code string) bool { + return !eng.cfg.is_suppressed(code) +} + +fn (eng &DiagEngine) make(l Line, raw string, code string, sev DiagSeverity, msg string) Diag { + // promote to error if configured + actual_sev := if sev == .warning && eng.cfg.is_error(code, 'warning') { + DiagSeverity.error + } else { + sev + } + return Diag{ + code: code + severity: actual_sev + message: msg + range: DiagRange{ + line: l.line_nr - 1 + col_start: 0 + col_end: raw.len + } + } +} + +fn diag_to_json(d Diag) string { + r := d.range + parts := [ + '{"range":{"start":{"line":' + r.line.str() + ',"character":' + r.col_start.str() + '},', + '"end":{"line":' + r.line.str() + ',"character":' + r.col_end.str() + '}},', + '"severity":' + int(d.severity).str() + ',', + '"code":"' + d.code + '",', + '"source":"gaslsp",', + '"message":"' + d.message + '"}' + 
] + return parts.join('') +} + + + + +fn suffix_for_width(bits int) string { + return match bits { + 8 { 'b' } + 16 { 'w' } + 32 { 'l' } + 64 { 'q' } + else { '?' } + } +} + +fn suffix_width(s u8) int { + return match s { + `b` { 8 } + `w` { 16 } + `l` { 32 } + `q` { 64 } + else { 0 } + } +} + +fn max_unsigned(bits int) u64 { + if bits >= 64 { + return u64(-1) + } + return (u64(1) << bits) - 1 +} + +fn min_signed(bits int) i64 { + if bits >= 64 { + return i64(-9223372036854775808) + } + return -(i64(1) << (bits - 1)) +} + +fn max_signed(bits int) i64 { + if bits >= 64 { + return i64(9223372036854775807) + } + return (i64(1) << (bits - 1)) - 1 +} diff --git a/src/handlers.v b/src/handlers.v new file mode 100644 index 0000000..50be9ae --- /dev/null +++ b/src/handlers.v @@ -0,0 +1,381 @@ +// handlers.v +// LSP method handlers: hover, definition, workspace/symbol +// Wires together parser, indexer, and CSV tables + +module main + +import os +import encoding.csv + +// ─── table types ───────────────────────────────────────────────────────────── + +struct InstrEntry { + mnemonic string + suffixes string + operands string + doc string + flags string + notes string +} + +struct RegEntry { + name string + family string + bits int + notes string +} + +// ─── tables (loaded once at startup) ───────────────────────────────────────── + +pub struct Tables { +pub mut: + instrs []InstrEntry + regs []RegEntry +} + +pub fn load_tables(data_dir string) Tables { + mut t := Tables{} + t.instrs = load_instrs(os.join_path(data_dir, 'instrs.csv')) + t.regs = load_regs(os.join_path(data_dir, 'regs.csv')) + return t +} + +fn load_instrs(path string) []InstrEntry { + raw := os.read_file(path) or { return [] } + mut r := csv.new_reader(raw) + r.read() or { return [] } // header + mut out := []InstrEntry{} + for { + row := r.read() or { break } + if row.len < 6 { + continue + } + out << InstrEntry{ + mnemonic: row[0].trim_space() + suffixes: row[1].trim_space() + operands: row[2].trim_space() 
+ doc: row[3].trim_space() + flags: row[4].trim_space() + notes: row[5].trim_space() + } + } + return out +} + +fn load_regs(path string) []RegEntry { + raw := os.read_file(path) or { return [] } + mut r := csv.new_reader(raw) + r.read() or { return [] } + mut out := []RegEntry{} + for { + row := r.read() or { break } + if row.len < 4 { + continue + } + out << RegEntry{ + name: row[0].trim_space() + family: row[2].trim_space() + bits: row[1].trim_space().int() + notes: row[3].trim_space() + } + } + return out +} + +fn (t &Tables) find_instr(mnemonic string) ?InstrEntry { + for e in t.instrs { + if e.mnemonic == mnemonic { + return e + } + } + return none +} + +fn (t &Tables) find_reg(name string) ?RegEntry { + for e in t.regs { + if e.name == name { + return e + } + } + return none +} + +// ─── server extension ──────────────────────────────────────────────────────── + +// Add to Server struct in main.v: +// tables Tables +// indexer Indexer + +// Called once after initialize +pub fn (mut srv Server) load_data(workspace string) { + data_dir := resolve_data_dir() + srv.tables = load_tables(data_dir) + srv.indexer = new_indexer(srv.cfg) + srv.diag = new_diag_engine(srv.cfg, srv.tables, &srv.indexer) + if srv.cfg.indexing.scope == .workspace && workspace.len > 0 { + srv.indexer.index_workspace(workspace) + } +} + +// ─── hover ──────────────────────────────────────────────────────────────────── + +pub fn (mut srv Server) on_hover(req RpcRequest) { + params := req.params or { + send_response(req.id, 'null') + return + } + path := extract_path(params) or { + send_response(req.id, 'null') + return + } + pos_raw := json_raw_field(params, 'position') or { + send_response(req.id, 'null') + return + } + line_nr := json_int_field(pos_raw, 'line') or { + send_response(req.id, 'null') + return + } + char_nr := json_int_field(pos_raw, 'character') or { + send_response(req.id, 'null') + return + } + lines := os.read_lines(path) or { + send_response(req.id, 'null') + return + } + 
if line_nr >= lines.len { + send_response(req.id, 'null') + return + } + + raw_line := lines[line_nr] + word := word_at(raw_line, char_nr) + if word.len == 0 { + send_response(req.id, 'null') + return + } + + content := srv.hover_content(word, raw_line) + if content.len == 0 { + send_response(req.id, 'null') + return + } + + escaped := content + send_response(req.id, '{"contents":{"kind":"markdown","value":"' + escaped + '"}}') +} + +fn (srv &Server) hover_content(word string, raw_line string) string { + // strip % prefix for register lookup + clean := if word.starts_with('%') { word[1..] } else { word } + clean_lower := clean.to_lower() + + // register hover + if word.starts_with('%') { + if reg := srv.tables.find_reg(clean_lower) { + mut md := '**%${reg.name}** — ${reg.bits}-bit' + if reg.notes.len > 0 { + md += '\\n\\n${reg.notes}' + } + return md + } + return '' + } + + // instruction hover — try exact then strip suffix + parsed := parse_line(raw_line, 0) + mnemonic := if parsed.kind == .instruction || parsed.kind == .label_and_instruction { + parsed.mnemonic + } else { + clean_lower + } + + if instr := srv.tables.find_instr(mnemonic) { + mut md := '**${mnemonic}**' + if instr.suffixes.len > 0 { + md += '`${instr.suffixes}`' + } + if instr.operands.len > 0 { + md += ' *${instr.operands}*' + } + md += '\\n\\n${instr.doc}' + if instr.flags != 'none' && instr.flags.len > 0 { + md += '\\n\\n**Flags:** ${instr.flags}' + } + if instr.notes.len > 0 { + md += '\\n\\n> ${instr.notes}' + } + // inferred size warning + if parsed.suffix == 0 && parsed.operands.len > 0 { + has_reg := parsed.operands.any(it.kind == .register) + if has_reg && srv.cfg.infer.warn_inferred_size { + md += '\\n\\n⚠ *No size suffix — size inferred from operand*' + } + } + return md + } + + // symbol hover + if sym := srv.indexer.index.find(clean_lower) { + vis := match sym.vis { + .global { 'global' } + .local { 'local' } + .extern { 'extern' } + } + return '**${sym.name}** — ${vis} 
label\\n\\n${os.base(sym.file)}:${sym.line_nr}' + } + + return '' +} + +// ─── definition ─────────────────────────────────────────────────────────────── + +pub fn (mut srv Server) on_definition(req RpcRequest) { + params := req.params or { + send_response(req.id, 'null') + return + } + path := extract_path(params) or { + send_response(req.id, 'null') + return + } + pos_raw := json_raw_field(params, 'position') or { + send_response(req.id, 'null') + return + } + line_nr := json_int_field(pos_raw, 'line') or { + send_response(req.id, 'null') + return + } + char_nr := json_int_field(pos_raw, 'character') or { + send_response(req.id, 'null') + return + } + lines := os.read_lines(path) or { + send_response(req.id, 'null') + return + } + if line_nr >= lines.len { + send_response(req.id, 'null') + return + } + + word := word_at(lines[line_nr], char_nr) + if word.len == 0 { + send_response(req.id, 'null') + return + } + + sym := srv.indexer.index.find(word) or { + send_response(req.id, 'null') + return + } + + result := location_json(path_to_uri(sym.file), sym.line_nr - 1, 0) + send_response(req.id, result) +} + +// ─── did change ─────────────────────────────────────────────────────────────── + +pub fn (mut srv Server) on_did_change(req RpcRequest) { + params := req.params or { return } + path := extract_path(params) or { return } + + // try to get content from params (didChange sends it), else re-read from disk + if content_arr := json_raw_field(params, 'contentChanges') { + // take last change's text (full sync mode — textDocumentSync: 1) + if text := json_str_field(content_arr, 'text') { + srv.indexer.index_content(path, text) + return + } + } + srv.indexer.index_file(path) +} + +// ─── workspace symbol ───────────────────────────────────────────────────────── + +pub fn (mut srv Server) on_workspace_symbol(req RpcRequest) { + params := req.params or { + send_response(req.id, '[]') + return + } + query := json_str_field(params, 'query') or { '' } + + mut results := 
[]string{} + for name, syms in srv.indexer.index.symbols { + if query.len > 0 && !name.contains(query) { + continue + } + for s in syms { + results << symbol_info_json(s) + } + } + send_response(req.id, '[${results.join(',')}]') +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +// Extract the word (label/mnemonic/register) at a character offset +fn word_at(line string, char_pos int) string { + if char_pos > line.len { + return '' + } + is_word := fn (c u8) bool { + return c.is_letter() || c.is_digit() || c == `_` || c == `.` || c == `%` || c == `$` + } + mut start := char_pos + mut end := char_pos + for start > 0 && is_word(line[start - 1]) { + start-- + } + for end < line.len && is_word(line[end]) { + end++ + } + if start == end { + return '' + } + // include leading % or $ only if at start of token + return line[start..end] +} + +fn location_json(uri_ string, line int, col int) string { + loc := '{"uri":"' + uri_ + '","range":{"start":{"line":' + line.str() + ',"character":' + col.str() + '},"end":{"line":' + line.str() + ',"character":' + col.str() + '}}}' + return loc +} + +fn symbol_info_json(s Symbol) string { + kind := 14 + uri_ := path_to_uri(s.file) + loc := location_json(uri_, s.line_nr - 1, 0) + return '{"name":"' + s.name + '","kind":' + kind.str() + ',"location":' + loc + '}' +} + +// Extract file path from textDocument params (handles both flat and nested uri) +fn extract_path(params string) ?string { + uri := json_str_field(params, 'uri') or { + td := json_raw_field(params, 'textDocument') or { return none } + json_str_field(td, 'uri') or { return none } + } + return if uri.starts_with('file://') { uri['file://'.len..] } else { uri } +} + +fn path_to_uri(path string) string { + return 'file://' + path +} + +// Resolve directory where CSV data files live: +// 1. GASLSP_DATA env var +// 2. next to binary +// 3. 
~/.config/gaslsp/ +fn resolve_data_dir() string { + if d := os.getenv_opt('GASLSP_DATA') { + if os.exists(d) { return d } + } + bin_dir := os.dir(os.executable()) + if os.exists(os.join_path(bin_dir, 'instrs.csv')) { + return bin_dir + } + home := os.join_path(os.home_dir(), '.config', 'gaslsp') + return home +} diff --git a/src/indexer.v b/src/indexer.v new file mode 100644 index 0000000..c554fb0 --- /dev/null +++ b/src/indexer.v @@ -0,0 +1,205 @@ +// indexer.v +// Walks workspace files, parses them, builds symbol table + +module main + +import os + +// ─── symbol table ───────────────────────────────────────────────────────────── + +pub enum SymbolVis { + local // not .global + global // declared .global + extern // declared .extern +} + +pub struct Symbol { +pub: + name string + vis SymbolVis + file string + line_nr int +} + +pub struct Index { +pub mut: + symbols map[string][]Symbol // name → all definitions (catches duplicates) + files []string // indexed file paths +} + +pub fn (idx &Index) find(name string) ?Symbol { + syms := idx.symbols[name] or { return none } + if syms.len == 0 { + return none + } + return syms[0] +} + +pub fn (idx &Index) find_all(name string) []Symbol { + return idx.symbols[name] or { []Symbol{} } +} + +// ─── indexer ────────────────────────────────────────────────────────────────── + +@[heap] +pub struct Indexer { +pub mut: + cfg Config + index Index +} + +pub fn new_indexer(cfg Config) Indexer { + return Indexer{ + cfg: cfg + } +} + +// Index a whole workspace root according to config scope +pub fn (mut ix Indexer) index_workspace(root string) { + match ix.cfg.indexing.scope { + .workspace { ix.walk_dir(root) } + .includes {} // driven per-file via index_file_with_includes + .open {} // driven per open notification + } +} + +// Index a single file — called on didOpen / didChange / didSave +pub fn (mut ix Indexer) index_file(path string) { + lines := os.read_lines(path) or { return } + ix.index_lines(path, lines) + + if 
ix.cfg.indexing.scope == .includes { + ix.follow_includes(path, lines) + } +} + +// Re-index a file from already-read content (avoids double disk read) +pub fn (mut ix Indexer) index_content(path string, content string) { + lines := content.split_into_lines() + ix.index_lines(path, lines) +} + +// Remove all symbols from a file (called before re-indexing it) +pub fn (mut ix Indexer) remove_file(path string) { + for name, mut syms in ix.index.symbols { + ix.index.symbols[name] = syms.filter(it.file != path) + } + ix.index.files = ix.index.files.filter(it != path) +} + +// ─── internals ──────────────────────────────────────────────────────────────── + +fn (mut ix Indexer) walk_dir(dir string) { + entries := os.ls(dir) or { return } + for entry in entries { + full := os.join_path(dir, entry) + if os.is_dir(full) { + if ix.cfg.general.recursive { + ix.walk_dir(full) + } + continue + } + if ix.is_asm_file(full) { + ix.index_file(full) + } + } +} + +fn (mut ix Indexer) index_lines(path string, lines []string) { + // remove stale entries first + ix.remove_file(path) + ix.index.files << path + + mut globals := map[string]bool{} + + // first pass: collect .global / .extern declarations + for i, raw in lines { + l := parse_line(raw, i + 1) + if l.kind != .directive { + continue + } + match l.directive { + 'global', 'globl' { + for name in l.dir_args.split(',') { + globals[name.trim_space()] = true + } + } + else {} + } + } + + // second pass: collect label definitions + for i, raw in lines { + l := parse_line(raw, i + 1) + if l.label.len == 0 { + continue + } + name := l.label + vis := if name in globals { SymbolVis.global } else { SymbolVis.local } + sym := Symbol{ + name: name + vis: vis + file: path + line_nr: i + 1 + } + if name !in ix.index.symbols { + ix.index.symbols[name] = []Symbol{} + } + ix.index.symbols[name] << sym + } +} + +fn (mut ix Indexer) follow_includes(path string, lines []string) { + base := os.dir(path) + for i, raw in lines { + l := parse_line(raw, i 
+ 1) + if l.directive != 'include' { + continue + } + inc_path := resolve_include(l.dir_args, base, ix.cfg) + if inc_path.len == 0 || inc_path in ix.index.files { + continue + } + ix.index_file(inc_path) + } +} + +fn resolve_include(arg string, base string, cfg Config) string { + // strip quotes: "file.s" or + raw := arg.trim_space().trim('"').trim('<').trim('>') + if raw.len == 0 { + return '' + } + + candidate := if os.is_abs_path(raw) { raw } else { os.join_path(base, raw) } + + if os.exists(candidate) { + // check if outside workspace — respect follow_external_includes + if !cfg.indexing.follow_external_includes { + // simple check: if it doesn't share the base prefix, skip + // a real impl would compare against workspace root + if !candidate.starts_with(base) { + return '' + } + } + return candidate + } + + // check extra_dirs + for dir in cfg.indexing.extra_dirs { + p := os.join_path(dir, raw) + if os.exists(p) { + return p + } + } + return '' +} + +fn (ix &Indexer) is_asm_file(path string) bool { + for ext in ix.cfg.general.extensions { + if path.ends_with(ext) { + return true + } + } + return false +} diff --git a/src/main.v b/src/main.v new file mode 100644 index 0000000..59e3b8c --- /dev/null +++ b/src/main.v @@ -0,0 +1,199 @@ +module main + +import os + +struct Server { +mut: + cfg Config + initialized bool + workspace string + tables Tables + indexer Indexer + diag ?DiagEngine +} + +fn main() { + mut srv := Server{} + srv.cfg = Config{} + + for { + req := read_message() or { break } + + match req.method { + 'initialize' { + srv.handle_initialize(req) + } + 'initialized' { + srv.handle_initialized(req) + } + 'shutdown' { + send_response(req.id, 'null') + } + 'exit' { + return + } + 'textDocument/didOpen' { + srv.handle_did_open(req) + } + 'textDocument/didChange' { + srv.handle_did_change(req) + } + 'textDocument/didSave' { + srv.handle_did_save(req) + } + 'textDocument/hover' { + srv.handle_hover(req) + } + 'textDocument/definition' { + 
srv.handle_definition(req) + } + 'workspace/symbol' { + srv.handle_workspace_symbol(req) + } + 'workspace/didChangeConfiguration' { + srv.handle_did_change_config(req) + send_response(req.id, 'null') + } + else { + send_response(req.id, 'null') + } + } + } +} + +fn (mut srv Server) handle_initialize(req RpcRequest) { + params := req.params or { + send_response(req.id, '{"error":{"code":-32600,"message":"Invalid params"}}') + return + } + + // Extract workspace folder - handle both object and array forms + srv.workspace = json_raw_field(params, 'workspaceFolders') or { + json_str_field(params, 'rootUri') or { '' } + } + // If workspaceFolders is an array, extract the first URI + if srv.workspace.starts_with('[') { + first_uri := json_raw_field(srv.workspace, 'uri') or { '' } + srv.workspace = first_uri.trim('"') + } + if srv.workspace == '' { + srv.workspace = os.getwd() + } + + // Load config from workspace or default locations + workspace_path := if srv.workspace.starts_with('file://') { + srv.workspace['file://'.len..] + } else { + srv.workspace + } + srv.cfg = find_and_load(workspace_path) + + capabilities := '{ + "capabilities": { + "textDocumentSync": 1, + "hoverProvider": true, + "definitionProvider": true, + "workspaceSymbolProvider": true + } + }' + send_response(req.id, capabilities) +} + +fn (mut srv Server) handle_initialized(req RpcRequest) { + srv.load_data(srv.workspace) + if mut d := srv.diag { + d.publish_workspace() + } +} + +fn (mut srv Server) handle_did_change_config(req RpcRequest) { + workspace_path := if srv.workspace.starts_with('file://') { + srv.workspace['file://'.len..] 
+ } else { + srv.workspace + } + srv.cfg = find_and_load(workspace_path) + // Re-init diag engine with new config + if mut d := srv.diag { + unsafe { d.cfg = srv.cfg } + } +} + +fn (mut srv Server) handle_did_open(req RpcRequest) { + params := req.params or { return } + td := json_raw_field(params, 'textDocument') or { return } + uri := json_str_field(td, 'uri') or { return } + path := uri_to_path(uri) + + if path == '' { + return + } + + srv.indexer.index_file(path) + + if mut d := srv.diag { + d.publish(path) + } +} + +fn (mut srv Server) handle_did_change(req RpcRequest) { + params := req.params or { return } + td := json_raw_field(params, 'textDocument') or { return } + uri := json_str_field(td, 'uri') or { return } + path := uri_to_path(uri) + + if path == '' { + return + } + + content_arr := json_raw_field(params, 'contentChanges') or { + send_response(req.id, 'null') + return + } + text := json_str_field(content_arr, 'text') or { + send_response(req.id, 'null') + return + } + + srv.indexer.index_content(path, text) + + // Don't publish on every change - wait for save to avoid flickering + send_response(req.id, 'null') +} + +fn (mut srv Server) handle_did_save(req RpcRequest) { + params := req.params or { return } + td := json_raw_field(params, 'textDocument') or { return } + uri := json_str_field(td, 'uri') or { return } + path := uri_to_path(uri) + + if path == '' { + return + } + + // Re-index file on save + srv.indexer.index_file(path) + + if mut d := srv.diag { + d.publish(path) + } +} + +fn (mut srv Server) handle_hover(req RpcRequest) { + srv.on_hover(req) +} + +fn (mut srv Server) handle_definition(req RpcRequest) { + srv.on_definition(req) +} + +fn (mut srv Server) handle_workspace_symbol(req RpcRequest) { + srv.on_workspace_symbol(req) +} + +fn uri_to_path(uri string) string { + if uri.starts_with('file://') { + return uri['file://'.len..] 
+ } + return uri +} diff --git a/src/parser.v b/src/parser.v new file mode 100644 index 0000000..ed556f9 --- /dev/null +++ b/src/parser.v @@ -0,0 +1,408 @@ +// parser.v +// GAS AT&T assembly line parser +// Parses one line at a time into a Line struct + +module main + +pub enum LineKind { + empty + comment + label + directive + instruction + label_and_instruction // "foo: movq %rax, %rbx" +} + +pub struct Operand { +pub: + raw string // raw text as written + kind OperandKind + reg string // register name if kind == .register + imm i64 // immediate value if kind == .immediate + width int // inferred width in bits (0 = unknown) +} + +pub enum OperandKind { + unknown + register // %rax + immediate // $42 + memory // (%rax), 8(%rbp), symbol(%rip) + label_ref // bare symbol (jump target) +} + +pub struct Line { +pub: + kind LineKind + label string // defined label if any (without colon) + mnemonic string // instruction mnemonic, lowercased, no suffix + suffix u8 // 'b' 'w' 'l' 'q' or 0 + operands []Operand + directive string // directive name e.g. "global", "section" + dir_args string // raw directive arguments + raw string // original line text + line_nr int +} + +// ─── public entry point ─────────────────────────────────────────────────────── + +pub fn parse_line(raw string, line_nr int) Line { + stripped := strip_comment(raw).trim_space() + + if stripped.len == 0 || raw.trim_space().starts_with('#') || raw.trim_space().starts_with('//') { + return Line{ + kind: .empty + raw: raw + line_nr: line_nr + } + } + + // directive + if stripped.starts_with('.') { + return parse_directive(stripped, raw, line_nr) + } + + // may have a label prefix: "foo:" or "foo: movq ..." 
+ mut rest := stripped + mut label := '' + if colon := find_label_colon(stripped) { + label = stripped[..colon].trim_space() + rest = stripped[colon + 1..].trim_space() + } + + if rest.len == 0 { + return Line{ + kind: .label + label: label + raw: raw + line_nr: line_nr + } + } + + // directive after label (rare but valid) + if rest.starts_with('.') { + mut d := parse_directive(rest, raw, line_nr) + // can't do struct update with mut field, so rebuild + return Line{ + kind: .directive + label: label + directive: d.directive + dir_args: d.dir_args + raw: raw + line_nr: line_nr + } + } + + // instruction + instr := parse_instruction(rest, raw, line_nr) + if label.len > 0 { + return Line{ + kind: .label_and_instruction + label: label + mnemonic: instr.mnemonic + suffix: instr.suffix + operands: instr.operands + raw: raw + line_nr: line_nr + } + } + return instr +} + +// ─── directive ──────────────────────────────────────────────────────────────── + +fn parse_directive(s string, raw string, line_nr int) Line { + // ".global foo" → directive="global" dir_args="foo" + rest := s[1..] 
// strip leading dot + sp := rest.index(' ') or { rest.index('\t') or { -1 } } + mut name := '' + mut args := '' + if sp == -1 { + name = rest.trim_space() + } else { + name = rest[..sp].trim_space() + args = rest[sp + 1..].trim_space() + } + return Line{ + kind: .directive + directive: name.to_lower() + dir_args: args + raw: raw + line_nr: line_nr + } +} + +// ─── instruction ───────────────────────────────────────────────────────────── + +fn parse_instruction(s string, raw string, line_nr int) Line { + // split mnemonic from operands + sp := first_whitespace(s) + mut mnem_raw := '' + mut ops_raw := '' + if sp == -1 { + mnem_raw = s + } else { + mnem_raw = s[..sp] + ops_raw = s[sp + 1..].trim_space() + } + + mnem_lower := mnem_raw.to_lower() + mnemonic, suffix := split_suffix(mnem_lower) + operands := if ops_raw.len > 0 { parse_operands(ops_raw) } else { []Operand{} } + + return Line{ + kind: .instruction + mnemonic: mnemonic + suffix: suffix + operands: operands + raw: raw + line_nr: line_nr + } +} + +// split "movq" → ("mov", `q`), "mov" → ("mov", 0) +fn split_suffix(mnem string) (string, u8) { + if mnem.len == 0 { + return '', 0 + } + last := mnem[mnem.len - 1] + // only split if the base without suffix is a known-style mnemonic + // conservative: only strip if last char is a known suffix + if last == `b` || last == `w` || last == `l` || last == `q` { + // don't strip from mnemonics that end in those letters naturally + // e.g. "call", "cmpxchg", "push", "mul", "jl", "jnl", "jg", etc. 
+ nosuf := mnem[..mnem.len - 1] + if is_suffixable(nosuf) { + return nosuf, last + } + } + return mnem, 0 +} + +// mnemonics that accept size suffixes — checked after stripping last char +fn is_suffixable(base string) bool { + return base in [ + 'mov', + 'movs', + 'movz', + 'push', + 'pop', + 'add', + 'sub', + 'and', + 'or', + 'xor', + 'not', + 'neg', + 'inc', + 'dec', + 'mul', + 'imul', + 'div', + 'idiv', + 'cmp', + 'test', + 'shl', + 'shr', + 'sar', + 'lea', + 'xchg', + 'cmpxchg', + 'bsf', + 'bsr', + 'bswap', + 'cmov', + 'cmove', + 'cmovne', + 'cmovg', + 'cmovge', + 'cmovl', + 'cmovle', + 'cmova', + 'cmovae', + 'cmovb', + 'cmovbe', + 'set', + 'sete', + 'setne', + 'setg', + 'setge', + 'setl', + 'setle', + 'seta', + 'setae', + 'setb', + 'setbe', + 'sto', + 'lod', + ] +} + +// ─── operand parsing ────────────────────────────────────────────────────────── + +fn parse_operands(s string) []Operand { + mut ops := []Operand{} + // split by comma, but not commas inside parentheses + parts := split_operands(s) + for p in parts { + ops << classify_operand(p.trim_space()) + } + return ops +} + +fn split_operands(s string) []string { + mut parts := []string{} + mut depth := 0 + mut start := 0 + for i, c in s { + if c == `(` { + depth++ + } + if c == `)` { + depth-- + } + if c == `,` && depth == 0 { + parts << s[start..i].trim_space() + start = i + 1 + } + } + if start < s.len { + parts << s[start..].trim_space() + } + return parts +} + +fn classify_operand(s string) Operand { + if s.len == 0 { + return Operand{ + raw: s + kind: .unknown + } + } + + // register: starts with % + if s.starts_with('%') { + reg := s[1..].to_lower() + w := reg_width(reg) + return Operand{ + raw: s + kind: .register + reg: reg + width: w + } + } + + // immediate: starts with $ + if s.starts_with('$') { + val_str := s[1..] 
+ val := parse_imm(val_str) + return Operand{ + raw: s + kind: .immediate + imm: val + } + } + + // memory: contains ( or is purely numeric offset + if s.contains('(') || s.contains(')') { + return Operand{ + raw: s + kind: .memory + } + } + + // bare symbol / label reference (jump target etc.) + return Operand{ + raw: s + kind: .label_ref + } +} + +fn parse_imm(s string) i64 { + t := s.trim_space() + if t.starts_with('0x') || t.starts_with('0X') { + return i64(t[2..].parse_uint(16, 64) or { 0 }) + } + if t.starts_with('0b') || t.starts_with('0B') { + return i64(t[2..].parse_uint(2, 64) or { 0 }) + } + if t.starts_with('-') { + return -(t[1..].parse_uint(10, 64) or { 0 }).str().i64() + } + return i64(t.parse_uint(10, 64) or { 0 }) +} + +// return bit width of a register name, 0 if unknown +fn reg_width(name string) int { + return match name { + 'al', 'ah', 'bl', 'bh', 'cl', 'ch', 'dl', 'dh', 'sil', 'dil', 'spl', 'bpl', 'r8b', 'r9b', + 'r10b', 'r11b', 'r12b', 'r13b', 'r14b', 'r15b' { + 8 + } + 'ax', 'bx', 'cx', 'dx', 'si', 'di', 'sp', 'bp', 'r8w', 'r9w', 'r10w', 'r11w', 'r12w', + 'r13w', 'r14w', 'r15w' { + 16 + } + 'eax', 'ebx', 'ecx', 'edx', 'esi', 'edi', 'esp', 'ebp', 'r8d', 'r9d', 'r10d', 'r11d', + 'r12d', 'r13d', 'r14d', 'r15d' { + 32 + } + 'rax', 'rbx', 'rcx', 'rdx', 'rsi', 'rdi', 'rsp', 'rbp', 'rip', 'r8', 'r9', 'r10', 'r11', + 'r12', 'r13', 'r14', 'r15' { + 64 + } + 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'xmm8', 'xmm9', 'xmm10', + 'xmm11', 'xmm12', 'xmm13', 'xmm14', 'xmm15' { + 128 + } + 'ymm0', 'ymm1', 'ymm2', 'ymm3', 'ymm4', 'ymm5', 'ymm6', 'ymm7', 'ymm8', 'ymm9', 'ymm10', + 'ymm11', 'ymm12', 'ymm13', 'ymm14', 'ymm15' { + 256 + } + else { + 0 + } + } +} + +// ─── helpers ────────────────────────────────────────────────────────────────── + +// strip ; and # comments, respecting quoted strings +fn strip_comment(s string) string { + mut in_str := false + for i, c in s { + if c == `"` { + in_str = !in_str + } + if !in_str && (c == `#` 
|| c == `;`) { + return s[..i] + } + } + return s +} + +// find the colon that ends a label, ignoring colons inside strings/memory operands +fn find_label_colon(s string) ?int { + for i, c in s { + if c == `:` { + return i + } + // if we hit whitespace before a colon, no label here + if c == ` ` || c == `\t` { + return none + } + // directives and instructions don't have labels + if c == `.` && i == 0 { + return none + } + } + return none +} + +fn first_whitespace(s string) int { + for i, c in s { + if c == ` ` || c == `\t` { + return i + } + } + return -1 +} diff --git a/src/rpc.v b/src/rpc.v new file mode 100644 index 0000000..1eed7c6 --- /dev/null +++ b/src/rpc.v @@ -0,0 +1,200 @@ +module main + +import os + +pub struct RpcRequest { +pub: + jsonrpc string + id ?int + method string + params ?string +} + +pub fn read_message() ?RpcRequest { + mut content_length := -1 + + for { + line := read_line() or { break } + trimmed := line.trim_space() + if trimmed.len == 0 { + break + } + if trimmed.to_lower().starts_with('content-length:') { + val := trimmed['content-length:'.len..].trim_space() + content_length = val.int() + } + } + + if content_length <= 0 { + return none + } + + mut body := '' + mut remaining := content_length + for remaining > 0 { + chunk, n := os.fd_read(0, remaining) + if n <= 0 { + return none + } + body += chunk + remaining -= n + } + + return parse_request(body) +} + +pub fn send_response(id ?int, result string) { + write_message("{\"jsonrpc\":\"2.0\",\"id\":${id_str(id)},\"result\":${result}}") +} + +pub fn send_error(id ?int, code int, message string) { + msg := message.replace('"', '\\"') + write_message("{\"jsonrpc\":\"2.0\",\"id\":${id_str(id)},\"error\":{\"code\":${code},\"message\":\"${msg}\"}}") +} + +pub fn send_notification(method string, params string) { + write_message("{\"jsonrpc\":\"2.0\",\"method\":\"${method}\",\"params\":${params}}") +} + +fn write_message(body string) { + os.fd_write(1, "Content-Length: ${body.len}\r\n\r\n") + 
os.fd_write(1, body) +} + +fn read_line() ?string { + mut line := []u8{} + for { + data, n := os.fd_read(0, 1) + if n <= 0 { + return none + } + b := data[0] + if b == `\n` { + break + } + if b != `\r` { + line << b + } + } + return line.bytestr() +} + +pub fn id_str(id ?int) string { + return if v := id { v.str() } else { 'null' } +} + +fn parse_request(body string) ?RpcRequest { + method := json_str_field(body, 'method') or { return none } + id := json_int_field(body, 'id') + params := json_raw_field(body, 'params') + return RpcRequest{ + jsonrpc: '2.0' + id: id + method: method + params: params + } +} + +pub fn json_str_field(s string, key string) ?string { + needle := '"${key}":' + idx := s.index(needle) or { return none } + rest := s[idx + needle.len..].trim_space() + if !rest.starts_with('"') { + return none + } + mut i := 1 + for i < rest.len { + if rest[i] == `\\` { + i += 2 + continue + } + if rest[i] == `"` { + return rest[1..i] + } + i++ + } + return none +} + +pub fn json_int_field(s string, key string) ?int { + needle := '"${key}":' + idx := s.index(needle) or { return none } + rest := s[idx + needle.len..].trim_space() + if rest.starts_with('null') { + return none + } + mut i := 0 + for i < rest.len && (rest[i].is_digit() || (i == 0 && rest[i] == `-`)) { + i++ + } + if i == 0 { + return none + } + return rest[..i].int() +} + +pub fn json_raw_field(s string, key string) ?string { + needle := '"${key}":' + idx := s.index(needle) or { return none } + rest := s[idx + needle.len..].trim_space() + if rest.len == 0 { + return none + } + return extract_raw_value(rest) +} + +fn extract_raw_value(s string) ?string { + if s.len == 0 { + return none + } + ch := s[0] + if ch == `{` || ch == `[` { + close := if ch == `{` { `}` } else { `]` } + mut depth := 0 + mut in_str := false + for i, c in s { + if in_str { + if c == `\\` { + continue + } + if c == `"` { + in_str = false + } + continue + } + if c == `"` { + in_str = true + continue + } + if c == ch { + depth++ 
+ } + if c == close { + depth-- + if depth == 0 { + return s[..i + 1] + } + } + } + return none + } + if ch == `"` { + mut i := 1 + for i < s.len { + if s[i] == `\\` { + i += 2 + continue + } + if s[i] == `"` { + return s[..i + 1] + } + i++ + } + return none + } + mut i := 0 + for i < s.len && s[i] != `,` && s[i] != `}` && s[i] != `]` { + i++ + } + return s[..i].trim_space() +} diff --git a/src/tables/instrs.csv b/src/tables/instrs.csv new file mode 100644 index 0000000..dd15db0 --- /dev/null +++ b/src/tables/instrs.csv @@ -0,0 +1,106 @@ +mnemonic,suffixes,operands,description,flags,notes +mov,bwlq,"src, dst","Copy src to dst. Most common instruction.",none,"Size suffix required unless operand sizes are unambiguous. AT&T: source first." +movs,bwlq,"src, dst","Move with sign-extension. Extends narrower src into wider dst.",none,"e.g. movslq sign-extends 32-bit to 64-bit." +movz,bwlq,"src, dst","Move with zero-extension. Fills upper bits with zero.",none,"No movzlq — movl to 32-bit reg implicitly zeros upper 32 bits of 64-bit reg." +push,wlq,src,"Decrement %rsp by operand size, then store src at (%rsp).",none,"Default 64-bit on x86-64. pushb not encodable." +pop,wlq,dst,"Load value from (%rsp) into dst, then increment %rsp.",none,"Default 64-bit on x86-64." +lea,lq,"mem, dst","Load Effective Address. Computes address of mem operand, stores in dst.",none,"Does NOT dereference. Useful for address arithmetic and fast multiply." +add,bwlq,"src, dst","dst = dst + src.",OF SF ZF AF CF PF,"" +sub,bwlq,"src, dst","dst = dst - src.",OF SF ZF AF CF PF,"" +mul,bwlq,src,"Unsigned multiply. Result = %[r/e/]ax * src. High half stored in %[r/e/]dx.",OF CF,"Implicit %ax/%eax/%rax operand." +imul,bwlq,"src [,dst [,imm]]","Signed multiply. One- two- or three-operand form.",OF CF,"One-operand: result in rdx:rax. Two-operand: dst *= src. Three-operand: dst = src * imm." +div,bwlq,src,"Unsigned divide rdx:rax by src. 
Quotient → rax, remainder → rdx.",undefined,"Divide-by-zero or quotient overflow raises #DE." +idiv,bwlq,src,"Signed divide rdx:rax by src. Quotient → rax, remainder → rdx.",undefined,"Same overflow/divide-by-zero caveat as div." +inc,bwlq,dst,"dst = dst + 1.",OF SF ZF AF PF,"Does NOT affect CF. Use add $1 if CF matters." +dec,bwlq,dst,"dst = dst - 1.",OF SF ZF AF PF,"Does NOT affect CF." +neg,bwlq,dst,"dst = 0 - dst (two's complement negate).",OF SF ZF AF CF PF,"" +not,bwlq,dst,"Bitwise NOT. dst = ~dst.",none,"Does not set flags." +and,bwlq,"src, dst","dst = dst & src.",OF SF ZF PF CF,"OF and CF cleared." +or,bwlq,"src, dst","dst = dst | src.",OF SF ZF PF CF,"OF and CF cleared." +xor,bwlq,"src, dst","dst = dst ^ src.",OF SF ZF PF CF,"xorl %eax,%eax is canonical zero-register idiom." +shl,bwlq,"imm/%cl, dst","Shift dst left by count. Fills with zeros.",OF CF SF ZF PF,"Same encoding as sal." +shr,bwlq,"imm/%cl, dst","Logical shift right. Fills with zeros.",OF CF SF ZF PF,"" +sar,bwlq,"imm/%cl, dst","Arithmetic shift right. Fills with sign bit.",OF CF SF ZF PF,"" +cmp,bwlq,"src, dst","Compute dst - src and set flags. Discards result.",OF SF ZF AF CF PF,"Does not modify dst." +test,bwlq,"src, dst","Compute dst & src and set flags. Discards result.",OF SF ZF PF CF,"OF and CF cleared. Use to test single bits or check for zero." 
+jmp,,target,"Unconditional jump.",none,"" +je,,target,"Jump if Equal (ZF=1).",none,"Alias: jz" +jne,,target,"Jump if Not Equal (ZF=0).",none,"Alias: jnz" +jg,,target,"Jump if Greater (signed, ZF=0 and SF=OF).",none,"" +jge,,target,"Jump if Greater or Equal (signed, SF=OF).",none,"" +jl,,target,"Jump if Less (signed, SF≠OF).",none,"" +jle,,target,"Jump if Less or Equal (signed, ZF=1 or SF≠OF).",none,"" +ja,,target,"Jump if Above (unsigned, CF=0 and ZF=0).",none,"" +jae,,target,"Jump if Above or Equal (unsigned, CF=0).",none,"" +jb,,target,"Jump if Below (unsigned, CF=1).",none,"Alias: jc" +jbe,,target,"Jump if Below or Equal (unsigned, CF=1 or ZF=1).",none,"" +js,,target,"Jump if Sign (SF=1).",none,"" +jns,,target,"Jump if Not Sign (SF=0).",none,"" +jo,,target,"Jump if Overflow (OF=1).",none,"" +jno,,target,"Jump if Not Overflow (OF=0).",none,"" +call,,target,"Push return address (%rip), then jump to target.",none,"" +ret,,,"Pop return address from stack into %rip.",none,"retq is the 64-bit form." +nop,,,"No operation. One byte (0x90).",none,"Can be used for alignment or patching." +hlt,,,"Halt CPU until next interrupt.",none,"Ring 0 only." +syscall,,,"Fast system call. Transfers to OS. Number in %rax.",CF TF,"Args: rdi rsi rdx r10 r8 r9. Return: rax. rcx and r11 clobbered." +int,,"$imm","Software interrupt. Triggers interrupt vector imm.",,"int $0x80 = legacy 32-bit Linux syscall." +xchg,bwlq,"src, dst","Atomically swap src and dst.",none,"Has implicit LOCK prefix when one operand is memory." +cmpxchg,bwlq,"src, dst","Compare %[a/r]ax with dst. If equal, dst=src. Else %ax=dst.",ZF,"Use with LOCK prefix for atomic CAS." +bswap,lq,dst,"Byte-swap dst (reverse byte order). Used for endian conversion.",none,"Only 32/64-bit forms." +bsf,lq,"src, dst","Bit Scan Forward. dst = index of lowest set bit in src.",ZF,"ZF set if src=0. Result undefined if src=0." +bsr,lq,"src, dst","Bit Scan Reverse. dst = index of highest set bit in src.",ZF,"ZF set if src=0." 
+cmove,lq,"src, dst","Conditional move if Equal (ZF=1).",none,"" +cmovne,lq,"src, dst","Conditional move if Not Equal (ZF=0).",none,"" +cmovg,lq,"src, dst","Conditional move if Greater (signed).",none,"" +cmovl,lq,"src, dst","Conditional move if Less (signed).",none,"" +cmova,lq,"src, dst","Conditional move if Above (unsigned).",none,"" +cmovb,lq,"src, dst","Conditional move if Below (unsigned).",none,"" +sete,b,dst,"Set byte to 1 if Equal (ZF=1), else 0.",none,"dst must be 8-bit reg or memory byte." +setne,b,dst,"Set byte if Not Equal (ZF=0).",none,"" +setg,b,dst,"Set byte if Greater (signed).",none,"" +setl,b,dst,"Set byte if Less (signed).",none,"" +seta,b,dst,"Set byte if Above (unsigned).",none,"" +setb,b,dst,"Set byte if Below (unsigned).",none,"" +rep,,,"Repeat string operation %rcx times. Used as prefix.",none,"REP MOVSB/STOSB/SCASB etc." +movsb,,,"Copy byte from (%rsi) to (%rdi). Advance both by 1 (direction flag).",none,"Use with REP for bulk copy." +movsd,,,"Copy dword from (%rsi) to (%rdi). Advance by 4.",none,"" +movsq,,,"Copy qword from (%rsi) to (%rdi). Advance by 8.",none,"" +stosb,,,"Store %al to (%rdi). Advance %rdi.",none,"Use REP STOSB to zero/fill memory." +stosq,,,"Store %rax to (%rdi). Advance %rdi by 8.",none,"" +cld,,,"Clear Direction Flag. String ops increment %rsi/%rdi.",DF,"Use before string ops unless you want decrement." +std,,,"Set Direction Flag. String ops decrement %rsi/%rdi.",DF,"" +pushf,,,"Push %rflags onto stack.",none,"" +popf,,,"Pop stack into %rflags.",all,"" +stc,,,"Set Carry Flag.",CF,"" +clc,,,"Clear Carry Flag.",CF,"" +cmc,,,"Complement Carry Flag.",CF,"" +leave,,,"Restore stack frame. mov %rbp,%rsp; pop %rbp.",none,"" +cbw,,,"Sign-extend %al into %ax.",none,"cbw/cwde/cdqe are different mnemonics for the same opcode family." +cwde,,,"Sign-extend %ax into %eax.",none,"" +cdqe,,,"Sign-extend %eax into %rax.",none,"" +cwd,,,"Sign-extend %ax into %dx:%ax.",none,"" +cdq,,,"Sign-extend %eax into %edx:%eax. 
Use before idivl.",none,"" +cqo,,,"Sign-extend %rax into %rdx:%rax. Use before idivq.",none,"" +lock,,,"Bus lock prefix. Makes the following RMW instruction atomic.",none,"Only valid with: add sub and or xor xchg cmpxchg inc dec neg not." +rdtsc,,,"Read Time Stamp Counter. Result in %edx:%eax.",none,"%rcx clobbered. Not serializing." +cpuid,,,"CPU identification. Input in %eax (leaf). Output in %eax/%ebx/%ecx/%edx.",none,"Serializing instruction." +cli,,,"Clear Interrupt Flag. Disables maskable interrupts.",none,"Ring 0 only. Use 'sti' to re-enable." +sti,,,"Set Interrupt Flag. Enables maskable interrupts.",none,"Ring 0 only. Must follow a 'cli' with actual I/O." +pusha,,,"Push all general-purpose registers.",none,"Pushes: ax, cx, dx, bx, sp, bp, si, di. Order is undefined." +popa,,,"Pop all general-purpose registers.",none,"Pops: di, si, bp, sp, bx, dx, cx, ax. Ignores the popped value for sp." +wait,,,"Check for and process pending unmasked exceptions.",none,"Also used with FPU." +iret,,,"Interrupt Return. Pops IP, CS, and flags.",none,"Return from interrupt handler." +lgdt,,mem,"Load Global Descriptor Table Register.",none,"Loads GDT from memory. Operand is 10-byte pointer." +lidt,,mem,"Load Interrupt Descriptor Table Register.",none,"Loads IDT from memory. Operand is 10-byte pointer." +ltr,,reg,"Load Task Register.",none,"Loads task register from TR. Also sets the busy bit." +sgdt,,mem,"Store Global Descriptor Table Register.",none,"Stores GDT to memory. Returns 10-byte pointer." +sidt,,mem,"Store Interrupt Descriptor Table Register.",none,"Stores IDT to memory. Returns 10-byte pointer." +str,,reg,"Store Task Register.",none,"Stores current task register." +rdmsr,,,"Read Model Specific Register. Input in %ecx.",none,"Output in %edx:%eax. Ring 0 only." +wrmsr,,,"Write Model Specific Register. Input in %ecx and %edx:%eax.",none,"Ring 0 only." +rdtscp,,,"Read Time Stamp Counter and Processor Info.",none,"Result in %edx:%eax. %rcx clobbered. Serializing." 
+movq,_,cr,"Move to/from Control Register.",none,"cr0, cr2, cr3, cr4, cr8. Ring 0 only." +movq,_,dr,"Move to/from Debug Register.",none,"dr0-dr7. Ring 0 only." +invlpg,,mem,"Invalidate TLB Entry.",none,"Flushes single TLB entry. Ring 0 only." +invd,,,"Invalidate all caches.",none,"Flushes all caches and TLBs. Ring 0 only." +wbinvd,,,"Write back and invalidate caches.",none,"Writes back then flushes. Ring 0 only." \ No newline at end of file diff --git a/src/tables/regs.csv b/src/tables/regs.csv new file mode 100644 index 0000000..71bda91 --- /dev/null +++ b/src/tables/regs.csv @@ -0,0 +1,124 @@ +name,bits,family,notes +al,8,a,low byte of rax +ah,8,a,high byte of ax (not accessible with REX prefix) +ax,16,a,low word of rax +eax,32,a,low dword of rax; writing zeros upper 32 bits +rax,64,a,general purpose / return value + +bl,8,b,low byte of rbx +bh,8,b,high byte of bx (not accessible with REX prefix) +bx,16,b,low word of rbx +ebx,32,b,low dword of rbx; writing zeros upper 32 bits +rbx,64,b,callee-saved + +cl,8,c,low byte of rcx; implicit shift count +ch,8,c,high byte of cx (not accessible with REX prefix) +cx,16,c,low word of rcx +ecx,32,c,low dword of rcx; writing zeros upper 32 bits +rcx,64,c,4th integer argument (SysV ABI) / implicit shift count + +dl,8,d,low byte of rdx +dh,8,d,high byte of dx (not accessible with REX prefix) +dx,16,d,low word of rdx +edx,32,d,low dword of rdx; writing zeros upper 32 bits +rdx,64,d,3rd integer argument / high half of mul/div result + +sil,8,si,low byte of rsi (REX required) +si,16,si,low word of rsi +esi,32,si,low dword of rsi; writing zeros upper 32 bits +rsi,64,si,2nd integer argument / string source index + +dil,8,di,low byte of rdi (REX required) +di,16,di,low word of rdi +edi,32,di,low dword of rdi; writing zeros upper 32 bits +rdi,64,di,1st integer argument / string destination index + +spl,8,sp,low byte of rsp (REX required) +sp,16,sp,low word of rsp +esp,32,sp,low dword of rsp; writing zeros upper 32 bits +rsp,64,sp,stack 
pointer; callee-saved + +bpl,8,bp,low byte of rbp (REX required) +bp,16,bp,low word of rbp +ebp,32,bp,low dword of rbp; writing zeros upper 32 bits +rbp,64,bp,frame pointer; callee-saved + +r8b,8,r8,low byte of r8 +r8w,16,r8,low word of r8 +r8d,32,r8,low dword of r8; writing zeros upper 32 bits +r8,64,r8,5th integer argument + +r9b,8,r9,low byte of r9 +r9w,16,r9,low word of r9 +r9d,32,r9,low dword of r9; writing zeros upper 32 bits +r9,64,r9,6th integer argument + +r10b,8,r10,low byte of r10 +r10w,16,r10,low word of r10 +r10d,32,r10,low dword of r10; writing zeros upper 32 bits +r10,64,r10,caller-saved; used as static chain pointer in some ABIs + +r11b,8,r11,low byte of r11 +r11w,16,r11,low word of r11 +r11d,32,r11,low dword of r11; writing zeros upper 32 bits +r11,64,r11,caller-saved; clobbered by syscall + +r12b,8,r12,low byte of r12 +r12w,16,r12,low word of r12 +r12d,32,r12,low dword of r12; writing zeros upper 32 bits +r12,64,r12,callee-saved + +r13b,8,r13,low byte of r13 +r13w,16,r13,low word of r13 +r13d,32,r13,low dword of r13; writing zeros upper 32 bits +r13,64,r13,callee-saved + +r14b,8,r14,low byte of r14 +r14w,16,r14,low word of r14 +r14d,32,r14,low dword of r14; writing zeros upper 32 bits +r14,64,r14,callee-saved + +r15b,8,r15,low byte of r15 +r15w,16,r15,low word of r15 +r15d,32,r15,low dword of r15; writing zeros upper 32 bits +r15,64,r15,callee-saved + +rip,64,ip,instruction pointer; read-only; used in RIP-relative addressing +rflags,64,flags,flags register; not a general-purpose register + +xmm0,128,xmm,SSE/AVX register; 1st FP/vector argument and return +xmm1,128,xmm,SSE/AVX register; 2nd FP/vector argument +xmm2,128,xmm,SSE/AVX register; 3rd FP/vector argument +xmm3,128,xmm,SSE/AVX register; 4th FP/vector argument +xmm4,128,xmm,SSE/AVX register; 5th FP/vector argument +xmm5,128,xmm,SSE/AVX register; 6th FP/vector argument +xmm6,128,xmm,SSE/AVX register; caller-saved +xmm7,128,xmm,SSE/AVX register; caller-saved +xmm8,128,xmm,SSE/AVX register; 
caller-saved +xmm9,128,xmm,SSE/AVX register; caller-saved +xmm10,128,xmm,SSE/AVX register; caller-saved +xmm11,128,xmm,SSE/AVX register; caller-saved +xmm12,128,xmm,SSE/AVX register; caller-saved +xmm13,128,xmm,SSE/AVX register; caller-saved +xmm14,128,xmm,SSE/AVX register; caller-saved +xmm15,128,xmm,SSE/AVX register; caller-saved + +ymm0,256,ymm,AVX 256-bit register; lower 128 bits alias xmm0 +ymm1,256,ymm,AVX 256-bit register; lower 128 bits alias xmm1 +ymm2,256,ymm,AVX 256-bit register +ymm3,256,ymm,AVX 256-bit register +ymm4,256,ymm,AVX 256-bit register +ymm5,256,ymm,AVX 256-bit register +ymm6,256,ymm,AVX 256-bit register +ymm7,256,ymm,AVX 256-bit register +ymm8,256,ymm,AVX 256-bit register +ymm9,256,ymm,AVX 256-bit register +ymm10,256,ymm,AVX 256-bit register +ymm11,256,ymm,AVX 256-bit register +ymm12,256,ymm,AVX 256-bit register +ymm13,256,ymm,AVX 256-bit register +ymm14,256,ymm,AVX 256-bit register +ymm15,256,ymm,AVX 256-bit register + + + diff --git a/src/v.mod b/src/v.mod new file mode 100644 index 0000000..7d7c3fa --- /dev/null +++ b/src/v.mod @@ -0,0 +1,7 @@ +Module { + name: 'gaslsp' + description: 'GNU Assembly LSP' + version: '0.0.0' + license: 'MIT' + dependencies: [] +} diff --git a/syntaxes/gas.tmLanguage.json b/syntaxes/gas.tmLanguage.json index fdfa419..7f972b6 100644 --- a/syntaxes/gas.tmLanguage.json +++ b/syntaxes/gas.tmLanguage.json @@ -90,7 +90,7 @@ "patterns": [ { "name": "keyword.instruction.assembly", - "match": "\\b(mov|add|sub|mul|imul|div|idiv|call|jmp|je|jne|ja|jb|jbe|jae|jg|jl|jle|jge|jo|jno|js|jns|jz|jnz|ret|push|pop|xchg|lea|cmp|test|and|or|xor|not|shl|shr|sar|sal|rol|ror|inc|dec|neg|nop|int|iret|hlt|cli|sti|lgdt|lldt|ltr|lgs|lfs|lss|leave|enter|loop|loope|loopne|loopz|loopnz|jcxz|jecxz|cpuid|rdmsr|wrmsr)(?:[bwlq])?\\b" + "match": 
"\\b(mov|add|sub|mul|imul|div|idiv|call|jmp|je|jne|ja|jb|jbe|jae|jg|jl|jle|jge|jo|jno|js|jns|jz|jnz|ret|push|pop|pusha|popa|xchg|lea|cmp|test|and|or|xor|not|shl|shr|sar|sal|rol|ror|inc|dec|neg|nop|int|iret|hlt|cli|sti|lgdt|lidt|ltr|sgdt|sidt|str|lgs|lfs|lss|leave|enter|loop|loope|loopne|loopz|loopnz|jcxz|jecxz|cpuid|rdmsr|wrmsr|rdtsc|rdtscp|cld|std|stc|clc|cmc|pushf|popf|cbw|cwde|cdqe|cwd|cdq|cqo|invlpg|invd|wbinvd)(?:[bwlq])?\\b" } ] }, diff --git a/tests/d001_missing_suffix.s b/tests/d001_missing_suffix.s new file mode 100644 index 0000000..8ca833a --- /dev/null +++ b/tests/d001_missing_suffix.s @@ -0,0 +1,4 @@ +# Test: Missing size suffix should trigger D001 (no operand to infer from) +# This is a test comment +mov $1, 0x1000 +nop diff --git a/tests/d012_pushb.s b/tests/d012_pushb.s new file mode 100644 index 0000000..d2ab118 --- /dev/null +++ b/tests/d012_pushb.s @@ -0,0 +1,7 @@ +# Test: pushb not encodable should trigger D012 +# This is a comment +.section .text +.global _start +_start: + pushb $42 + ret diff --git a/tests/d020_todo.s b/tests/d020_todo.s new file mode 100644 index 0000000..00bbb4d --- /dev/null +++ b/tests/d020_todo.s @@ -0,0 +1,4 @@ +# Test: TODO comment should trigger D020 +mov $1, %rax +# TODO: implement actual logic +nop diff --git a/tests/integration.sh b/tests/integration.sh new file mode 100755 index 0000000..e5fabb3 --- /dev/null +++ b/tests/integration.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +echo "=== gaslsp integration tests ===" + +LSP="${LSP:-$HOME/.local/bin/gaslsp}" + +test_init() { + echo "Testing initialize..." + resp=$(printf 'Content-Length: 81\r\n\r\n{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"rootUri":"file:///tmp"}}' | "$LSP" 2>/dev/null) + if ! echo "$resp" | grep -q '"capabilities"'; then + echo "FAIL: initialize" + exit 1 + fi + echo "PASS: initialize" +} + +test_shutdown() { + echo "Testing shutdown..." 
+ init='{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"rootUri":"file:///tmp"}}' + shutdown='{"jsonrpc":"2.0","id":2,"method":"shutdown","params":null}' + exit='{"jsonrpc":"2.0","id":3,"method":"exit","params":null}' + + resp=$(printf "Content-Length: ${#init}\r\n\r\n${init}Content-Length: ${#shutdown}\r\n\r\n${shutdown}Content-Length: ${#exit}\r\n\r\n${exit}" | "$LSP" 2>/dev/null) + if ! echo "$resp" | grep -q '"capabilities"'; then + echo "FAIL: shutdown sequence" + echo "$resp" + exit 1 + fi + echo "PASS: shutdown" +} + +test_init +test_shutdown + +echo "" +echo "All tests passed!" diff --git a/tests/new_opcodes.s b/tests/new_opcodes.s new file mode 100644 index 0000000..04aa7da --- /dev/null +++ b/tests/new_opcodes.s @@ -0,0 +1,33 @@ +# Test all new opcodes +.section .data +gdt_desc: + .word 0x10 # limit + .quad 0 # base + +.section .text +.global _start +_start: + # Test new instructions + lgdt gdt_desc + lidt gdt_desc + ltr %ax + sgdt gdt_desc + sidt gdt_desc + str %ax + + rdtsc + rdtscp + + # Control reg (ring 0 only, won't assemble) + # movq %cr0, %rax + + # Debug reg + # movq %dr0, %rax + + # Cache stuff (ring 0 only) + # invlpg (%rax) + # invd + # wbinvd + + nop + ret diff --git a/tests/ring0.s b/tests/ring0.s new file mode 100644 index 0000000..31c56c9 --- /dev/null +++ b/tests/ring0.s @@ -0,0 +1,13 @@ +# Test MSR and other ring-0 instructions +.section .text +.global _start +_start: + # These require ring 0 - will fail in user mode + # rdmsr + # wrmsr + + # Wait and iret + iretq + + nop + ret diff --git a/tests/test_diags.sh b/tests/test_diags.sh new file mode 100755 index 0000000..fba83c0 --- /dev/null +++ b/tests/test_diags.sh @@ -0,0 +1,118 @@ +#!/bin/bash + +LSP="${LSP:-$HOME/.local/bin/gaslsp}" +WORKSPACE="/home/baby/assembly-utils-vscode" + +test_diag() { + local name="$1" + local code="$2" + local content="$3" + + echo -n "Testing $name (code $code)... 
" + + # Write temp file in workspace + local tmpfile="$WORKSPACE/tests/tmp_${code}.s" + echo -e "$content" > "$tmpfile" + + # Run LSP and check for diagnostic + result=$(python3 -c " +import json +import subprocess +import sys + +def msg(method, params, id=1): + body = json.dumps({'jsonrpc': '2.0', 'id': id, 'method': method, 'params': params}) + return f'Content-Length: {len(body)}\r\n\r\n{body}'.encode() + +def notif(method, params): + body = json.dumps({'jsonrpc': '2.0', 'method': method, 'params': params}) + return f'Content-Length: {len(body)}\r\n\r\n{body}'.encode() + +LSP = '$LSP' +ws = '$WORKSPACE' + +reqs = [msg('initialize', {'rootUri': f'file://{ws}'}), notif('initialized', {})] + +with open('$tmpfile', 'r') as f: + content = f.read() + +reqs.append(msg('textDocument/didOpen', { + 'textDocument': {'uri': 'file://$tmpfile', 'text': content, 'version': 1} +}, id=2)) + +reqs.append(msg('shutdown', None, id=3)) +reqs.append(notif('exit', {})) + +proc = subprocess.Popen([LSP], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +stdout, stderr = proc.communicate(b''.join(reqs), timeout=5) + +# Parse responses for diagnostics +import re +parts = re.split(r'Content-Length: \d+\r\n\r\n', stdout.decode()) +for p in parts: + if '$code' in p and 'publishDiagnostics' in p: + sys.exit(0) +sys.exit(1) +" 2>/dev/null) + + if [ $? 
-eq 0 ]; then + echo "PASS" + else + echo "FAIL" + fi + + rm -f "$tmpfile" +} + +echo "=== Testing all diagnostic codes ===" + +# D001: missing suffix +test_diag "D001" "D001" "mov \$1, %rax" + +# D002: inferred suffix +test_diag "D002" "D002" "mov %eax, %ebx" + +# D003: size mismatch +test_diag "D003" "D003" "movl %eax, %rax" + +# D004: immediate truncated +test_diag "D004" "D004" "movb \$256, %al" + +# D005: high-byte + REX +test_diag "D005" "D005" "mov %ah, %r8" + +# D009: 32-bit base in 64-bit +test_diag "D009" "D009" "mov (%eax), %eax" + +# D010: src == dst +test_diag "D010" "D010" "add %eax, %eax" + +# D011: div with immediate +test_diag "D011" "D011" "div \$4" + +# D012: pushb not encodable +test_diag "D012" "D012" "pushb \$42" + +# D013: one-operand imul +test_diag "D013" "D013" "imul %eax" + +# D014: mul unsigned +test_diag "D014" "D014" "mul %eax" + +# D015: shift count +test_diag "D015" "D015" "shl %eax, %ebx" + +# D016: syscall clobber +test_diag "D016" "D016" "syscall" + +# D017: int 0x80 +test_diag "D017" "D017" "int \$0x80" + +# D018: incomplete label +test_diag "D018" "D018" "mylabel" + +# D020: TODO comment +test_diag "D020" "D020" "# TODO: fix this" + +echo "" +echo "=== Tests complete ===" \ No newline at end of file From ffd7723ee352ea335e6ece1e350de51b0de611c1 Mon Sep 17 00:00:00 2001 From: babywolf Date: Thu, 23 Apr 2026 17:57:37 +0100 Subject: [PATCH 2/5] v2.0.0-alpha MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 🐞 fix code directive number being spaced out - ⚠️ experimental LSP Signed-off-by: babywolf --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a653ff5..2b51d20 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: mkdir -p ~/.local/bin cp src/gaslsp ~/.local/bin/gaslsp cp -r src/tables ~/.local/bin/ - chmod +x tests/integration.sh 
test_diags.sh +chmod +x tests/integration.sh tests/test_diags.sh tests/integration.sh test-diags: From 1aca12b0bb3d2a4b6130829ef885a529abb55056 Mon Sep 17 00:00:00 2001 From: babywolf <60277618+fgsoftware1@users.noreply.github.com> Date: Thu, 23 Apr 2026 19:19:07 +0100 Subject: [PATCH 3/5] Update test.yml --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2b51d20..86d915d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,7 +30,7 @@ jobs: mkdir -p ~/.local/bin cp src/gaslsp ~/.local/bin/gaslsp cp -r src/tables ~/.local/bin/ -chmod +x tests/integration.sh tests/test_diags.sh + chmod +x tests/integration.sh tests/test_diags.sh tests/integration.sh test-diags: @@ -53,4 +53,4 @@ chmod +x tests/integration.sh tests/test_diags.sh cp src/gaslsp ~/.local/bin/gaslsp cp -r src/tables ~/.local/bin/ chmod +x tests/test_diags.sh - tests/test_diags.sh \ No newline at end of file + tests/test_diags.sh From 7347af2c8043913628d3e41fc79df7722cf2a12f Mon Sep 17 00:00:00 2001 From: babywolf Date: Thu, 23 Apr 2026 19:32:38 +0100 Subject: [PATCH 4/5] refactor: replace python test script with V implementation --- .github/workflows/test.yml | 6 +- .gitignore | 1 + src/test_diags.v | 96 +++++++++++++++++++++++++++++ tests/d001_missing_suffix.s | 4 -- tests/d012_pushb.s | 7 --- tests/d020_todo.s | 4 -- tests/new_opcodes.s | 33 ---------- tests/ring0.s | 13 ---- tests/test_diags.sh | 118 ++---------------------------------- 9 files changed, 106 insertions(+), 176 deletions(-) create mode 100644 src/test_diags.v delete mode 100644 tests/d001_missing_suffix.s delete mode 100644 tests/d012_pushb.s delete mode 100644 tests/d020_todo.s delete mode 100644 tests/new_opcodes.s delete mode 100644 tests/ring0.s diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 86d915d..e9f2dfb 100644 --- a/.github/workflows/test.yml +++ 
b/.github/workflows/test.yml @@ -42,15 +42,17 @@ jobs: - name: Install V uses: vlang/setup-v@v1.4 - - name: Build gaslsp + - name: Build gaslsp and test_diags run: | cd src v -o gaslsp . + v -o test_diags test_diags.v - name: Install and test diagnostics run: | mkdir -p ~/.local/bin cp src/gaslsp ~/.local/bin/gaslsp + cp src/test_diags ~/.local/bin/test_diags cp -r src/tables ~/.local/bin/ chmod +x tests/test_diags.sh - tests/test_diags.sh + WORKSPACE="$(pwd)" tests/test_diags.sh diff --git a/.gitignore b/.gitignore index cb53200..f0056e7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ node_modules *.vsix gaslsp +src/test_diags \ No newline at end of file diff --git a/src/test_diags.v b/src/test_diags.v new file mode 100644 index 0000000..ef114b0 --- /dev/null +++ b/src/test_diags.v @@ -0,0 +1,96 @@ +module main + +import os + +fn run_diag(lsp_path string, workspace string, code string, content string) bool { + tmpfile := os.join_path(workspace, 'tests', 'tmp_${code}.s') + os.write_file(tmpfile, content) or { return false } + defer { + os.rm(tmpfile) or {} + } + + escaped_content := content.replace('\\', '\\\\').replace('"', '\\"') + escaped_tmpfile := tmpfile.replace('\\', '\\\\').replace('"', '\\"') + escaped_workspace := workspace.replace('\\', '\\\\').replace('"', '\\"') + + init_body := "{\"jsonrpc\":\"2.0\",\"method\":\"initialize\",\"params\":{\"rootUri\":\"file://" + escaped_workspace + "\"},\"id\":1}" + init_req := "Content-Length: ${init_body.len}\r\n\r\n" + init_body + init_notif_body := "{\"jsonrpc\":\"2.0\",\"method\":\"initialized\",\"params\":{}}" + init_notif := "Content-Length: ${init_notif_body.len}\r\n\r\n" + init_notif_body + open_body := "{\"jsonrpc\":\"2.0\",\"method\":\"textDocument/didOpen\",\"params\":{\"textDocument\":{\"uri\":\"file://" + escaped_tmpfile + "\",\"text\":\"" + escaped_content + "\",\"version\":1}},\"id\":2}" + open_req := "Content-Length: ${open_body.len}\r\n\r\n" + open_body + shutdown_body := 
"{\"jsonrpc\":\"2.0\",\"method\":\"shutdown\",\"params\":{},\"id\":3}" + shutdown_req := "Content-Length: ${shutdown_body.len}\r\n\r\n" + shutdown_body + exit_body := "{\"jsonrpc\":\"2.0\",\"method\":\"exit\",\"params\":{}}" + exit_notif := "Content-Length: ${exit_body.len}\r\n\r\n" + exit_body + + input := init_req + init_notif + open_req + shutdown_req + exit_notif + + tmp_input := os.join_path(workspace, 'tests', 'tmp_input.txt') + os.write_file(tmp_input, input) or { return false } + defer { + os.rm(tmp_input) or {} + } + +tmp_output := os.join_path(workspace, 'tests', 'tmp_output.txt') + bash_cmd := '/bin/bash -c \'cat "' + tmp_input + '" | "' + lsp_path + '" > "' + tmp_output + '"\'' + os.execute(bash_cmd) + tmp_output_content := os.read_file(tmp_output) or { + eprintln("Failed to read output: ${err}") + return false + } + + if tmp_output_content.contains(code) && tmp_output_content.contains('publishDiagnostics') { + return true + } + return false +} + +fn main() { + lsp_env := os.getenv('LSP') + lsp_path := if lsp_env.len > 0 { lsp_env } else { os.join_path(os.home_dir(), '.local', 'bin', 'gaslsp') } + workspace_env := os.getenv('WORKSPACE') + workspace := if workspace_env.len > 0 { workspace_env } else { os.getwd() } + + mut passed := 0 + mut failed := 0 + + tests := { + 'D001': 'mov $1, %rax' + 'D002': 'mov %eax, %ebx' + 'D003': 'movl %eax, %rax' + 'D004': 'movb $256, %al' + 'D005': 'mov %ah, %r8' + 'D009': 'mov (%eax), %eax' + 'D010': 'add %eax, %eax' + 'D011': 'div $4' + 'D012': 'pushb $42' + 'D013': 'imul %eax' + 'D014': 'mul %eax' + 'D015': 'shl %eax, %ebx' + 'D016': 'syscall' + 'D017': 'int $0x80' + 'D018': 'mylabel' + 'D020': '# TODO: fix this' + } + + println('=== Testing all diagnostic codes ===') + + for code, content in tests { + print('Testing ${code}... 
') + if run_diag(lsp_path, workspace, code, content) { + println('PASS') + passed++ + } else { + println('FAIL') + failed++ + } + } + + println('') + println('=== Tests complete: ${passed} passed, ${failed} failed ===') + + if failed > 0 { + exit(1) + } +} \ No newline at end of file diff --git a/tests/d001_missing_suffix.s b/tests/d001_missing_suffix.s deleted file mode 100644 index 8ca833a..0000000 --- a/tests/d001_missing_suffix.s +++ /dev/null @@ -1,4 +0,0 @@ -# Test: Missing size suffix should trigger D001 (no operand to infer from) -# This is a test comment -mov $1, 0x1000 -nop diff --git a/tests/d012_pushb.s b/tests/d012_pushb.s deleted file mode 100644 index d2ab118..0000000 --- a/tests/d012_pushb.s +++ /dev/null @@ -1,7 +0,0 @@ -# Test: pushb not encodable should trigger D012 -# This is a comment -.section .text -.global _start -_start: - pushb $42 - ret diff --git a/tests/d020_todo.s b/tests/d020_todo.s deleted file mode 100644 index 00bbb4d..0000000 --- a/tests/d020_todo.s +++ /dev/null @@ -1,4 +0,0 @@ -# Test: TODO comment should trigger D020 -mov $1, %rax -# TODO: implement actual logic -nop diff --git a/tests/new_opcodes.s b/tests/new_opcodes.s deleted file mode 100644 index 04aa7da..0000000 --- a/tests/new_opcodes.s +++ /dev/null @@ -1,33 +0,0 @@ -# Test all new opcodes -.section .data -gdt_desc: - .word 0x10 # limit - .quad 0 # base - -.section .text -.global _start -_start: - # Test new instructions - lgdt gdt_desc - lidt gdt_desc - ltr %ax - sgdt gdt_desc - sidt gdt_desc - str %ax - - rdtsc - rdtscp - - # Control reg (ring 0 only, won't assemble) - # movq %cr0, %rax - - # Debug reg - # movq %dr0, %rax - - # Cache stuff (ring 0 only) - # invlpg (%rax) - # invd - # wbinvd - - nop - ret diff --git a/tests/ring0.s b/tests/ring0.s deleted file mode 100644 index 31c56c9..0000000 --- a/tests/ring0.s +++ /dev/null @@ -1,13 +0,0 @@ -# Test MSR and other ring-0 instructions -.section .text -.global _start -_start: - # These require ring 0 - will fail in 
user mode - # rdmsr - # wrmsr - - # Wait and iret - iretq - - nop - ret diff --git a/tests/test_diags.sh b/tests/test_diags.sh index fba83c0..2e6cbca 100755 --- a/tests/test_diags.sh +++ b/tests/test_diags.sh @@ -1,118 +1,10 @@ #!/bin/bash +# Test all diagnostic codes locally using the V test binary LSP="${LSP:-$HOME/.local/bin/gaslsp}" -WORKSPACE="/home/baby/assembly-utils-vscode" +WORKSPACE="${WORKSPACE:-$(pwd)}" -test_diag() { - local name="$1" - local code="$2" - local content="$3" - - echo -n "Testing $name (code $code)... " - - # Write temp file in workspace - local tmpfile="$WORKSPACE/tests/tmp_${code}.s" - echo -e "$content" > "$tmpfile" - - # Run LSP and check for diagnostic - result=$(python3 -c " -import json -import subprocess -import sys +export LSP +export WORKSPACE -def msg(method, params, id=1): - body = json.dumps({'jsonrpc': '2.0', 'id': id, 'method': method, 'params': params}) - return f'Content-Length: {len(body)}\r\n\r\n{body}'.encode() - -def notif(method, params): - body = json.dumps({'jsonrpc': '2.0', 'method': method, 'params': params}) - return f'Content-Length: {len(body)}\r\n\r\n{body}'.encode() - -LSP = '$LSP' -ws = '$WORKSPACE' - -reqs = [msg('initialize', {'rootUri': f'file://{ws}'}), notif('initialized', {})] - -with open('$tmpfile', 'r') as f: - content = f.read() - -reqs.append(msg('textDocument/didOpen', { - 'textDocument': {'uri': 'file://$tmpfile', 'text': content, 'version': 1} -}, id=2)) - -reqs.append(msg('shutdown', None, id=3)) -reqs.append(notif('exit', {})) - -proc = subprocess.Popen([LSP], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) -stdout, stderr = proc.communicate(b''.join(reqs), timeout=5) - -# Parse responses for diagnostics -import re -parts = re.split(r'Content-Length: \d+\r\n\r\n', stdout.decode()) -for p in parts: - if '$code' in p and 'publishDiagnostics' in p: - sys.exit(0) -sys.exit(1) -" 2>/dev/null) - - if [ $? 
-eq 0 ]; then - echo "PASS" - else - echo "FAIL" - fi - - rm -f "$tmpfile" -} - -echo "=== Testing all diagnostic codes ===" - -# D001: missing suffix -test_diag "D001" "D001" "mov \$1, %rax" - -# D002: inferred suffix -test_diag "D002" "D002" "mov %eax, %ebx" - -# D003: size mismatch -test_diag "D003" "D003" "movl %eax, %rax" - -# D004: immediate truncated -test_diag "D004" "D004" "movb \$256, %al" - -# D005: high-byte + REX -test_diag "D005" "D005" "mov %ah, %r8" - -# D009: 32-bit base in 64-bit -test_diag "D009" "D009" "mov (%eax), %eax" - -# D010: src == dst -test_diag "D010" "D010" "add %eax, %eax" - -# D011: div with immediate -test_diag "D011" "D011" "div \$4" - -# D012: pushb not encodable -test_diag "D012" "D012" "pushb \$42" - -# D013: one-operand imul -test_diag "D013" "D013" "imul %eax" - -# D014: mul unsigned -test_diag "D014" "D014" "mul %eax" - -# D015: shift count -test_diag "D015" "D015" "shl %eax, %ebx" - -# D016: syscall clobber -test_diag "D016" "D016" "syscall" - -# D017: int 0x80 -test_diag "D017" "D017" "int \$0x80" - -# D018: incomplete label -test_diag "D018" "D018" "mylabel" - -# D020: TODO comment -test_diag "D020" "D020" "# TODO: fix this" - -echo "" -echo "=== Tests complete ===" \ No newline at end of file +$HOME/.local/bin/test_diags \ No newline at end of file From 591d52a7cfb5f185da41fde2363a767afddb4680 Mon Sep 17 00:00:00 2001 From: babywolf Date: Thu, 23 Apr 2026 20:07:58 +0100 Subject: [PATCH 5/5] refactor: move test_diags.v to tests/ and fix workflow --- .github/workflows/test.yml | 3 ++- .gitignore | 2 +- package.json | 2 +- tests/test_diags.sh | 4 ++-- {src => tests}/test_diags.v | 28 ++++++++++++++-------------- 5 files changed, 20 insertions(+), 19 deletions(-) rename {src => tests}/test_diags.v (63%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e9f2dfb..2ec9dde 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -46,13 +46,14 @@ jobs: run: | cd src v -o gaslsp . 
+ cd ../tests v -o test_diags test_diags.v - name: Install and test diagnostics run: | mkdir -p ~/.local/bin cp src/gaslsp ~/.local/bin/gaslsp - cp src/test_diags ~/.local/bin/test_diags + cp tests/test_diags ~/.local/bin/test_diags cp -r src/tables ~/.local/bin/ chmod +x tests/test_diags.sh WORKSPACE="$(pwd)" tests/test_diags.sh diff --git a/.gitignore b/.gitignore index f0056e7..e96a75d 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ node_modules *.vsix gaslsp -src/test_diags \ No newline at end of file +test_diags \ No newline at end of file diff --git a/package.json b/package.json index 29ef294..e4d51ff 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "assembly-utils", "displayName": "assembly-utils", "description": "", - "version": "2.0.0", + "version": "2.0.1", "publisher": "babywolf", "repository": { "url": "https://github.com/fgsoftware1/assembly-utils-vscode" diff --git a/tests/test_diags.sh b/tests/test_diags.sh index 2e6cbca..bcff3dd 100755 --- a/tests/test_diags.sh +++ b/tests/test_diags.sh @@ -1,10 +1,10 @@ #!/bin/bash # Test all diagnostic codes locally using the V test binary +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" LSP="${LSP:-$HOME/.local/bin/gaslsp}" -WORKSPACE="${WORKSPACE:-$(pwd)}" export LSP export WORKSPACE -$HOME/.local/bin/test_diags \ No newline at end of file +"$SCRIPT_DIR/test_diags" \ No newline at end of file diff --git a/src/test_diags.v b/tests/test_diags.v similarity index 63% rename from src/test_diags.v rename to tests/test_diags.v index ef114b0..178fcc5 100644 --- a/src/test_diags.v +++ b/tests/test_diags.v @@ -3,7 +3,7 @@ module main import os fn run_diag(lsp_path string, workspace string, code string, content string) bool { - tmpfile := os.join_path(workspace, 'tests', 'tmp_${code}.s') + tmpfile := os.join_path(workspace, 'tmp_${code}.s') os.write_file(tmpfile, content) or { return false } defer { os.rm(tmpfile) or {} @@ -13,30 +13,30 @@ fn run_diag(lsp_path string, workspace 
string, code string, content string) bool escaped_tmpfile := tmpfile.replace('\\', '\\\\').replace('"', '\\"') escaped_workspace := workspace.replace('\\', '\\\\').replace('"', '\\"') - init_body := "{\"jsonrpc\":\"2.0\",\"method\":\"initialize\",\"params\":{\"rootUri\":\"file://" + escaped_workspace + "\"},\"id\":1}" - init_req := "Content-Length: ${init_body.len}\r\n\r\n" + init_body - init_notif_body := "{\"jsonrpc\":\"2.0\",\"method\":\"initialized\",\"params\":{}}" - init_notif := "Content-Length: ${init_notif_body.len}\r\n\r\n" + init_notif_body - open_body := "{\"jsonrpc\":\"2.0\",\"method\":\"textDocument/didOpen\",\"params\":{\"textDocument\":{\"uri\":\"file://" + escaped_tmpfile + "\",\"text\":\"" + escaped_content + "\",\"version\":1}},\"id\":2}" - open_req := "Content-Length: ${open_body.len}\r\n\r\n" + open_body - shutdown_body := "{\"jsonrpc\":\"2.0\",\"method\":\"shutdown\",\"params\":{},\"id\":3}" - shutdown_req := "Content-Length: ${shutdown_body.len}\r\n\r\n" + shutdown_body - exit_body := "{\"jsonrpc\":\"2.0\",\"method\":\"exit\",\"params\":{}}" - exit_notif := "Content-Length: ${exit_body.len}\r\n\r\n" + exit_body + init_body := '{"jsonrpc":"2.0","method":"initialize","params":{"rootUri":"file://' + escaped_workspace + '"},"id":1}' + init_req := 'Content-Length: ${init_body.len}\r\n\r\n' + init_body + init_notif_body := '{"jsonrpc":"2.0","method":"initialized","params":{}}' + init_notif := 'Content-Length: ${init_notif_body.len}\r\n\r\n' + init_notif_body + open_body := '{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"file://' + escaped_tmpfile + '","text":"' + escaped_content + '","version":1}},"id":2}' + open_req := 'Content-Length: ${open_body.len}\r\n\r\n' + open_body + shutdown_body := '{"jsonrpc":"2.0","method":"shutdown","params":{},"id":3}' + shutdown_req := 'Content-Length: ${shutdown_body.len}\r\n\r\n' + shutdown_body + exit_body := '{"jsonrpc":"2.0","method":"exit","params":{}}' + exit_notif := 
'Content-Length: ${exit_body.len}\r\n\r\n' + exit_body input := init_req + init_notif + open_req + shutdown_req + exit_notif - tmp_input := os.join_path(workspace, 'tests', 'tmp_input.txt') + tmp_input := os.join_path(workspace, 'tmp_input.txt') os.write_file(tmp_input, input) or { return false } defer { os.rm(tmp_input) or {} } -tmp_output := os.join_path(workspace, 'tests', 'tmp_output.txt') + tmp_output := os.join_path(workspace, 'tmp_output.txt') bash_cmd := '/bin/bash -c \'cat "' + tmp_input + '" | "' + lsp_path + '" > "' + tmp_output + '"\'' os.execute(bash_cmd) + tmp_output_content := os.read_file(tmp_output) or { - eprintln("Failed to read output: ${err}") return false }