diff --git a/cmd/semantic/main.go b/cmd/semantic/main.go index 50f9a84..f541c06 100644 --- a/cmd/semantic/main.go +++ b/cmd/semantic/main.go @@ -62,10 +62,11 @@ Flags (find/match): // snapshotElement is the JSON shape from pinchtab's /snapshot endpoint. type snapshotElement struct { - Ref string `json:"ref"` - Role string `json:"role"` - Name string `json:"name"` - Value string `json:"value"` + Ref string `json:"ref"` + Role string `json:"role"` + Name string `json:"name"` + Value string `json:"value"` + Interactive bool `json:"interactive"` } func loadSnapshot(path string) ([]semantic.ElementDescriptor, error) { @@ -94,10 +95,11 @@ func loadSnapshot(path string) ([]semantic.ElementDescriptor, error) { descs := make([]semantic.ElementDescriptor, len(elements)) for i, e := range elements { descs[i] = semantic.ElementDescriptor{ - Ref: e.Ref, - Role: e.Role, - Name: e.Name, - Value: e.Value, + Ref: e.Ref, + Role: e.Role, + Name: e.Name, + Value: e.Value, + Interactive: e.Interactive, } } return descs, nil diff --git a/cmd/semantic/main_test.go b/cmd/semantic/main_test.go new file mode 100644 index 0000000..f3e3646 --- /dev/null +++ b/cmd/semantic/main_test.go @@ -0,0 +1,38 @@ +package main + +import ( + "os" + "testing" +) + +func TestLoadSnapshot_PropagatesInteractiveFlag(t *testing.T) { + f, err := os.CreateTemp(t.TempDir(), "snapshot-*.json") + if err != nil { + t.Fatalf("CreateTemp failed: %v", err) + } + + json := `[ + {"ref":"e1","role":"button","name":"Submit","interactive":true}, + {"ref":"e2","role":"text","name":"Submit","interactive":false} + ]` + if _, err := f.WriteString(json); err != nil { + t.Fatalf("WriteString failed: %v", err) + } + if err := f.Close(); err != nil { + t.Fatalf("Close failed: %v", err) + } + + descs, err := loadSnapshot(f.Name()) + if err != nil { + t.Fatalf("loadSnapshot failed: %v", err) + } + if len(descs) != 2 { + t.Fatalf("expected 2 descriptors, got %d", len(descs)) + } + if !descs[0].Interactive { + t.Fatalf("expected first descriptor interactive=true") + } + if descs[1].Interactive { + t.Fatalf("expected second descriptor interactive=false") + } +} diff --git a/internal/engine/lexical.go b/internal/engine/lexical.go index 4ac0759..abf74bd 100644 --- a/internal/engine/lexical.go +++ b/internal/engine/lexical.go @@ -21,6 +21,10 @@ const ( phraseExactBonus = 0.15 // phrasePartialBonus rewards partial phrase containment (bigrams/trigrams). phrasePartialBonus = 0.08 + // interactiveActionBoost is applied when action verbs imply intent to interact. + interactiveActionBoost = 0.10 + // interactiveBaseBoost lightly favors interactive elements for generic queries. + interactiveBaseBoost = 0.05 ) // LexicalMatcher scores elements using Jaccard similarity with synonym @@ -47,7 +51,7 @@ func (m *LexicalMatcher) Find(_ context.Context, query string, elements []types. var candidates []scored for _, el := range elements { composite := el.Composite() - score := LexicalScore(query, composite) + score := lexicalScore(query, composite, el.Interactive) if score >= opts.Threshold { candidates = append(candidates, scored{desc: el, score: score}) } @@ -122,10 +126,27 @@ var roleKeywords = map[string]bool{ "search": true, } +var actionVerbs = map[string]bool{ + "click": true, + "press": true, + "tap": true, + "type": true, + "enter": true, + "select": true, + "check": true, + "toggle": true, + "submit": true, + "fill": true, +} + // LexicalScore computes Jaccard similarity with synonym expansion, // context-aware stopwords, role boosting, and prefix matching. // Returns [0, 1]. func LexicalScore(query, desc string) float64 { + return lexicalScore(query, desc, false) +} + +func lexicalScore(query, desc string, interactive bool) float64 { rawQTokens := tokenize(query) rawDTokens := tokenize(desc) @@ -204,7 +225,10 @@ func LexicalScore(query, desc string) float64 { // --- 5. Phrase bonus for preserving multi-word intent --- phraseBoost := phraseBonus(qTokens, dTokens) - score := jaccard + synScore + prefixScore + roleBoost + phraseBoost + // --- 6. Interactive boost for action-oriented queries --- + interactiveScore := interactiveBoost(qTokens, interactive) + + score := jaccard + synScore + prefixScore + roleBoost + phraseBoost + interactiveScore if score > 1.0 { score = 1.0 } @@ -242,6 +266,25 @@ func minInt(a, b int) int { return b } +func interactiveBoost(qTokens []string, isInteractive bool) float64 { + if !isInteractive { + return 0 + } + if containsActionVerb(qTokens) { + return interactiveActionBoost + } + return interactiveBaseBoost +} + +func containsActionVerb(tokens []string) bool { + for _, t := range tokens { + if actionVerbs[t] { + return true + } + } + return false +} + func tokenPrefixScore(qTokens, dTokens []string) float64 { if len(qTokens) == 0 { return 0 diff --git a/internal/engine/lexical_test.go b/internal/engine/lexical_test.go index 4e5e34f..693fad4 100644 --- a/internal/engine/lexical_test.go +++ b/internal/engine/lexical_test.go @@ -267,7 +267,48 @@ func TestLexicalScore_PhraseBonus_NoPhraseMatch(t *testing.T) { } } -// LexicalMatcher (types.ElementMatcher interface) tests +func TestInteractiveBoost_ActionVerbDetection(t *testing.T) { + if !containsActionVerb(tokenize("click submit")) { + t.Fatalf("expected action verb detection for action-oriented query") + } + if containsActionVerb(tokenize("account settings")) { + t.Fatalf("did not expect action verb detection for non-action query") + } +} + +func TestInteractiveBoost_NonActionUsesMildBoost(t *testing.T) { + action := interactiveBoost(tokenize("click submit"), true) + nonAction := interactiveBoost(tokenize("account settings"), true) + if nonAction <= 0 { + t.Fatalf("expected non-action interactive boost to be positive") + } + if action <= nonAction { + t.Fatalf("expected action boost to be larger than non-action boost, action=%f nonAction=%f", action, nonAction) + } +} + +func TestLexicalMatcher_ActionQueryPrefersInteractiveElement(t *testing.T) { + m := NewLexicalMatcher() + + elements := []types.ElementDescriptor{ + {Ref: "e1", Role: "button", Name: "Submit", Interactive: false}, + {Ref: "e2", Role: "button", Name: "Submit", Interactive: true}, + } + + result, err := m.Find(context.Background(), "click submit", elements, types.FindOptions{ + Threshold: 0, + TopK: 2, + }) + if err != nil { + t.Fatalf("Find returned error: %v", err) + } + if len(result.Matches) < 2 { + t.Fatalf("expected 2 matches, got %d", len(result.Matches)) + } + if result.BestRef != "e2" { + t.Fatalf("expected interactive element to rank first, got %s", result.BestRef) + } +} // LexicalMatcher (types.ElementMatcher interface) tests diff --git a/internal/types/types.go b/internal/types/types.go index cf27bdc..ba526ee 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -78,10 +78,11 @@ type MatchExplain struct { // ElementDescriptor describes a single accessibility tree node. type ElementDescriptor struct { - Ref string - Role string - Name string - Value string + Ref string + Role string + Name string + Value string + Interactive bool } // Composite returns a single string combining role, name, and value