Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,065 changes: 1,065 additions & 0 deletions shortcuts/mail/lint/linter.go

Large diffs are not rendered by default.

901 changes: 901 additions & 0 deletions shortcuts/mail/lint/linter_test.go

Large diffs are not rendered by default.

349 changes: 349 additions & 0 deletions shortcuts/mail/lint/rules.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT

package lint

import "strings"

// Rule IDs surfaced through Finding.RuleID. UPPER_SNAKE_CASE is the contract
// (S2 contract «Header / RPC contract» — Stdout envelope contract). New rules
// MUST keep this naming convention so AI / test consumers can pattern-match
// reliably.
const (
// Tag-level rules.
RuleTagFontToSpan = "TAG_FONT_TO_SPAN"
RuleTagCenterToDiv = "TAG_CENTER_TO_DIV"
RuleTagMarqueeToText = "TAG_MARQUEE_TO_TEXT"
RuleTagBlinkToText = "TAG_BLINK_TO_TEXT"
RuleTagScriptBlocked = "TAG_SCRIPT_BLOCKED"
RuleTagIframeBlocked = "TAG_IFRAME_BLOCKED"
RuleTagObjectBlocked = "TAG_OBJECT_BLOCKED"
RuleTagEmbedBlocked = "TAG_EMBED_BLOCKED"
RuleTagFormBlocked = "TAG_FORM_BLOCKED"
RuleTagInputBlocked = "TAG_INPUT_BLOCKED"
RuleTagLinkBlocked = "TAG_LINK_BLOCKED"
RuleTagMetaBlocked = "TAG_META_BLOCKED"
RuleTagBaseBlocked = "TAG_BASE_BLOCKED"
RuleTagUnknownStripped = "TAG_UNKNOWN_STRIPPED"

// Attribute-level rules.
RuleAttrEventHandlerBlocked = "ATTR_EVENT_HANDLER_BLOCKED"
RuleAttrJSURLBlocked = "ATTR_JS_URL_BLOCKED"
RuleAttrUnsafeSchemeBlocked = "ATTR_UNSAFE_SCHEME_BLOCKED"

// Style-level rules.
RuleStylePropertyDropped = "STYLE_PROPERTY_DROPPED"

// Feishu-native autofix rules. These autofix the inline style /
// class / nesting shape of common elements so AI-authored HTML
// matches what Feishu mail-editor itself emits, fixing the visual
// "extra blank line between blocks", "list bullets/numbers missing",
// "link color wrong" etc. classes of issues. The rewrite is purely
// additive — user-supplied inline styles take precedence; the lib
// only fills the missing properties.
RuleStyleListNative = "STYLE_LIST_NATIVE_INLINE_APPLIED"
RuleStyleListItemNative = "STYLE_LIST_ITEM_NATIVE_INLINE_APPLIED"
RuleStyleBlockquoteNative = "STYLE_BLOCKQUOTE_NATIVE_INLINE_APPLIED"
RuleStyleLinkNative = "STYLE_LINK_NATIVE_INLINE_APPLIED"
RuleStyleParaWrapper = "STYLE_PARA_WRAPPER_REWRITTEN"
)

// Tag classification ----------------------------------------------------------

// allowedTags enumerates tags that pass through verbatim (spec §4.4 row "通过").
// Lower-case canonical names; the parser normalises tag names so we don't need
// case-insensitive comparison at lookup time.
var allowedTags = map[string]bool{
"p": true,
"div": true,
"span": true,
"br": true,
"hr": true,
"a": true,
"img": true,
"table": true,
"thead": true,
"tbody": true,
"tfoot": true,
"tr": true,
"td": true,
"th": true,
"ul": true,
"ol": true,
"li": true,
"blockquote": true,
"pre": true,
"code": true,
"b": true,
"i": true,
"em": true,
"strong": true,
"u": true,
"s": true,
"strike": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"sub": true,
"sup": true,
"section": true,
"article": true,
"header": true,
"footer": true,
"nav": true,
"main": true,
"figure": true,
"figcaption": true,
"caption": true,
"colgroup": true,
"col": true,
// Document structural tags (golang.org/x/net/html always wraps fragments
// in <html><head><body>); we treat them as transparent so the wrapper
// nodes the parser inserts don't generate spurious findings.
"html": true,
"head": true,
"body": true,
}

// blockedTags enumerates tags whose content is removed in full and a
// SeverityError finding is emitted (spec §4.4 row "错误(删除)"). Each entry
// maps to the rule id surfaced in Finding.RuleID.
var blockedTags = map[string]string{
"script": RuleTagScriptBlocked,
"iframe": RuleTagIframeBlocked,
"object": RuleTagObjectBlocked,
"embed": RuleTagEmbedBlocked,
"form": RuleTagFormBlocked,
"input": RuleTagInputBlocked,
"select": RuleTagInputBlocked,
"option": RuleTagInputBlocked,
"button": RuleTagInputBlocked,
"link": RuleTagLinkBlocked,
"meta": RuleTagMetaBlocked,
"base": RuleTagBaseBlocked,
}

// warnAutofixTags enumerates tags rewritten when AutoFix is true (spec §4.4
// row "警告 + 自动修复"). The replacement strategy is per-tag (see rewrite.go).
var warnAutofixTags = map[string]string{
"font": RuleTagFontToSpan,
"center": RuleTagCenterToDiv,
"marquee": RuleTagMarqueeToText,
"blink": RuleTagBlinkToText,
}

// classifyTag returns the rule kind for the given lower-case tag name.
//
// kind is one of "allow", "warn", "error", "unknown". For "warn" / "error",
// ruleID names the firing rule; for "unknown", the caller falls back to
// allow-list-by-default but emits a hint via RuleTagUnknownStripped only when
// the tag is structurally suspect (e.g. <object>-like). The cli's existing
// `htmlTagRe` regex is the de-facto allow-list shipping with the codebase, so
// we don't aggressively flag anything outside `allowedTags` — drop-through
// preserves user intent for niche tags (e.g. `<details>` / `<summary>`) that
// browsers + Feishu native renderer already handle.
func classifyTag(tag string) (kind, ruleID string) {
tag = strings.ToLower(tag)
if allowedTags[tag] {
return "allow", ""
}
if id, ok := blockedTags[tag]; ok {
return "error", id
}
if id, ok := warnAutofixTags[tag]; ok {
return "warn", id
}
// Unknown / niche tags: pass through silently. The cli's existing
// `htmlTagRe` (mail_quote.go:333) tolerates them too, and the server-side
// RemoteSanitizer will remove anything risky regardless. Users authoring
// HTML in Feishu native classes (`adit-html-block*`, `history-quote-*`,
// `lark-mail-doc-quote`) hit this path — they MUST pass through unchanged
// so reply / forward quote markup survives lint round-trips. (Spec §4.4
// "通过" row second sentence: "飞书原生 quote 体系...class".)
return "allow", ""
}

// Attribute / URL / style classification --------------------------------------

// allowedURLSchemes lists URL schemes that pass through hyperlink-bearing
// attrs (`href`, `src`, `cite`, `formaction` etc.). Per spec §4.4: http(s),
// mailto, cid, data:image/* are allowed; everything else (notably javascript:
// and vbscript:) is blocked. Empty / relative URLs (no scheme) are always
// allowed because they resolve relatively at render time and pose no
// injection vector.
var allowedURLSchemes = map[string]bool{
"http": true,
"https": true,
"mailto": true,
"cid": true,
}

// blockedURLSchemes is the explicit deny-list. data:image/* is special-cased
// in classifyURLValue.
var blockedURLSchemes = map[string]bool{
"javascript": true,
"vbscript": true,
"file": true,
}

// classifyURLValue returns ("ok", "") if the URL value is acceptable, or
// ("error", ruleID) when it must be removed (javascript:/vbscript:/file:),
// or ("warn", ruleID) when the scheme is unrecognised but not actively
// dangerous. Empty values pass through (browsers ignore them).
func classifyURLValue(raw string) (kind, ruleID string) {
value := strings.TrimSpace(raw)
if value == "" {
return "ok", ""
}
// Strip leading whitespace + control bytes that could obscure the
// scheme (e.g. "java\tscript:..."). The html-parser already strips
// stray whitespace at attribute boundaries; this is defence-in-depth
// for older clients that paste from Word with U+0009 / U+0020 inside
// the scheme prefix.
value = strings.Map(func(r rune) rune {
if r < 0x20 || r == 0x7F {
return -1
}
return r
}, value)

// Find the colon delimiter; everything before it is the scheme.
colon := strings.IndexByte(value, ':')
if colon < 0 {
// No scheme → relative URL → allow.
return "ok", ""
}
scheme := strings.ToLower(value[:colon])
rest := value[colon+1:]

switch {
case allowedURLSchemes[scheme]:
return "ok", ""
case scheme == "data":
// data:image/* is whitelisted; anything else (e.g. data:text/html;...)
// is rejected. The check tolerates any subtype under image/* (png /
// jpeg / gif / svg+xml / webp) so users embedding base64 thumbnails
// don't trip the rule.
rest = strings.TrimSpace(rest)
if strings.HasPrefix(strings.ToLower(rest), "image/") {
return "ok", ""
}
return "error", RuleAttrJSURLBlocked
case blockedURLSchemes[scheme]:
return "error", RuleAttrJSURLBlocked
default:
// Unknown scheme: surface a warning so users see it but don't
// drop legitimate webcal:/tel: / similar in case downstream
// renders eventually support them.
return "warn", RuleAttrUnsafeSchemeBlocked
}
}

// urlAttributes lists attributes whose value is a URL and must therefore
// pass classifyURLValue. Lower-case canonical names.
var urlAttributes = map[string]bool{
"href": true,
"src": true,
"cite": true,
"formaction": true,
"action": true,
"background": true,
"poster": true,
}

// allowedStyleProps enumerates CSS property names that pass through the
// inline `style="..."` attribute (spec §4.4 last paragraph). Everything else
// is removed from the property list and surfaced via STYLE_PROPERTY_DROPPED.
//
// `border-*` / `padding-*` / `margin-*` are treated as prefix matches by
// classifyStyleProperty so the four directional variants (border-top etc.)
// are all admitted without enumerating each.
var allowedStyleProps = map[string]bool{
"color": true,
"background-color": true,
"font-size": true,
"font-weight": true,
"font-style": true,
"text-align": true,
"text-decoration": true,
"line-height": true,
"padding": true,
"margin": true,
"border": true,
"width": true,
"height": true,
"display": true,
"text-indent": true,
// Quote-block / native Feishu styles spec §4.4 "通过" (the editor-kit
// branch keeps them). Whitespace + word-break are part of the existing
// `<pre>` / quote wrapper styles in mail_quote.go (e.g. `bodyDivStyle`).
"white-space": true,
"word-break": true,
"word-wrap": true,
"overflow": true,
"overflow-wrap": true,
"vertical-align": true,
"list-style": true,
"list-style-type": true,
"list-style-position": true,
"transition": true,
"font-family": true,
"text-transform": true,
"hyphens": true,
"max-width": true,
"min-width": true,
"max-height": true,
"min-height": true,
"border-radius": true,
"box-sizing": true,
"opacity": true,
"cursor": true,
}

// stylePropAllowedPrefixes enumerates property name prefixes treated as
// allowed regardless of suffix (spec §4.4 "border-*"). A trailing "-" makes
// the prefix self-documenting.
var stylePropAllowedPrefixes = []string{
"border-",
"padding-",
"margin-",
}

// classifyStyleProperty reports whether the given lower-case property name
// is in the allow-list (incl. prefix matches).
func classifyStyleProperty(name string) bool {
name = strings.ToLower(strings.TrimSpace(name))
if name == "" {
return false
}
if allowedStyleProps[name] {
return true
}
for _, p := range stylePropAllowedPrefixes {
if strings.HasPrefix(name, p) {
return true
}
}
return false
}

// isEventHandlerAttr reports whether the attribute name is a DOM event
// handler (`on*`). The lib removes every such attribute regardless of its
// value (spec §4.4 row "错误(删除)" + the well-known XSS vector).
func isEventHandlerAttr(name string) bool {
name = strings.ToLower(strings.TrimSpace(name))
if !strings.HasPrefix(name, "on") {
return false
}
if len(name) <= 2 {
return false
}
// Defence-in-depth: avoid matching legitimate attrs whose name happens
// to begin with "on" (e.g. `onerror`-like attrs all start "on" + ascii
// letter). The `>= 'a'` check filters out "on-something" with hyphens.
c := name[2]
return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
}
Loading