-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix-html-refs.sh
More file actions
executable file
·40 lines (36 loc) · 1.53 KB
/
fix-html-refs.sh
File metadata and controls
executable file
·40 lines (36 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env bash
#
# Rewrite asset references in HTML files: strip the URL-encoded query
# suffix ('%3F' + everything after it, up to the URL delimiter).
#
# img/new-logo-day.svg%3F1DFDE359...725A -> img/new-logo-day.svg
# highlightjs/styles/x.css%3FHASH.css -> highlightjs/styles/x.css
#
# The regex requires at least ONE character after %3F, so a bare trailing
# "%3F" (e.g. the external link .../clojure.core/some%3F, where %3F is a
# real encoded '?') is left untouched.
#
# Delimiters that end a URL: quotes, parens, angle brackets, whitespace, comma.
#
# Usage:
# ./fix-html-refs.sh [DIR] # edit in place (default dir: .)
# DRY_RUN=1 ./fix-html-refs.sh # show what would change, edit nothing
set -euo pipefail
root="${1:-.}"
# Perl one-liner: %3F (case-insensitive) + run of non-delimiter chars -> removed.
pattern='s/%3[Ff][^"'"'"'()<>\s,]+//g'
if [ -n "${DRY_RUN:-}" ]; then
# Show only files (and lines) that would change, without writing.
find "$root" -type f -name '*.html' -print0 |
while IFS= read -r -d '' f; do
if perl -ne 'exit 1 if /%3[Ff][^"'"'"'()<>\s,]+/' "$f"; then :; else
printf '=== %s ===\n' "$f"
perl -ne 'print if /%3[Ff][^"'"'"'()<>\s,]+/' "$f" | head -3
fi
done
else
find "$root" -type f -name '*.html' -print0 |
xargs -0 perl -i -pe "$pattern"
echo "Done. Remaining %3F occurrences (should only be trailing ones like some%3F):"
grep -rhoE '[^"'"'"'()<> ,]*%3[Ff][^"'"'"'()<> ,]*' --include='*.html' "$root" | sort -u
fi