Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 37 additions & 8 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,9 @@
},
"publishConfig": {
"access": "public"
},
"//dependencies": "entities pinned to 6.x — latest dual CJS/ESM release; 7+ are ESM-only and break the CommonJS jest tests and the .cjs build",
"dependencies": {
"entities": "^6.0.1"
}
}
52 changes: 52 additions & 0 deletions src/parser/placeholders/findEntities.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { findEntities } from "./findEntities";

// Unit tests for findEntities. Inputs are written as literal character
// references (e.g. "&eacute;"); the decoded character only ever appears on the
// `decoded` side of an expectation.
describe("findEntities", () => {
  it("finds a named entity", () => {
    const result = findEntities("Activit&eacute;");
    // "Activit" is 7 characters and "&eacute;" is 8, hence the 7..15 span.
    expect(result).toEqual([
      { decoded: "é", raw: "&eacute;", position: { start: 7, end: 15 } },
    ]);
  });

  it("finds a decimal numeric entity", () => {
    const result = findEntities("caf&#233;");
    expect(result[0].decoded).toEqual("é");
    expect(result[0].raw).toEqual("&#233;");
  });

  it("finds a hex numeric entity", () => {
    const result = findEntities("caf&#xE9;");
    expect(result[0].decoded).toEqual("é");
    expect(result[0].raw).toEqual("&#xE9;");
  });

  it("decodes nbsp", () => {
    const result = findEntities("a&nbsp;b");
    // Escaped so the no-break space cannot be mistaken for a regular space.
    expect(result[0].decoded).toEqual("\u00a0");
  });

  it("decodes amp, lt, gt", () => {
    expect(findEntities("&amp;")[0].decoded).toEqual("&");
    expect(findEntities("&lt;")[0].decoded).toEqual("<");
    expect(findEntities("&gt;")[0].decoded).toEqual(">");
  });

  it("ignores a bare ampersand", () => {
    expect(findEntities("Tom & Jerry")).toEqual([]);
    expect(findEntities("AT&T")).toEqual([]);
  });

  it("ignores an unrecognized reference", () => {
    expect(findEntities("a&notarealentity;b")).toEqual([]);
  });

  it("requires the trailing semicolon", () => {
    expect(findEntities("&eacute no semicolon")).toEqual([]);
  });

  it("finds multiple entities", () => {
    const result = findEntities("&eacute;-&agrave;");
    expect(result.map((e) => e.decoded)).toEqual(["é", "à"]);
    // "&eacute;" (8 characters) plus "-" puts the second reference at index 9.
    expect(result[1].position.start).toEqual(9);
  });
});
33 changes: 33 additions & 0 deletions src/parser/placeholders/findEntities.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { decodeHTMLStrict } from "entities";

/**
 * One HTML character reference located by `findEntities`.
 */
export type EntityInfoType = {
// Text produced by decoding `raw` (e.g. "é").
decoded: string;
// The reference exactly as it was matched in the input (e.g. "&eacute;").
raw: string;
// Location of `raw` in the scanned string: `start` is the index of the match,
// `end` is `start` plus the length of `raw` (i.e. one past its last character).
position: { start: number; end: number };
};

// Named (&eacute;), decimal (&#233;) or hex (&#xE9; / &#XE9;) HTML character
// references. The hex marker is accepted in either case because HTML allows
// both "x" and "X" after "&#".
// A trailing semicolon is required so a bare "&" in text (e.g. "Tom & Jerry")
// is never matched.
// The regex is global (stateful `lastIndex`): rewind it before each exec() scan.
const ENTITY_REGEX = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g;

/**
 * Collects every HTML character reference in `text` that actually decodes.
 *
 * @param text - The string to scan.
 * @returns One entry per decodable reference, in the order they occur, with
 * the decoded text, the raw matched text and its start/end offsets.
 */
export const findEntities = (text: string): EntityInfoType[] => {
  const entities: EntityInfoType[] = [];
  // ENTITY_REGEX is global, so rewind it before starting a fresh scan.
  ENTITY_REGEX.lastIndex = 0;
  for (
    let hit = ENTITY_REGEX.exec(text);
    hit !== null;
    hit = ENTITY_REGEX.exec(text)
  ) {
    const raw = hit[0];
    const start = hit.index;
    const decoded = decodeHTMLStrict(raw);
    // decodeHTMLStrict hands back its input untouched for anything that is not
    // a recognized reference, so only keep candidates that really changed.
    if (decoded !== raw) {
      entities.push({
        decoded,
        raw,
        position: { start, end: start + raw.length },
      });
    }
  }
  return entities;
};
25 changes: 25 additions & 0 deletions src/parser/placeholders/getPlaceholders.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,29 @@ describe("get placeholders", () => {
expect(first.normalizedValue).toEqual("{test}");
expect(first.position.start).toEqual(3);
});

// A named character reference is reported as a single "entity" placeholder
// whose name is the decoded character and whose normalized value is the raw text.
it("parse html entity", () => {
  const found = getPlaceholders("Activit&eacute;");
  const entity = found![0];
  expect(entity.type).toEqual("entity");
  expect(entity.name).toEqual("é");
  expect(entity.normalizedValue).toEqual("&eacute;");
});

// A lone "&" surrounded by spaces must not yield any placeholder.
it("ignores a bare ampersand", () => {
  const found = getPlaceholders("Tom & Jerry");
  expect(found).toEqual([]);
});

// Placeholders of different kinds come back in the order they occur in the text.
it("orders entities and tags by position", () => {
  const kinds = getPlaceholders("&amp;<a>x</a>")!.map(({ type }) => type);
  expect(kinds).toEqual(["entity", "tagOpen", "tagClose"]);
});

// A character reference inside an attribute value is not reported separately:
// only the opening and closing tag placeholders are expected.
it("ignores entities inside a tag attribute", () => {
  const kinds = getPlaceholders('<a href="x&amp;y">z</a>')!.map(
    ({ type }) => type,
  );
  expect(kinds).toEqual(["tagOpen", "tagClose"]);
});
});
Loading
Loading