From a9716f47013b5cd62be41870171ca34506f7de24 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Wed, 12 Aug 2020 23:34:10 +0400 Subject: [PATCH 1/9] pulse-feed-parser-3 Roadmap to 1.0.0 --- .eslintrc.js | 12 ++++++++++++ package.json | 1 + tsconfig.json | 18 ++++++++++++++---- yarn.lock | 7 +++++++ 4 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 .eslintrc.js diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000..47b00d4 --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,12 @@ +module.exports = { + "extends": [ + "react-app", + "prettier/@typescript-eslint", + "plugin:prettier/recommended" + ], + "settings": { + "react": { + "version": "999.999.999" + } + } +} \ No newline at end of file diff --git a/package.json b/package.json index 02cc843..2931983 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "module": "dist/pulse-feed-parser.esm.js", "devDependencies": { "@size-limit/preset-small-lib": "^4.5.5", + "eslint-plugin-prettier": "^3.1.4", "husky": "^4.2.5", "prettier": "^2.0.5", "size-limit": "^4.5.5", diff --git a/tsconfig.json b/tsconfig.json index 60f11f2..032d00a 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,8 +1,14 @@ { - "include": ["src", "types"], + "include": [ + "src", + "types" + ], "compilerOptions": { "module": "esnext", - "lib": ["dom", "esnext"], + "lib": [ + "dom", + "esnext" + ], "importHelpers": true, "declaration": true, "sourceMap": true, @@ -15,9 +21,13 @@ "moduleResolution": "node", "baseUrl": "src", "paths": { - "*": ["src/*", "node_modules/*"] + "*": [ + "src/*", + "node_modules/*" + ] }, "jsx": "react", - "esModuleInterop": true + "esModuleInterop": true, + "resolveJsonModule": true } } diff --git a/yarn.lock b/yarn.lock index c984f4c..d2818c0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3256,6 +3256,13 @@ eslint-plugin-prettier@^3.1.0: dependencies: prettier-linter-helpers "^1.0.0" +eslint-plugin-prettier@^3.1.4: + version "3.1.4" + resolved 
"https://registry.yarnpkg.com/eslint-plugin-prettier/-/eslint-plugin-prettier-3.1.4.tgz#168ab43154e2ea57db992a2cd097c828171f75c2" + integrity sha512-jZDa8z76klRqo+TdGDTFJSavwbnWK2ZpqGKNZ+VvweMW516pDUMmQ2koXvxEE4JhzNvTv+radye/bWGBmA6jmg== + dependencies: + prettier-linter-helpers "^1.0.0" + eslint-plugin-react-hooks@^2.2.0: version "2.5.1" resolved "https://registry.yarnpkg.com/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-2.5.1.tgz#4ef5930592588ce171abeb26f400c7fbcbc23cd0" From 31c6a885e208947bc478039b74cdcaf797485d86 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Wed, 12 Aug 2020 23:35:30 +0400 Subject: [PATCH 2/9] feat: basic sanitizer functionality --- src/utils/sanitizing.ts | 509 ++++++++++++++++++++++++++++++++ test/sanitize.test.ts | 17 ++ test/stubs/content/content.html | 12 + test/stubs/content/web.dev.html | 139 +++++++++ 4 files changed, 677 insertions(+) create mode 100644 src/utils/sanitizing.ts create mode 100644 test/sanitize.test.ts create mode 100644 test/stubs/content/content.html create mode 100644 test/stubs/content/web.dev.html diff --git a/src/utils/sanitizing.ts b/src/utils/sanitizing.ts new file mode 100644 index 0000000..5e4e2ab --- /dev/null +++ b/src/utils/sanitizing.ts @@ -0,0 +1,509 @@ +const ACCEPTABLE_ELEMENTS = new Set([ + 'a', + 'abbr', + 'acronym', + 'address', + 'area', + 'article', + 'aside', + 'audio', + 'b', + 'big', + 'blockquote', + 'br', + 'button', + 'canvas', + 'caption', + 'center', + 'cite', + 'code', + 'col', + 'colgroup', + 'command', + 'datagrid', + 'datalist', + 'dd', + 'del', + 'details', + 'dfn', + 'dialog', + 'dir', + 'div', + 'dl', + 'dt', + 'em', + 'event-source', + 'fieldset', + 'figcaption', + 'figure', + 'font', + 'footer', + 'form', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'header', + 'hr', + 'i', + 'img', + 'input', + 'ins', + 'kbd', + 'keygen', + 'label', + 'legend', + 'li', + 'm', + 'map', + 'menu', + 'meter', + 'multicol', + 'nav', + 'nextid', + 'noscript', + 'ol', + 'optgroup', + 
'option', + 'output', + 'p', + 'pre', + 'progress', + 'q', + 's', + 'samp', + 'section', + 'select', + 'small', + 'sound', + 'source', + 'spacer', + 'span', + 'strike', + 'strong', + 'sub', + 'sup', + 'table', + 'tbody', + 'td', + 'textarea', + 'tfoot', + 'th', + 'thead', + 'time', + 'tr', + 'tt', + 'u', + 'ul', + 'var', + 'video', + 'iframe', +]); + +const ACCEPTABLE_ATTRIBUTES = new Set([ + 'abbr', + 'align', + 'alt', + 'autocomplete', + 'autofocus', + 'cellpadding', + 'cellspacing', + 'cite', + 'colspan', + 'compact', + 'disabled', + 'height', + 'href', + 'hreflang', + 'label', + 'rows', + 'rowspan', + 'span', + 'src', + 'target', + 'title', + 'width', +]); + +const MATHML_ELEMENTS = new Set([ + 'annotation', + 'annotation-xml', + 'maction', + 'maligngroup', + 'malignmark', + 'math', + 'menclose', + 'merror', + 'mfenced', + 'mfrac', + 'mglyph', + 'mi', + 'mlabeledtr', + 'mlongdiv', + 'mmultiscripts', + 'mn', + 'mo', + 'mover', + 'mpadded', + 'mphantom', + 'mprescripts', + 'mroot', + 'mrow', + 'ms', + 'mscarries', + 'mscarry', + 'msgroup', + 'msline', + 'mspace', + 'msqrt', + 'msrow', + 'mstack', + 'mstyle', + 'msub', + 'msubsup', + 'msup', + 'mtable', + 'mtd', + 'mtext', + 'mtr', + 'munder', + 'munderover', + 'none', + 'semantics', +]); + +const MATHML_ATTRIBUTES = new Set([ + 'accent', + 'accentunder', + 'actiontype', + 'align', + 'alignmentscope', + 'altimg', + 'altimg-height', + 'altimg-valign', + 'altimg-width', + 'alttext', + 'bevelled', + 'charalign', + 'close', + 'columnalign', + 'columnlines', + 'columnspacing', + 'columnspan', + 'columnwidth', + 'crossout', + 'decimalpoint', + 'denomalign', + 'depth', + 'dir', + 'display', + 'displaystyle', + 'edge', + 'encoding', + 'equalcolumns', + 'equalrows', + 'fence', + 'fontstyle', + 'fontweight', + 'form', + 'frame', + 'framespacing', + 'groupalign', + 'height', + 'href', + 'id', + 'indentalign', + 'indentalignfirst', + 'indentalignlast', + 'indentshift', + 'indentshiftfirst', + 'indentshiftlast', + 
'indenttarget', + 'infixlinebreakstyle', + 'largeop', + 'length', + 'linebreak', + 'linebreakmultchar', + 'linebreakstyle', + 'lineleading', + 'linethickness', + 'location', + 'longdivstyle', + 'lquote', + 'lspace', + 'mathbackground', + 'mathcolor', + 'mathsize', + 'mathvariant', + 'maxsize', + 'minlabelspacing', + 'minsize', + 'movablelimits', + 'notation', + 'numalign', + 'open', + 'other', + 'overflow', + 'position', + 'rowalign', + 'rowlines', + 'rowspacing', + 'rowspan', + 'rquote', + 'rspace', + 'scriptlevel', + 'scriptminsize', + 'scriptsizemultiplier', + 'selection', + 'separator', + 'separators', + 'shift', + 'side', + 'src', + 'stackalign', + 'stretchy', + 'subscriptshift', + 'superscriptshift', + 'symmetric', + 'voffset', + 'width', + 'xlink:href', + 'xlink:show', + 'xlink:type', + 'xmlns', + 'xmlns:xlink', +]); + +const SVG_ELEMENTS = new Set([ + 'a', + 'animate', + 'animateColor', + 'animateMotion', + 'animateTransform', + 'circle', + 'defs', + 'desc', + 'ellipse', + 'font-face', + 'font-face-name', + 'font-face-src', + 'foreignObject', + 'g', + 'glyph', + 'hkern', + 'line', + 'linearGradient', + 'marker', + 'metadata', + 'missing-glyph', + 'mpath', + 'path', + 'polygon', + 'polyline', + 'radialGradient', + 'rect', + 'set', + 'stop', + 'svg', + 'switch', + 'text', + 'title', + 'tspan', + 'use', +]); + +const SVG_ATTRIBUTES = new Set([ + 'accent-height', + 'accumulate', + 'additive', + 'alphabetic', + 'arabic-form', + 'ascent', + 'attributeName', + 'attributeType', + 'baseProfile', + 'bbox', + 'begin', + 'by', + 'calcMode', + 'cap-height', + 'class', + 'color', + 'color-rendering', + 'content', + 'cx', + 'cy', + 'd', + 'descent', + 'display', + 'dur', + 'dx', + 'dy', + 'end', + 'fill', + 'fill-opacity', + 'fill-rule', + 'font-family', + 'font-size', + 'font-stretch', + 'font-style', + 'font-variant', + 'font-weight', + 'from', + 'fx', + 'fy', + 'g1', + 'g2', + 'glyph-name', + 'gradientUnits', + 'hanging', + 'height', + 'horiz-adv-x', + 
'horiz-origin-x', + 'id', + 'ideographic', + 'k', + 'keyPoints', + 'keySplines', + 'keyTimes', + 'lang', + 'marker-end', + 'marker-mid', + 'marker-start', + 'markerHeight', + 'markerUnits', + 'markerWidth', + 'mathematical', + 'max', + 'min', + 'name', + 'offset', + 'opacity', + 'orient', + 'origin', + 'overline-position', + 'overline-thickness', + 'panose-1', + 'path', + 'pathLength', + 'points', + 'preserveAspectRatio', + 'r', + 'refX', + 'refY', + 'repeatCount', + 'repeatDur', + 'requiredExtensions', + 'requiredFeatures', + 'restart', + 'rotate', + 'rx', + 'ry', + 'slope', + 'stemh', + 'stemv', + 'stop-color', + 'stop-opacity', + 'strikethrough-position', + 'strikethrough-thickness', + 'stroke', + 'stroke-dasharray', + 'stroke-dashoffset', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-miterlimit', + 'stroke-opacity', + 'stroke-width', + 'systemLanguage', + 'target', + 'text-anchor', + 'to', + 'transform', + 'type', + 'u1', + 'u2', + 'underline-position', + 'underline-thickness', + 'unicode', + 'unicode-range', + 'units-per-em', + 'values', + 'version', + 'viewBox', + 'visibility', + 'width', + 'widths', + 'x', + 'x-height', + 'x1', + 'x2', + 'xlink:actuate', + 'xlink:arcrole', + 'xlink:href', + 'xlink:role', + 'xlink:show', + 'xlink:title', + 'xlink:type', + 'xml:base', + 'xml:lang', + 'xml:space', + 'xmlns', + 'xmlns:xlink', + 'y', + 'y1', + 'y2', + 'zoomAndPan', +]); + +const ACCEPTABLE_SVG_ROPERTIES = new Set([ + 'fill', + 'fill-opacity', + 'fill-rule', + 'stroke', + 'stroke-linecap', + 'stroke-linejoin', + 'stroke-opacity', + 'stroke-width', +]); + +/** + * Clear the given DOM three from unwanted elements and attributes. 
+ */ +export const sanitize = (doc: Document): Element => { + // const e = doc.body.cloneNode(true); + const walker = doc.createTreeWalker( + doc.body, + NodeFilter.SHOW_ELEMENT + NodeFilter.SHOW_COMMENT + ); + + while (walker.nextNode()) { + const current = walker.currentNode as Element; + + if ( + // Strip HTML comments + current.nodeType === Node.COMMENT_NODE || + // Strip empty elements + current.childNodes.length === 0 || + // Strip unacceptable elements + !ACCEPTABLE_ELEMENTS.has(current.nodeName.toLowerCase()) + ) { + const parent = current.parentNode!; + + parent.removeChild(current); + + // Set currentNode to parent to prevent breaking of the walk + walker.currentNode = parent; + continue; + } + + current.getAttributeNames().forEach(attribute => { + if (!ACCEPTABLE_ATTRIBUTES.has(attribute)) { + current.removeAttribute(attribute); + } + }); + } + + return doc.body; +}; diff --git a/test/sanitize.test.ts b/test/sanitize.test.ts new file mode 100644 index 0000000..9ba1a90 --- /dev/null +++ b/test/sanitize.test.ts @@ -0,0 +1,17 @@ +import fs from 'fs'; +import path from 'path'; +import { sanitize } from '../src/utils/sanitizing'; + +const content = fs.readFileSync( + path.resolve(__dirname, './stubs/content/web.dev.html'), + { encoding: 'utf8' } +); + +describe('Content sanitization', () => { + it('should sanitize HTML', () => { + const doc = new DOMParser().parseFromString(content, 'text/html'); + const sanitized = sanitize(doc); + + console.log(sanitized.innerHTML); + }); +}); diff --git a/test/stubs/content/content.html b/test/stubs/content/content.html new file mode 100644 index 0000000..58c5eb9 --- /dev/null +++ b/test/stubs/content/content.html @@ -0,0 +1,12 @@ + +

+ + Hello, World! +

+ +
+ +

+ diff --git a/test/stubs/content/web.dev.html b/test/stubs/content/web.dev.html new file mode 100644 index 0000000..ff3dc1f --- /dev/null +++ b/test/stubs/content/web.dev.html @@ -0,0 +1,139 @@ +

With more and more users spending most of their time in the browser, richly interactive websites, games, remote + desktop streaming, and application streaming strive to provide an immersive, full screen experience. To accomplish + this, sites need access to special keys and keyboard shortcuts while they are in full screen mode, so that they can be + used for navigation, menus, or gaming functionality. Some examples of the keys that may be required are Esc, + Alt + Tab, Cmd + `, and Ctrl + N.

By default, + these keys are not available to the web application because they are captured by the browser or the underlying + operating system. The Keyboard Lock API enables websites to use all available keys allowed by the host OS (see Browser compatibility).

+
Ubuntu Linux streamed to a browser tab in macOS Chrome (not running in full screen mode yet). +
The problem: a streamed Ubuntu Linux remote desktop not running in full screen mode and without + active keyboard lock, so system keys are still captured by the macOS host operating system and the experience is + not immersive yet. +
+

Using the Keyboard Lock API

The Keyboard interface of the + Keyboard API provides functions that toggle capturing of key presses from the physical keyboard as well as getting + information about the used keyboard + layout.

Prerequisite

There are two different types + of full screen available in modern browsers: JavaScript-initiated via the Fullscreen API and user-initiated via a + keyboard shortcut. The Keyboard Lock API is only available when JavaScript-initiated full + screen is active. Here's an example of JavaScript-initiated full screen:

+
+
await document.documentElement.requestFullscreen();
+

Browser compatibility

You can see browser + compatibility on Can I use. Note that not all system + keys can be locked. This varies from operating system to operating system. For example, follow crbug.com/855738 for progress updates on system keyboard lock for macOS.

Feature detection +

You can use the following pattern to check if the Keyboard Lock API is supported:

+
+
if ('keyboard' in navigator && 'lock' in navigator.keyboard) {
// Supported!
}
+

Locking the keyboard

The lock() method of the Keyboard + interface returns a promise after enabling the capture of key presses for any or all of the keys on the physical + keyboard. This method can only capture keys that are granted access by the underlying operating system. The lock() + method takes an array of one or more key codes to lock. If no key codes are provided, all keys will be locked. A list + of valid key code values is available in the UI + Events KeyboardEvent code Values spec.

Capturing all keys

The following example captures all + key presses.

+
+
navigator.keyboard.lock();
+

Capturing specific keys

The following example + captures the W, A, S, and D keys. It captures these keys regardless of + which modifiers are used with the key press. Assuming a US QWERTY layout, registering "KeyW" + ensures that W, Shift + W, Control + W, Control + + Shift + W, and all other key modifier combinations with W are sent to the app. The + same applies to "KeyA", "KeyS", and "KeyD".

+
+
await navigator.keyboard.lock([
"KeyW",
"KeyA",
"KeyS",
"KeyD",
]);
+

You can respond to captured key presses using keyboard events. For example this code uses the + onkeydown event:

+
+
document.addEventListener('keydown', (e) => {
if ((e.code === 'KeyA') && !(event.ctrlKey || event.metaKey)) {
// Do something when the 'A' key was pressed, but only
// when not in combination with the command or control key.
}
});
+

Unlocking the keyboard

The unlock() method unlocks all + keys captured by the lock() method and returns synchronously.

+
+
navigator.keyboard.unlock();
+

When a document is closed, the browser always implicitly calls unlock().

Demo

You can test the Keyboard Lock API by running + the demo on Glitch. Be sure to check out the source code. Clicking the Enter full screen button + below launches the demo in a new window so it can enter full screen mode.

+
+ +

Security Considerations

One concern with + this API is that it could be used to grab all of the keys and (in conjunction with the Fullscreen API and the PointerLock API) prevent the user from + exiting the web page. To prevent this, the spec requires the browser to provide a way for the user to exit from + keyboard lock even if all of the keys are requested by the API. In Chrome, this escape hatch is a long (two second) + Esc key press to trigger an exit from Keyboard Lock.

+

Acknowledgements +

This article was reviewed by Joe Medley and Kayce Basques. The Keyboard Lock spec is authored by Gary Kacmarcik and Jamie Walch. Hero image by Ken Suarez on Unsplash. +

From 552f265dbe12fe5a2ee661eef6590f84a2870c73 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Wed, 12 Aug 2020 23:37:13 +0400 Subject: [PATCH 3/9] chore: fix code style with prettier --- src/Adapter/RSSFeedAdapter.ts | 11 ++++++-- src/Extensions/DublinCoreExtension.ts | 36 +++++++++++++-------------- src/Parsers/AtomParser.ts | 12 ++++++--- src/index.ts | 4 +-- src/types/Atom.ts | 4 +-- src/types/Extension.ts | 10 ++++---- src/utils/extensions.ts | 11 +++++--- src/utils/parsePerson.ts | 10 ++++---- test/RssParser.test.ts | 2 +- 9 files changed, 57 insertions(+), 43 deletions(-) diff --git a/src/Adapter/RSSFeedAdapter.ts b/src/Adapter/RSSFeedAdapter.ts index e8a2b0a..514fd7a 100644 --- a/src/Adapter/RSSFeedAdapter.ts +++ b/src/Adapter/RSSFeedAdapter.ts @@ -1,5 +1,12 @@ -import { RSSFeed, RSSItem } from '../types/RSS'; -import { Enclosure, Feed, Image, Item, Person } from '../types/Feed'; +import { + Enclosure, + Feed, + Image, + Item, + Person, + RSSFeed, + RSSItem, +} from '../types'; import { parsePerson } from '../utils/parsePerson'; export class RSSFeedAdapter { diff --git a/src/Extensions/DublinCoreExtension.ts b/src/Extensions/DublinCoreExtension.ts index 6574a9a..0942c5a 100644 --- a/src/Extensions/DublinCoreExtension.ts +++ b/src/Extensions/DublinCoreExtension.ts @@ -1,18 +1,18 @@ -type DCExtension = { - title: Maybe - creator: Maybe - author: Maybe - subject: Maybe - description: Maybe - publisher: Maybe - contributor: Maybe - date: Maybe - type: Maybe - format: Maybe - identifier: Maybe - source: Maybe - language: Maybe - relation: Maybe - coverage: Maybe - rights: Maybe -} +export type DCExtension = { + title: Maybe; + creator: Maybe; + author: Maybe; + subject: Maybe; + description: Maybe; + publisher: Maybe; + contributor: Maybe; + date: Maybe; + type: Maybe; + format: Maybe; + identifier: Maybe; + source: Maybe; + language: Maybe; + relation: Maybe; + coverage: Maybe; + rights: Maybe; +}; diff --git a/src/Parsers/AtomParser.ts 
b/src/Parsers/AtomParser.ts index 9cc189d..8a1f30d 100644 --- a/src/Parsers/AtomParser.ts +++ b/src/Parsers/AtomParser.ts @@ -7,7 +7,7 @@ import { AtomLink, AtomPerson, AtomSource, -} from '../types/Atom'; +} from '../types'; import { getExtensionName, isExtension, @@ -141,7 +141,8 @@ export class AtomParser { ...this.feed.extensions[ext][prop], extension, ]; - } if (tagName === 'title') { + } + if (tagName === 'title') { this.feed.title = this.parseText(walker.currentNode as Element); } else if (tagName === 'id') { this.feed.id = this.parseText(walker.currentNode as Element); @@ -217,7 +218,10 @@ export class AtomParser { entry.extensions[ext][prop] = []; } - entry.extensions[ext][prop] = [...entry.extensions[ext][prop], extension]; + entry.extensions[ext][prop] = [ + ...entry.extensions[ext][prop], + extension, + ]; } else if (tagName === 'title') { entry.title = this.parseText(walker.currentNode as Element); } else if (tagName === 'id') { @@ -395,7 +399,7 @@ export class AtomParser { // If type="xhtml", then this element contains inline xhtml, wrapped in a div element. 
if (type === 'xhtml') { - return node.firstElementChild!.textContent!.trim() + return node.firstElementChild!.textContent!.trim(); } return null; diff --git a/src/index.ts b/src/index.ts index 965900f..b5ca2da 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,6 +4,6 @@ export * from './Parsers/RSSParser'; export * from './Parsers/AtomParser'; export * from './Adapter/AtomFeedAdapter'; export * from './Adapter/RSSFeedAdapter'; -export * from './Errors/FeedTypeError' -export * from './Errors/NetworkError' +export * from './Errors/FeedTypeError'; +export * from './Errors/NetworkError'; export * from './types'; diff --git a/src/types/Atom.ts b/src/types/Atom.ts index f06f4b8..f20c02a 100644 --- a/src/types/Atom.ts +++ b/src/types/Atom.ts @@ -16,7 +16,7 @@ export type AtomFeed = { authors: Maybe>; categories: Maybe>; entries: Maybe>; - extensions: Maybe + extensions: Maybe; }; // Entry is an Atom Entry @@ -33,7 +33,7 @@ export type AtomEntry = { published: Maybe; source: Maybe; content: Maybe; - extensions: Maybe + extensions: Maybe; }; // Category is category metadata for Feeds and Entries diff --git a/src/types/Extension.ts b/src/types/Extension.ts index 52eb3ca..40b1153 100644 --- a/src/types/Extension.ts +++ b/src/types/Extension.ts @@ -1,10 +1,10 @@ export type Extension = { name: string; value: Maybe; - attrs: Maybe - children: Maybe> -} + attrs: Maybe; + children: Maybe>; +}; export type Attrs = { - [key: string]: string -} + [key: string]: string; +}; diff --git a/src/utils/extensions.ts b/src/utils/extensions.ts index e586c3e..0bd1342 100644 --- a/src/utils/extensions.ts +++ b/src/utils/extensions.ts @@ -1,4 +1,4 @@ -import { Extension } from '../types/Extension'; +import { Extension } from '../types'; const EXTENSION: Extension = { name: '', @@ -26,13 +26,16 @@ export const getExtensionName = (node: Element): string => node.prefix!; */ export const parseExtension = (node: Element): [string, Extension] => { const ext = { ...EXTENSION }; - const firstChildName 
= node.firstChild?.nodeName; + const firstChild = node.firstChild; + const isTextNode = + node.firstChild?.nodeType === Node.COMMENT_NODE || + node.firstChild?.nodeType === Node.CDATA_SECTION_NODE; + ext.name = node.nodeName.toLowerCase(); - const isTextNode = ['#text', '#cdata-section'].includes(firstChildName!); if (isTextNode) { ext.value = node.textContent!.trim(); - } else if (firstChildName !== undefined) { + } else if (firstChild !== null) { // child will be undefined in case of self-closing node // like ext.children = Array.from(node.childNodes).map(node => { diff --git a/src/utils/parsePerson.ts b/src/utils/parsePerson.ts index eaf568d..05cc2b1 100644 --- a/src/utils/parsePerson.ts +++ b/src/utils/parsePerson.ts @@ -1,9 +1,9 @@ -import { Person } from '../types/Feed'; +import { Person } from '../types'; -const emailNameRgx = new RegExp(`^([^@]+@[^\s]+)\s+\(([^@]+)\)$`); -const nameEmailRgx = new RegExp(`^([^@]+)\s+\(([^@]+@[^)]+)\)$`); -const nameOnlyRgx = new RegExp(`^([^@()]+)$`); -const emailOnlyRgx = new RegExp(`^([^@()]+@[^@()]+)$`); +const emailNameRgx = new RegExp(/^([^@]+@[^\s]+)\s+\(([^@]+)\)$/); +const nameEmailRgx = new RegExp(/^([^@]+)\s+\(([^@]+@[^)]+)\)$/); +const nameOnlyRgx = new RegExp(/^([^@()]+)$/); +const emailOnlyRgx = new RegExp(/^([^@()]+@[^@()]+)$/); // ParseNameAddress parses name/email strings commonly // found in RSS feeds of the format "Example Name (example@site.com)" diff --git a/test/RssParser.test.ts b/test/RssParser.test.ts index 61f0765..be04c02 100644 --- a/test/RssParser.test.ts +++ b/test/RssParser.test.ts @@ -1,6 +1,6 @@ import fs from 'fs'; import path from 'path'; -import { RSSParser } from '../src/Parsers/RSSParser'; +import { RSSParser } from '../src'; const feedPaths = [ path.join(__dirname, './stubs/rss/github.xml'), From b248d80dfe0332fe5706e020f8f40393f14380bb Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Wed, 12 Aug 2020 23:40:54 +0400 Subject: [PATCH 4/9] feat!: change parser signature --- src/Parser.ts 
| 59 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/src/Parser.ts b/src/Parser.ts index fc74f0a..91be34c 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -1,4 +1,4 @@ -import { Feed } from './types/Feed'; +import { Feed } from './types'; import { FeedType, XmlFeedTypeDetector } from './XmlFeedTypeDetector'; import { RSSParser } from './Parsers/RSSParser'; import { AtomParser } from './Parsers/AtomParser'; @@ -6,28 +6,58 @@ import { RSSFeedAdapter } from './Adapter/RSSFeedAdapter'; import { AtomFeedAdapter } from './Adapter/AtomFeedAdapter'; import { NetworkError } from './Errors/NetworkError'; import { FeedTypeError } from './Errors/FeedTypeError'; +import { version } from '../package.json'; -export const DEFAULT_FETCH_HEADERS = { - 'User-Agent': 'PulseRSS/1.0', +export type PFPOptions = { + /** + * Enables HTML content sanitization. + * Default sanitization rules will strip unwanted tags, attributes, comments + * and empty paragraphs. You can change this behavior with a function. + */ + sanitization?: boolean; + /** + * Options that will be passed to fetch() while parsing feeds from URLs. + * Default options contain a User-Agent string specific to PFP. + */ + fetchOptions?: RequestInit; +}; + +const DEFAULT_OPTIONS = { + sanitization: true, + fetchOptions: { + headers: { + 'User-Agent': `pulse-feed-parser/${version}`, + }, + }, }; /** - * Parser Factory + * Pulse Feed Parser Factory */ export class Parser { - fetchOptions: RequestInit; + options: PFPOptions; - constructor( - { fetchOptions }: { fetchOptions: RequestInit } = { fetchOptions: {} } - ) { - this.fetchOptions = fetchOptions; + /** + * Changed options will be merged with the defaults. 
+ */ + constructor(options: PFPOptions) { + this.options = { + ...options, + fetchOptions: { + ...options.fetchOptions, + headers: { + ...DEFAULT_OPTIONS.fetchOptions.headers, + ...options.fetchOptions?.headers, + }, + }, + }; } + /** + * Try to parse a feed from the given URL. + */ public async parseURL(url: string): Promise { - const response = await fetch(url, { - ...this.fetchOptions, - headers: { ...DEFAULT_FETCH_HEADERS, ...this.fetchOptions?.headers }, - }); + const response = await fetch(url, this.options.fetchOptions); if (response.status < 200 || response.status >= 300) { throw new NetworkError(`The feed is unreachable`, response.status); @@ -41,6 +71,9 @@ export class Parser { return this.parseDocument(doc); } + /** + * Parse a feed from the given XML document. + */ public parseDocument(doc: Document): Feed { const type = XmlFeedTypeDetector.detect(doc); From a86342338e9a718848be8d5b29f6628b956ade8f Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Wed, 12 Aug 2020 23:41:21 +0400 Subject: [PATCH 5/9] feat!: create IParser interface --- src/Adapter/AtomFeedAdapter.ts | 11 +++++++++-- src/Parsers/RSSParser.ts | 30 ++++++++++++++++++++++-------- src/types/IParser.ts | 9 +++++++++ src/types/RSS.ts | 4 ++-- src/types/index.ts | 1 + test/AtomParser.test.ts | 2 +- 6 files changed, 44 insertions(+), 13 deletions(-) create mode 100644 src/types/IParser.ts diff --git a/src/Adapter/AtomFeedAdapter.ts b/src/Adapter/AtomFeedAdapter.ts index 9e8e548..922ba56 100644 --- a/src/Adapter/AtomFeedAdapter.ts +++ b/src/Adapter/AtomFeedAdapter.ts @@ -1,5 +1,12 @@ -import { Enclosure, Feed, Image, Item, Person } from '../types/Feed'; -import { AtomEntry, AtomFeed } from '../types/Atom'; +import { + Enclosure, + Feed, + Image, + Item, + Person, + AtomEntry, + AtomFeed, +} from '../types'; import { parsePerson } from '../utils/parsePerson'; // DefaultAtomTranslator converts an atom.Feed struct diff --git a/src/Parsers/RSSParser.ts b/src/Parsers/RSSParser.ts index 
636c605..60d0917 100644 --- a/src/Parsers/RSSParser.ts +++ b/src/Parsers/RSSParser.ts @@ -8,27 +8,30 @@ import { RSSSource, RSSCloud, RSSTextInput, -} from '../types/RSS'; + IParser, + ParserOptions, +} from '../types'; import { append } from '../utils/collection'; import { getExtensionName, isExtension, parseExtension, } from '../utils/extensions'; +import { sanitize } from '../utils/sanitizing'; /** * Parser for RSS feeds */ -export class RSSParser { +export class RSSParser implements IParser { feed: RSSFeed; private readonly image: RSSImage; private readonly item: RSSItem; private readonly guid: RSSGUID; private readonly textInput: RSSTextInput; - private document: Document; + private options: ParserOptions; - constructor(document: Document) { - this.document = document; + constructor(options: ParserOptions) { + this.options = options; this.image = { description: null, @@ -88,8 +91,8 @@ export class RSSParser { }; } - public parse(): RSSFeed { - const root = this.document.firstElementChild; + public parse(doc: Document): RSSFeed { + const root = doc.firstElementChild; if (root === null) { throw new Error('No root node'); @@ -244,7 +247,7 @@ export class RSSParser { } else if (tagName === 'pubdate') { item.pubDate = this.parseText(walker.currentNode as Element); } else if (tagName === 'content:encoded') { - item.content = this.parseText(walker.currentNode as Element); + item.content = this.parseHTML(walker.currentNode as Element); } else if (tagName === 'source') { item.source = this.parseSource(walker.currentNode as Element); } else if (tagName === 'enclosure') { @@ -336,4 +339,15 @@ export class RSSParser { return null; } + + private parseHTML(node: Node): Maybe { + const text = this.parseText(node); + + if (text === null) return null; + + const doc = new DOMParser().parseFromString(text, 'text/html'); + const element = sanitize(doc); + + return element.innerHTML; + } } diff --git a/src/types/IParser.ts b/src/types/IParser.ts new file mode 100644 index 
0000000..959f2a3 --- /dev/null +++ b/src/types/IParser.ts @@ -0,0 +1,9 @@ +export type ParserOptions = { + sanitization: boolean; +}; + +export interface IParser { + constructor(options: ParserOptions): void; + + parse(doc: Document): any; +} diff --git a/src/types/RSS.ts b/src/types/RSS.ts index 01fc4b3..3c5beef 100644 --- a/src/types/RSS.ts +++ b/src/types/RSS.ts @@ -22,7 +22,7 @@ export type RSSFeed = { cloud: Maybe; textInput: Maybe; items: Maybe>; - extensions: Maybe + extensions: Maybe; }; // Item is an RSS Item @@ -38,7 +38,7 @@ export type RSSItem = { guid: Maybe; pubDate: Maybe; source: Maybe; - extensions: Maybe + extensions: Maybe; }; // Image is an image that represents the feed diff --git a/src/types/index.ts b/src/types/index.ts index 0b43b4b..2fa0549 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -2,3 +2,4 @@ export * from './Atom'; export * from './Extension'; export * from './Feed'; export * from './RSS'; +export * from './IParser'; diff --git a/test/AtomParser.test.ts b/test/AtomParser.test.ts index 115d239..8a8385f 100644 --- a/test/AtomParser.test.ts +++ b/test/AtomParser.test.ts @@ -1,6 +1,6 @@ import fs from 'fs'; import path from 'path'; -import { AtomParser } from '../src/Parsers/AtomParser'; +import { AtomParser } from '../src'; const feedPaths = [ path.join(__dirname, './stubs/atom/gitlab.xml'), From 2a9ed64e2a404684a67ad3297ff30a5443648725 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Fri, 14 Aug 2020 21:02:24 +0400 Subject: [PATCH 6/9] feat: sanitizer improvements --- src/utils/{sanitizing.ts => sanitizer.ts} | 281 ++++++---------------- tsconfig.json | 3 +- 2 files changed, 76 insertions(+), 208 deletions(-) rename src/utils/{sanitizing.ts => sanitizer.ts} (54%) diff --git a/src/utils/sanitizing.ts b/src/utils/sanitizer.ts similarity index 54% rename from src/utils/sanitizing.ts rename to src/utils/sanitizer.ts index 5e4e2ab..190ff0f 100644 --- a/src/utils/sanitizing.ts +++ b/src/utils/sanitizer.ts @@ -85,6 +85,7 @@ 
const ACCEPTABLE_ELEMENTS = new Set([ 'strong', 'sub', 'sup', + 'svg', 'table', 'tbody', 'td', @@ -127,6 +128,18 @@ const ACCEPTABLE_ATTRIBUTES = new Set([ 'width', ]); +const ACCEPTABLE_EMPTY_ELEMENTS = new Set([ + 'img', + 'video', + 'audio', + 'hr', + 'br', + 'canvas', + 'input', + 'area', + 'iframe', +]); + const MATHML_ELEMENTS = new Set([ 'annotation', 'annotation-xml', @@ -276,203 +289,15 @@ const MATHML_ATTRIBUTES = new Set([ 'xmlns:xlink', ]); -const SVG_ELEMENTS = new Set([ - 'a', - 'animate', - 'animateColor', - 'animateMotion', - 'animateTransform', - 'circle', - 'defs', - 'desc', - 'ellipse', - 'font-face', - 'font-face-name', - 'font-face-src', - 'foreignObject', - 'g', - 'glyph', - 'hkern', - 'line', - 'linearGradient', - 'marker', - 'metadata', - 'missing-glyph', - 'mpath', - 'path', - 'polygon', - 'polyline', - 'radialGradient', - 'rect', - 'set', - 'stop', - 'svg', - 'switch', - 'text', - 'title', - 'tspan', - 'use', -]); - -const SVG_ATTRIBUTES = new Set([ - 'accent-height', - 'accumulate', - 'additive', - 'alphabetic', - 'arabic-form', - 'ascent', - 'attributeName', - 'attributeType', - 'baseProfile', - 'bbox', - 'begin', - 'by', - 'calcMode', - 'cap-height', - 'class', - 'color', - 'color-rendering', - 'content', - 'cx', - 'cy', - 'd', - 'descent', - 'display', - 'dur', - 'dx', - 'dy', - 'end', - 'fill', - 'fill-opacity', - 'fill-rule', - 'font-family', - 'font-size', - 'font-stretch', - 'font-style', - 'font-variant', - 'font-weight', - 'from', - 'fx', - 'fy', - 'g1', - 'g2', - 'glyph-name', - 'gradientUnits', - 'hanging', - 'height', - 'horiz-adv-x', - 'horiz-origin-x', - 'id', - 'ideographic', - 'k', - 'keyPoints', - 'keySplines', - 'keyTimes', - 'lang', - 'marker-end', - 'marker-mid', - 'marker-start', - 'markerHeight', - 'markerUnits', - 'markerWidth', - 'mathematical', - 'max', - 'min', - 'name', - 'offset', - 'opacity', - 'orient', - 'origin', - 'overline-position', - 'overline-thickness', - 'panose-1', - 'path', - 'pathLength', - 
'points', - 'preserveAspectRatio', - 'r', - 'refX', - 'refY', - 'repeatCount', - 'repeatDur', - 'requiredExtensions', - 'requiredFeatures', - 'restart', - 'rotate', - 'rx', - 'ry', - 'slope', - 'stemh', - 'stemv', - 'stop-color', - 'stop-opacity', - 'strikethrough-position', - 'strikethrough-thickness', - 'stroke', - 'stroke-dasharray', - 'stroke-dashoffset', - 'stroke-linecap', - 'stroke-linejoin', - 'stroke-miterlimit', - 'stroke-opacity', - 'stroke-width', - 'systemLanguage', - 'target', - 'text-anchor', - 'to', - 'transform', - 'type', - 'u1', - 'u2', - 'underline-position', - 'underline-thickness', - 'unicode', - 'unicode-range', - 'units-per-em', - 'values', - 'version', - 'viewBox', - 'visibility', - 'width', - 'widths', - 'x', - 'x-height', - 'x1', - 'x2', - 'xlink:actuate', - 'xlink:arcrole', - 'xlink:href', - 'xlink:role', - 'xlink:show', - 'xlink:title', - 'xlink:type', - 'xml:base', - 'xml:lang', - 'xml:space', - 'xmlns', - 'xmlns:xlink', - 'y', - 'y1', - 'y2', - 'zoomAndPan', -]); - -const ACCEPTABLE_SVG_ROPERTIES = new Set([ - 'fill', - 'fill-opacity', - 'fill-rule', - 'stroke', - 'stroke-linecap', - 'stroke-linejoin', - 'stroke-opacity', - 'stroke-width', +const ALL_ACCEPTABLE_ELEMENTS = new Set([ + ...ACCEPTABLE_ELEMENTS, + ...MATHML_ELEMENTS, ]); /** * Clear the given DOM three from unwanted elements and attributes. 
*/ -export const sanitize = (doc: Document): Element => { - // const e = doc.body.cloneNode(true); +export const sanitize = (doc: Document): string => { const walker = doc.createTreeWalker( doc.body, NodeFilter.SHOW_ELEMENT + NodeFilter.SHOW_COMMENT @@ -480,30 +305,72 @@ export const sanitize = (doc: Document): Element => { while (walker.nextNode()) { const current = walker.currentNode as Element; + const nodeName = current.nodeName.toLowerCase(); + // Strip HTML comments + // Strip unacceptable elements if ( - // Strip HTML comments current.nodeType === Node.COMMENT_NODE || - // Strip empty elements - current.childNodes.length === 0 || - // Strip unacceptable elements - !ACCEPTABLE_ELEMENTS.has(current.nodeName.toLowerCase()) + !ALL_ACCEPTABLE_ELEMENTS.has(nodeName) ) { - const parent = current.parentNode!; + removeNodeFromDocument(walker, current); + continue; + } - parent.removeChild(current); + // Remove redundant empty elements + if ( + current.childNodes.length === 0 && + !ACCEPTABLE_EMPTY_ELEMENTS.has(nodeName) + ) { + removeNodeFromDocument(walker, current); + continue; + } - // Set currentNode to parent to prevent breaking of the walk - walker.currentNode = parent; + // Skip SVG checking + if (nodeName === 'svg') { + skipNodeChecking(walker, current); continue; } - current.getAttributeNames().forEach(attribute => { - if (!ACCEPTABLE_ATTRIBUTES.has(attribute)) { - current.removeAttribute(attribute); - } - }); + // Clear common elements' attributes + if (ACCEPTABLE_ELEMENTS.has(nodeName)) { + current.getAttributeNames().forEach(attribute => { + if (!ACCEPTABLE_ATTRIBUTES.has(attribute)) { + current.removeAttribute(attribute); + } + }); + // Clean MATHML elements' attributes + } else if (MATHML_ELEMENTS.has(nodeName)) { + current.getAttributeNames().forEach(attribute => { + if (!MATHML_ATTRIBUTES.has(attribute)) { + current.removeAttribute(attribute); + } + }); + } } - return doc.body; + return doc.body.innerHTML; +}; + +/** + * Helper that remove the node 
from the document and sets a currentNode + * of a walker object back to parent to continue walking. + */ +const removeNodeFromDocument = (walker: TreeWalker, node: Element) => { + const parent = node.parentNode!; + parent.removeChild(node); + + // Set currentNode to parent to prevent breaking of the walk + walker.currentNode = parent; +}; + +/** + * Sets currentNode to next available + */ +const skipNodeChecking = (walker: TreeWalker, node: Element) => { + const nextSibling = node.nextSibling; + + if (nextSibling !== null) { + walker.currentNode = nextSibling; + } }; diff --git a/tsconfig.json b/tsconfig.json index 032d00a..0665fca 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -28,6 +28,7 @@ }, "jsx": "react", "esModuleInterop": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "downlevelIteration": true } } From 4d92c3a68eac7873d2a4d169c9be5d9e9287da33 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Fri, 14 Aug 2020 21:03:43 +0400 Subject: [PATCH 7/9] tests: sanitizer covered with tests --- test/helpers.ts | 23 +++ test/sanitize.test.ts | 17 --- test/sanitizer.test.ts | 77 ++++++++++ test/stubs/content/content.html | 12 -- test/stubs/content/web.dev.html | 139 ------------------ test/stubs/sanitizer/allowed-attributes.html | 3 + .../sanitizer/allowed-empty-elements.html | 9 ++ test/stubs/sanitizer/empty-elements.html | 4 + test/stubs/sanitizer/html-comments.html | 6 + .../sanitizer/restricted-attributes.html | 3 + test/stubs/sanitizer/restricted-elements.html | 5 + test/stubs/sanitizer/svg.html | 7 + 12 files changed, 137 insertions(+), 168 deletions(-) create mode 100644 test/helpers.ts delete mode 100644 test/sanitize.test.ts create mode 100644 test/sanitizer.test.ts delete mode 100644 test/stubs/content/content.html delete mode 100644 test/stubs/content/web.dev.html create mode 100644 test/stubs/sanitizer/allowed-attributes.html create mode 100644 test/stubs/sanitizer/allowed-empty-elements.html create mode 100644 
test/stubs/sanitizer/empty-elements.html create mode 100644 test/stubs/sanitizer/html-comments.html create mode 100644 test/stubs/sanitizer/restricted-attributes.html create mode 100644 test/stubs/sanitizer/restricted-elements.html create mode 100644 test/stubs/sanitizer/svg.html diff --git a/test/helpers.ts b/test/helpers.ts new file mode 100644 index 0000000..58fcb44 --- /dev/null +++ b/test/helpers.ts @@ -0,0 +1,23 @@ +import fs from 'fs'; +import path from 'path'; + +export const getStub = (basePath: string) => (stub: string) => { + return fs.readFileSync( + path.resolve(__dirname, `./stubs/${basePath}/${stub}`), + { encoding: 'utf8' } + ); +}; + +export const saveSnapshot = (basePath: string) => ( + stub: string, + content: string +) => { + const segments = stub.split('.'); + segments.splice(-1, 0, 'result'); + const fileName = segments.join('.'); + + fs.writeFileSync( + path.resolve(__dirname, `./stubs/${basePath}/${fileName}`), + content + ); +}; diff --git a/test/sanitize.test.ts b/test/sanitize.test.ts deleted file mode 100644 index 9ba1a90..0000000 --- a/test/sanitize.test.ts +++ /dev/null @@ -1,17 +0,0 @@ -import fs from 'fs'; -import path from 'path'; -import { sanitize } from '../src/utils/sanitizing'; - -const content = fs.readFileSync( - path.resolve(__dirname, './stubs/content/web.dev.html'), - { encoding: 'utf8' } -); - -describe('Content sanitization', () => { - it('should sanitize HTML', () => { - const doc = new DOMParser().parseFromString(content, 'text/html'); - const sanitized = sanitize(doc); - - console.log(sanitized.innerHTML); - }); -}); diff --git a/test/sanitizer.test.ts b/test/sanitizer.test.ts new file mode 100644 index 0000000..b27d07f --- /dev/null +++ b/test/sanitizer.test.ts @@ -0,0 +1,77 @@ +import { getStub } from './helpers'; +import { sanitize } from '../src/utils/sanitizer'; + +const getStubContent = getStub('sanitizer'); + +describe('Content sanitization', () => { + it('should remove unacceptable empty elements', () => { + 
const doc = new DOMParser().parseFromString( + getStubContent('empty-elements.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual(getStubContent('empty-elements.result.html')); + }); + + it('should preserve allowed empty elements', () => { + const doc = new DOMParser().parseFromString( + getStubContent('allowed-empty-elements.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual( + getStubContent('allowed-empty-elements.result.html') + ); + }); + + it('should remove restricted attributes', () => { + const doc = new DOMParser().parseFromString( + getStubContent('restricted-attributes.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual( + getStubContent('restricted-attributes.result.html') + ); + }); + + it('should preserve allowed attributes', () => { + const doc = new DOMParser().parseFromString( + getStubContent('allowed-attributes.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual( + getStubContent('allowed-attributes.result.html') + ); + }); + + it('should remove restricted elements', () => { + const doc = new DOMParser().parseFromString( + getStubContent('restricted-elements.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual( + getStubContent('restricted-elements.result.html') + ); + }); + + it('should remove html comments', () => { + const doc = new DOMParser().parseFromString( + getStubContent('html-comments.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual(getStubContent('html-comments.result.html')); + }); + + it('should preserve svg elements', () => { + const doc = new DOMParser().parseFromString( + getStubContent('svg.html'), + 'text/html' + ); + + expect(sanitize(doc)).toEqual(getStubContent('svg.result.html')); + }); +}); diff --git a/test/stubs/content/content.html b/test/stubs/content/content.html deleted file mode 100644 index 58c5eb9..0000000 --- a/test/stubs/content/content.html +++ /dev/null @@ -1,12 +0,0 @@ - -

- - Hello, World! -

- -
- -

- diff --git a/test/stubs/content/web.dev.html b/test/stubs/content/web.dev.html deleted file mode 100644 index ff3dc1f..0000000 --- a/test/stubs/content/web.dev.html +++ /dev/null @@ -1,139 +0,0 @@ -

With more and more users spending most of their time in the browser, richly interactive websites, games, remote - desktop streaming, and application streaming strive to provide an immersive, full screen experience. To accomplish - this, sites need access to special keys and keyboard shortcuts while they are in full screen mode, so that they can be - used for navigation, menus, or gaming functionality. Some examples of the keys that may be required are Esc, - Alt + Tab, Cmd + `, and Ctrl + N.

By default, - these keys are not available to the web application because they are captured by the browser or the underlying - operating system. The Keyboard Lock API enables websites to use all available keys allowed by the host OS (see Browser compatibility).

-
Ubuntu Linux streamed to a browser tab in macOS Chrome (not running in full screen mode yet). -
The problem: a streamed Ubuntu Linux remote desktop not running in full screen mode and without - active keyboard lock, so system keys are still captured by the macOS host operating system and the experience is - not immersive yet. -
-

Using the Keyboard Lock API

The Keyboard interface of the the - Keyboard API provides functions that toggle capturing of key presses from the physical keyboard as well as getting - information about the used keyboard - layout.

Prerequisite

There are two different types - of full screen available in modern browsers: JavaScript-initiated via the Fullscreen API and user-initiated via a - keyboard shortcut. The Keyboard Lock API is only available when JavaScript-initiated full - screen is active. Here's an example of JavaScript-initiated full screen:

-
-
await document.documentElement.requestFullscreen();
-

Browser compatibility

You can see browser - compatibility on Can I use. Note that not all system - keys can be locked. This varies from operating system to operating system. For example, follow crbug.com/855738 for progress updates on system keyboard lock for macOS.

Feature detection -

You can use the following pattern to check if the Keyboard Lock API is supported:

-
-
if ('keyboard' in navigator && 'lock' in navigator.keyboard) {
// Supported!
}
-

Locking the keyboard

The lock() method of the Keyboard - interface returns a promise after enabling the capture of key presses for any or all of the keys on the physical - keyboard. This method can only capture keys that are granted access by the underlying operating system. The lock() - method takes an array of one or more key codes to lock. If no key codes are provided, all keys will be locked. A list - of valid key code values is available in the UI - Events KeyboardEvent code Values spec.

Capturing all keys

The following example captures all - key presses.

-
-
navigator.keyboard.lock();
-

Capturing specific keys

The following example - captures the W, A, S, and D keys. It captures these keys regardless of - which modifiers are used with the key press. Assuming a US QWERTY layout, registering "KeyW" - ensures that W, Shift + W, Control + W, Control + - Shift + W, and all other key modifier combinations with W are sent to the app. The - same applies to "KeyA", "KeyS", and "KeyD".

-
-
await navigator.keyboard.lock([
"KeyW",
"KeyA",
"KeyS",
"KeyD",
]);
-

You can respond to captured key presses using keyboard events. For example this code uses the - onkeydown event:

-
-
document.addEventListener('keydown', (e) => {
if ((e.code === 'KeyA') && !(event.ctrlKey || event.metaKey)) {
// Do something when the 'A' key was pressed, but only
// when not in combination with the command or control key.
}
});
-

Unlocking the keyboard

The unlock() method unlocks all - keys captured by the lock() method and returns synchronously.

-
-
navigator.keyboard.unlock();
-

When a document is closed, the browser always implicitly calls unlock().

Demo

You can test the Keyboard Lock API by running - the demo on Glitch. Be sure to check out the source code. Clicking the Enter full screen button - below launches the demo in a new window so it can enter full screen mode.

-
- -

Security Considerations

One concern with - this API is that it could be used to grab all of the keys and (in conjunction with the Fullscreen API and the PointerLock API) prevent the user from - exiting the web page. To prevent this, the spec requires the browser to provide a way for the user to exit from - keyboard lock even if all of the keys are requested by the API. In Chrome, this escape hatch is a long (two second) - Esc key press to trigger an exit from Keyboard Lock.

-

Acknowledgements -

This article was reviewed by Joe Medley and Kayce Basques. The Keyboard Lock spec is authored by Gary Kacmarcik and Jamie Walch. Hero image by Ken Suarez on Unsplash. -

diff --git a/test/stubs/sanitizer/allowed-attributes.html b/test/stubs/sanitizer/allowed-attributes.html new file mode 100644 index 0000000..24dea86 --- /dev/null +++ b/test/stubs/sanitizer/allowed-attributes.html @@ -0,0 +1,3 @@ +Alt text +test + diff --git a/test/stubs/sanitizer/allowed-empty-elements.html b/test/stubs/sanitizer/allowed-empty-elements.html new file mode 100644 index 0000000..ad90a16 --- /dev/null +++ b/test/stubs/sanitizer/allowed-empty-elements.html @@ -0,0 +1,9 @@ + + + + +
+
+ + + diff --git a/test/stubs/sanitizer/empty-elements.html b/test/stubs/sanitizer/empty-elements.html new file mode 100644 index 0000000..88d7f0d --- /dev/null +++ b/test/stubs/sanitizer/empty-elements.html @@ -0,0 +1,4 @@ +

+
+

Hello

+ diff --git a/test/stubs/sanitizer/html-comments.html b/test/stubs/sanitizer/html-comments.html new file mode 100644 index 0000000..679c0d8 --- /dev/null +++ b/test/stubs/sanitizer/html-comments.html @@ -0,0 +1,6 @@ +

+ + Hello, World! + +

+ diff --git a/test/stubs/sanitizer/restricted-attributes.html b/test/stubs/sanitizer/restricted-attributes.html new file mode 100644 index 0000000..9a0d429 --- /dev/null +++ b/test/stubs/sanitizer/restricted-attributes.html @@ -0,0 +1,3 @@ +

class attribute

+
style attribute
+

js attributes

diff --git a/test/stubs/sanitizer/restricted-elements.html b/test/stubs/sanitizer/restricted-elements.html new file mode 100644 index 0000000..7504e12 --- /dev/null +++ b/test/stubs/sanitizer/restricted-elements.html @@ -0,0 +1,5 @@ + + + + +text diff --git a/test/stubs/sanitizer/svg.html b/test/stubs/sanitizer/svg.html new file mode 100644 index 0000000..2433f8e --- /dev/null +++ b/test/stubs/sanitizer/svg.html @@ -0,0 +1,7 @@ + + + + I love SVG! + + Sorry, your browser does not support inline SVG. + From 0ecaa7dbd8c3f235332864f4fb18e52d79544094 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Fri, 14 Aug 2020 21:05:17 +0400 Subject: [PATCH 8/9] feat!: accept parameters in parser constructor --- src/Parser.ts | 33 ++++++++++++++++++++------------- src/Parsers/AtomParser.ts | 22 ++++++++-------------- src/Parsers/BaseParser.ts | 15 +++++++++++++++ src/Parsers/JSONParser.ts | 12 ++++++++++++ src/Parsers/JsonParser.ts | 10 ---------- src/Parsers/RSSParser.ts | 21 ++++++++------------- src/types/IParser.ts | 6 ------ test/RssParser.test.ts | 8 ++++---- 8 files changed, 67 insertions(+), 60 deletions(-) create mode 100644 src/Parsers/BaseParser.ts create mode 100644 src/Parsers/JSONParser.ts delete mode 100644 src/Parsers/JsonParser.ts diff --git a/src/Parser.ts b/src/Parser.ts index 91be34c..9725761 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -6,6 +6,7 @@ import { RSSFeedAdapter } from './Adapter/RSSFeedAdapter'; import { AtomFeedAdapter } from './Adapter/AtomFeedAdapter'; import { NetworkError } from './Errors/NetworkError'; import { FeedTypeError } from './Errors/FeedTypeError'; +// @ts-ignore import { version } from '../package.json'; export type PFPOptions = { @@ -40,17 +41,8 @@ export class Parser { /** * Changed options will be merged with the defaults. 
*/ - constructor(options: PFPOptions) { - this.options = { - ...options, - fetchOptions: { - ...options.fetchOptions, - headers: { - ...DEFAULT_OPTIONS.fetchOptions.headers, - ...options.fetchOptions?.headers, - }, - }, - }; + constructor(options?: PFPOptions) { + this.options = options ? mergeOptions(options) : DEFAULT_OPTIONS; } /** @@ -75,16 +67,31 @@ export class Parser { * Parse a feed from the given XML document. */ public parseDocument(doc: Document): Feed { + const { sanitization } = this.options; const type = XmlFeedTypeDetector.detect(doc); if (type === FeedType.RSS) { - return RSSFeedAdapter.adapt(new RSSParser(doc).parse()); + return RSSFeedAdapter.adapt(new RSSParser({ sanitization }).parse(doc)); } if (type === FeedType.Atom) { - return AtomFeedAdapter.adapt(new AtomParser(doc).parse()); + return AtomFeedAdapter.adapt(new AtomParser({ sanitization }).parse(doc)); } throw new FeedTypeError('Unknown feed type'); } } + +/** + * Merge provided options with the defaults. + */ +const mergeOptions = (options: PFPOptions) => ({ + ...options, + fetchOptions: { + ...options.fetchOptions, + headers: { + ...DEFAULT_OPTIONS.fetchOptions.headers, + ...options.fetchOptions?.headers, + }, + }, +}); diff --git a/src/Parsers/AtomParser.ts b/src/Parsers/AtomParser.ts index 8a1f30d..e651ef5 100644 --- a/src/Parsers/AtomParser.ts +++ b/src/Parsers/AtomParser.ts @@ -7,12 +7,14 @@ import { AtomLink, AtomPerson, AtomSource, + IParser, } from '../types'; import { getExtensionName, isExtension, parseExtension, } from '../utils/extensions'; +import { BaseParser, ParserOptions } from './BaseParser'; // Atom elements which contain URIs // https://tools.ietf.org/html/rfc4287 @@ -36,17 +38,14 @@ import { /** * Parser for Atom feeds */ -export class AtomParser { +export class AtomParser extends BaseParser implements IParser { private readonly entry: AtomEntry; private readonly source: AtomSource; private readonly person: AtomPerson; private feed: AtomFeed; - private document: 
Document; - // private baseURL: Maybe; - constructor(document: Document) { - this.document = document; - // this.baseURL = null; + constructor(options?: ParserOptions) { + super(options); this.feed = { id: null, @@ -100,19 +99,14 @@ export class AtomParser { this.person = { email: null, name: null, uri: null }; } - public parse(): AtomFeed { - const root = this.document.firstElementChild; + public parse(doc: Document): AtomFeed { + const root = doc.firstElementChild; if (root === null) { throw new Error('No root node'); } - // this.baseURL = root.getAttributeNS('xml', 'base'); - - const walker = window.document.createTreeWalker( - root, - NodeFilter.SHOW_ELEMENT - ); + const walker = doc.createTreeWalker(root, NodeFilter.SHOW_ELEMENT); walker.firstChild(); this.parseRoot(walker); diff --git a/src/Parsers/BaseParser.ts b/src/Parsers/BaseParser.ts new file mode 100644 index 0000000..96aff80 --- /dev/null +++ b/src/Parsers/BaseParser.ts @@ -0,0 +1,15 @@ +export type ParserOptions = { + sanitization?: boolean; +}; + +const DEFAULT_OPTIONS = { + sanitization: true, +}; + +export abstract class BaseParser { + protected options: ParserOptions; + + protected constructor(options?: ParserOptions) { + this.options = { ...DEFAULT_OPTIONS, ...options }; + } +} diff --git a/src/Parsers/JSONParser.ts b/src/Parsers/JSONParser.ts new file mode 100644 index 0000000..3d00301 --- /dev/null +++ b/src/Parsers/JSONParser.ts @@ -0,0 +1,12 @@ +// @ts-nocheck +import { BaseParser, ParserOptions } from './BaseParser'; +import { IParser } from '../types'; + +/** + * @todo + */ +export class JSONParser extends BaseParser implements IParser { + constructor(options: ParserOptions) { + super(options); + } +} diff --git a/src/Parsers/JsonParser.ts b/src/Parsers/JsonParser.ts deleted file mode 100644 index c362b3a..0000000 --- a/src/Parsers/JsonParser.ts +++ /dev/null @@ -1,10 +0,0 @@ -/** - * @todo - */ -export class JsonParser { - content: JSON; - - constructor(content: JSON) { - this.content = 
content; - } -} diff --git a/src/Parsers/RSSParser.ts b/src/Parsers/RSSParser.ts index 60d0917..aa34b24 100644 --- a/src/Parsers/RSSParser.ts +++ b/src/Parsers/RSSParser.ts @@ -9,29 +9,29 @@ import { RSSCloud, RSSTextInput, IParser, - ParserOptions, } from '../types'; + import { append } from '../utils/collection'; import { getExtensionName, isExtension, parseExtension, } from '../utils/extensions'; -import { sanitize } from '../utils/sanitizing'; +import { sanitize } from '../utils/sanitizer'; +import { BaseParser, ParserOptions } from './BaseParser'; /** * Parser for RSS feeds */ -export class RSSParser implements IParser { +export class RSSParser extends BaseParser implements IParser { feed: RSSFeed; private readonly image: RSSImage; private readonly item: RSSItem; private readonly guid: RSSGUID; private readonly textInput: RSSTextInput; - private options: ParserOptions; - constructor(options: ParserOptions) { - this.options = options; + constructor(options?: ParserOptions) { + super(options); this.image = { description: null, @@ -98,10 +98,7 @@ export class RSSParser implements IParser { throw new Error('No root node'); } - const walker = window.document.createTreeWalker( - root, - NodeFilter.SHOW_ELEMENT - ); + const walker = doc.createTreeWalker(root, NodeFilter.SHOW_ELEMENT); walker.firstChild(); do { @@ -346,8 +343,6 @@ export class RSSParser implements IParser { if (text === null) return null; const doc = new DOMParser().parseFromString(text, 'text/html'); - const element = sanitize(doc); - - return element.innerHTML; + return sanitize(doc); } } diff --git a/src/types/IParser.ts b/src/types/IParser.ts index 959f2a3..15c4557 100644 --- a/src/types/IParser.ts +++ b/src/types/IParser.ts @@ -1,9 +1,3 @@ -export type ParserOptions = { - sanitization: boolean; -}; - export interface IParser { - constructor(options: ParserOptions): void; - parse(doc: Document): any; } diff --git a/test/RssParser.test.ts b/test/RssParser.test.ts index be04c02..7d06367 100644 --- 
a/test/RssParser.test.ts +++ b/test/RssParser.test.ts @@ -21,9 +21,9 @@ it('should parse RSS feeds', () => { feedPaths.forEach(p => { const xml = fs.readFileSync(p, { encoding: 'utf8' }); const doc = new DOMParser().parseFromString(xml, 'application/xml'); - const parser = new RSSParser(doc); + const parser = new RSSParser(); - expect(() => parser.parse()).not.toThrowError(); + expect(() => parser.parse(doc)).not.toThrowError(); }); }); @@ -31,8 +31,8 @@ it('should parse canonical feed', () => { const canonical = fs.readFileSync(canonicalFeedPath, { encoding: 'utf8' }); const expected = fs.readFileSync(canonicalExpectation, { encoding: 'utf8' }); const doc = new DOMParser().parseFromString(canonical, 'application/xml'); - const parser = new RSSParser(doc); - const data = parser.parse(); + const parser = new RSSParser(); + const data = parser.parse(doc); expect(data).toEqual(JSON.parse(expected)); }); From fee8ca124d852be12d7411c3cd09195be8e4afe0 Mon Sep 17 00:00:00 2001 From: Eugene Dzhumak Date: Fri, 14 Aug 2020 21:13:44 +0400 Subject: [PATCH 9/9] tests: sanitizer stubs --- test/stubs/sanitizer/allowed-attributes.result.html | 3 +++ test/stubs/sanitizer/allowed-empty-elements.result.html | 9 +++++++++ test/stubs/sanitizer/empty-elements.result.html | 4 ++++ test/stubs/sanitizer/html-comments.result.html | 6 ++++++ test/stubs/sanitizer/restricted-attributes.result.html | 3 +++ test/stubs/sanitizer/restricted-elements.result.html | 2 ++ test/stubs/sanitizer/svg.result.html | 7 +++++++ 7 files changed, 34 insertions(+) create mode 100644 test/stubs/sanitizer/allowed-attributes.result.html create mode 100644 test/stubs/sanitizer/allowed-empty-elements.result.html create mode 100644 test/stubs/sanitizer/empty-elements.result.html create mode 100644 test/stubs/sanitizer/html-comments.result.html create mode 100644 test/stubs/sanitizer/restricted-attributes.result.html create mode 100644 test/stubs/sanitizer/restricted-elements.result.html create mode 100644 
test/stubs/sanitizer/svg.result.html diff --git a/test/stubs/sanitizer/allowed-attributes.result.html b/test/stubs/sanitizer/allowed-attributes.result.html new file mode 100644 index 0000000..24dea86 --- /dev/null +++ b/test/stubs/sanitizer/allowed-attributes.result.html @@ -0,0 +1,3 @@ +Alt text +test + diff --git a/test/stubs/sanitizer/allowed-empty-elements.result.html b/test/stubs/sanitizer/allowed-empty-elements.result.html new file mode 100644 index 0000000..ad90a16 --- /dev/null +++ b/test/stubs/sanitizer/allowed-empty-elements.result.html @@ -0,0 +1,9 @@ + + + + +
+
+ + + diff --git a/test/stubs/sanitizer/empty-elements.result.html b/test/stubs/sanitizer/empty-elements.result.html new file mode 100644 index 0000000..a89756d --- /dev/null +++ b/test/stubs/sanitizer/empty-elements.result.html @@ -0,0 +1,4 @@ + + +

Hello

+ diff --git a/test/stubs/sanitizer/html-comments.result.html b/test/stubs/sanitizer/html-comments.result.html new file mode 100644 index 0000000..a286a60 --- /dev/null +++ b/test/stubs/sanitizer/html-comments.result.html @@ -0,0 +1,6 @@ +

+ + Hello, World! + +

+ diff --git a/test/stubs/sanitizer/restricted-attributes.result.html b/test/stubs/sanitizer/restricted-attributes.result.html new file mode 100644 index 0000000..4964abd --- /dev/null +++ b/test/stubs/sanitizer/restricted-attributes.result.html @@ -0,0 +1,3 @@ +

class attribute

+
style attribute
+

js attributes

diff --git a/test/stubs/sanitizer/restricted-elements.result.html b/test/stubs/sanitizer/restricted-elements.result.html new file mode 100644 index 0000000..139597f --- /dev/null +++ b/test/stubs/sanitizer/restricted-elements.result.html @@ -0,0 +1,2 @@ + + diff --git a/test/stubs/sanitizer/svg.result.html b/test/stubs/sanitizer/svg.result.html new file mode 100644 index 0000000..2433f8e --- /dev/null +++ b/test/stubs/sanitizer/svg.result.html @@ -0,0 +1,7 @@ + + + + I love SVG! + + Sorry, your browser does not support inline SVG. +