|
| 1 | +<!DOCTYPE HTML> |
| 2 | +<html> |
| 3 | +<head> |
| 4 | + <title>Pure JavaScript HTML Parser - Demo</title> |
| 5 | + <link href="https://netdna.bootstrapcdn.com/twitter-bootstrap/2.3.2/css/bootstrap-combined.min.css" rel="stylesheet"> <!-- Bootstrap 2.3.2 styles; fetched over https so the stylesheet is not blocked as mixed content when the page itself is served securely --> |
| 6 | +</head> |
| 7 | +<body> |
| 8 | + <div class="container"> |
| 9 | + |
| 10 | + <div class="row"> |
| 11 | + <div class="hero-unit span12"> |
| 12 | + <h1>Pure JavaScript HTML Parser</h1> |
| 13 | + <p>All-in-one: XML Serializer, DOM Builder, DOM Document Creator, A SAX-style API </p> |
| 14 | + <p> |
| 15 | + <a class="btn btn-primary btn-large" href="https://github.com/blowsie/Pure-JavaScript-HTML-Parser">Learn more |
| 16 | + </a> |
| 17 | + </p> |
| 18 | + </div> |
| 19 | + |
| 20 | + </div> |
| 21 | + <div class="row"> |
| 22 | + <div class="span8"> <!-- Left column: HTML input form plus the XML output area; each label is tied to its textarea via for/id --> |
| 23 | + <form id="form"> |
| 24 | + <label for="input">Input (HTML):</label><br /> |
| 25 | + <textarea cols="60" rows="10" id="input" style="width: 100%;"></textarea><br /> |
| 26 | + <input type="submit" value="Run" /> |
| 27 | + </form> |
| 28 | + <br /> |
| 29 | + <label for="output">Output (XML):</label><br /> |
| 30 | + <textarea cols="60" rows="10" id="output" style="width: 100%;"></textarea> |
| 31 | + </div> |
| 32 | + <div class="span4"> |
| 33 | + While this library doesn't cover the full gamut of possible weirdness that HTML provides, it does handle a lot of the most obvious stuff. All of the following are accounted for: |
| 34 | + |
| 35 | + |
| 36 | + <ul> <!-- Sample markup in the examples is entity-escaped so the browser displays it as text instead of parsing it as real elements --> |
| 37 | + <li>Unclosed Tags: |
| 38 | + <pre>HTMLtoXML("&lt;p&gt;&lt;b&gt;Hello") == '&lt;p&gt;&lt;b&gt;Hello&lt;/b&gt;&lt;/p&gt;'</pre> |
| 39 | + </li> |
| 40 | + <li>Empty Elements: |
| 41 | + <pre>HTMLtoXML("&lt;img src=test.jpg&gt;") == '&lt;img src="test.jpg"/&gt;'</pre> |
| 42 | + </li> |
| 43 | + <li>Block vs. Inline Elements: |
| 44 | + <pre>HTMLtoXML("&lt;b&gt;Hello &lt;p&gt;John") == '&lt;b&gt;Hello &lt;/b&gt;&lt;p&gt;John&lt;/p&gt;'</pre> |
| 45 | + </li> |
| 46 | + <li>Self-closing Elements: |
| 47 | + <pre>HTMLtoXML("&lt;p&gt;Hello&lt;p&gt;World") == '&lt;p&gt;Hello&lt;/p&gt;&lt;p&gt;World&lt;/p&gt;'</pre> |
| 48 | + </li> |
| 49 | + <li>Attributes Without Values: |
| 50 | + <pre>HTMLtoXML("&lt;input disabled&gt;") == '&lt;input disabled="disabled"/&gt;'</pre> |
| 51 | + </li> |
| 52 | + </ul> |
| 53 | + |
| 54 | + <b>Note:</b> It does <b>not</b> take into account where in the document an element should exist. Right now you can put block elements in a head or th inside a p and it'll happily accept them. It's not entirely clear how the logic should work for those, but it's something that I'm open to exploring. |
| 55 | + </div> |
| 56 | + </div> |
| 57 | + |
| 58 | + </div> |
| 59 | + <script src="/htmlparser.js"></script> |
| 60 | + <script> |
| 61 | + window.onload = function () { |
| 62 | + var input = document.getElementById("input"); |
| 63 | + var output = document.getElementById("output"); |
| 64 | + var form = document.getElementById("form"); |
| 65 | + |
| 66 | + input.value = "<p>hello <b style='test foo' disabled align=\"b\\\"ar\">john <a href='http://ejohn.org/'>resig</b><img src=test.jpg></img><div>test</div><p>hello world"; |
| 67 | + output.value = ""; |
| 68 | + |
| 69 | + form.onsubmit = function (e) { |
| 70 | + if (e) e.preventDefault(); |
| 71 | + if (typeof event != "undefined") event.returnValue = false; |
| 72 | + |
| 73 | + output.value = HTMLtoXML(input.value); |
| 74 | + return false; |
| 75 | + }; |
| 76 | + }; |
| 77 | +</script> |
| 78 | +</body> |
| 79 | +</html> |
0 commit comments