From 7563929d2b95cf1530a4c6eaaf29678f784845e7 Mon Sep 17 00:00:00 2001 From: Kane York Date: Mon, 12 Mar 2018 17:03:37 -0700 Subject: [PATCH 01/33] update README to point to the spec --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e266555..c8eee22 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,4 @@ css === [![GoDoc](https://godoc.org/github.com/gorilla/css?status.svg)](https://godoc.org/github.com/gorilla/css) [![Build Status](https://travis-ci.org/gorilla/css.png?branch=master)](https://travis-ci.org/gorilla/css) -A CSS3 tokenizer. +A CSS3 tokenizer based on https://www.w3.org/TR/css-syntax-3/#tokenizer-algorithms From 55f89738f4941e08660ed869c7033c459544fd08 Mon Sep 17 00:00:00 2001 From: Kane York Date: Mon, 12 Mar 2018 17:12:01 -0700 Subject: [PATCH 02/33] Export TokenType, add missing token types --- scanner/scanner.go | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/scanner/scanner.go b/scanner/scanner.go index 23fa740..88d73bf 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -12,17 +12,17 @@ import ( "unicode/utf8" ) -// tokenType identifies the type of lexical tokens. -type tokenType int +// TokenType identifies the type of lexical tokens. +type TokenType int // String returns a string representation of the token type. -func (t tokenType) String() string { +func (t TokenType) String() string { return tokenNames[t] } // Token represents a token and the corresponding string. type Token struct { - Type tokenType + Type TokenType Value string Line int Column int @@ -57,7 +57,7 @@ const ( TokenUnicodeRange TokenCDO TokenCDC - TokenS + TokenS // whitespace-token TokenComment TokenFunction TokenIncludes @@ -65,12 +65,28 @@ const ( TokenPrefixMatch TokenSuffixMatch TokenSubstringMatch - TokenChar + TokenDelim TokenBOM + // Added later + TokenBadString + TokenBadURI + TokenColumn + TokenColon + TokenSemicolon + TokenComma + TokenOpenBracket + TokenCloseBracket + TokenOpenParen + TokenCloseParen + TokenOpenBrace + TokenCloseBrace ) +// backwards compatibility +const TokenChar = TokenDelim + // tokenNames maps tokenType's to their names. Used for conversion to string. 
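// A small illustrative sketch, not part of this patch: with TokenType exported
// and the single-character tokens split out of the old catch-all TokenChar, a
// consumer can switch on the token type directly. The helper name below is
// hypothetical; TokenChar remains as an alias for TokenDelim.
func describeToken(tok Token) string {
	switch tok.Type {
	case TokenOpenBrace, TokenCloseBrace, TokenOpenBracket, TokenCloseBracket,
		TokenOpenParen, TokenCloseParen:
		return "grouping: " + tok.Value
	case TokenDelim: // formerly TokenChar
		return "delim: " + tok.Value
	default:
		return tok.Type.String()
	}
}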
-var tokenNames = map[tokenType]string{ +var tokenNames = map[TokenType]string{ TokenError: "error", TokenEOF: "EOF", TokenIdent: "IDENT", @@ -92,8 +108,20 @@ var tokenNames = map[tokenType]string{ TokenPrefixMatch: "PREFIXMATCH", TokenSuffixMatch: "SUFFIXMATCH", TokenSubstringMatch: "SUBSTRINGMATCH", - TokenChar: "CHAR", + TokenDelim: "DELIM", TokenBOM: "BOM", + TokenBadString: "BAD-STRING", + TokenBadURI: "BAD-URI", + TokenColumn: "COLUMN", + TokenColon: "COLON", + TokenSemicolon: "SEMICOLON", + TokenComma: "COMMA", + TokenOpenBracket: "[", + TokenCloseBracket: "]", + TokenOpenParen: "(", + TokenCloseParen: ")", + TokenOpenBrace: "{", + TokenCloseBrace: "}", } // Macros and productions ----------------------------------------------------- From 63286005d87c505107be7eb61669fbd4bd70bba5 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 13 Mar 2018 10:17:05 -0700 Subject: [PATCH 03/33] Add a text/transform preprocessor for input --- scanner/crlf.go | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 scanner/crlf.go diff --git a/scanner/crlf.go b/scanner/crlf.go new file mode 100644 index 0000000..ba5c7dc --- /dev/null +++ b/scanner/crlf.go @@ -0,0 +1,62 @@ +package scanner + +// The crlf package helps in dealing with files that have DOS-style CR/LF line +// endings. +// +// Copyright (c) 2015 Andy Balholm. Licensed under 2-Clause BSD. +// +// package crlf + +import ( + "io" + + "golang.org/x/text/transform" +) + +// Normalize takes CRLF, CR, or LF line endings in src, and converts them +// to LF in dst. +// +// cssparse: Also replace null bytes with U+FFFD REPLACEMENT CHARACTER. +type normalize struct { + prev byte +} + +const replacementCharacter = "\uFFFD" + +func (n *normalize) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + for nDst < len(dst) && nSrc < len(src) { + c := src[nSrc] + switch c { + case '\r': + dst[nDst] = '\n' + case '\n': + if n.prev == '\r' { + nSrc++ + n.prev = c + continue + } + dst[nDst] = '\n' + case 0: + // nb: len(replacementCharacter) == 3 + if nDst+3 >= len(dst) { + err = transform.ErrShortDst + return + } + copy(dst[nDst:], replacementCharacter[:]) + nDst += 2 + default: + dst[nDst] = c + } + n.prev = c + nDst++ + nSrc++ + } + if nSrc < len(src) { + err = transform.ErrShortDst + } + return +} + +func (n *normalize) Reset() { + n.prev = 0 +} From c02b43fa8daf5e8d99d0b3cff09f2dad5da75f54 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 13 Mar 2018 12:59:14 -0700 Subject: [PATCH 04/33] implement the 'consume a token' algorithm --- scanner/scanner.go | 111 +------------- scanner/token.go | 228 +++++++++++++++++++++++++++++ scanner/tokenizer.go | 335 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 564 insertions(+), 110 deletions(-) create mode 100644 scanner/token.go create mode 100644 scanner/tokenizer.go diff --git a/scanner/scanner.go b/scanner/scanner.go index 88d73bf..cbe5fdf 100644 --- a/scanner/scanner.go +++ b/scanner/scanner.go @@ -12,22 +12,6 @@ import ( "unicode/utf8" ) -// TokenType identifies the type of lexical tokens. -type TokenType int - -// String returns a string representation of the token type. -func (t TokenType) String() string { - return tokenNames[t] -} - -// Token represents a token and the corresponding string. -type Token struct { - Type TokenType - Value string - Line int - Column int -} - // String returns a string representation of the token. 
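// Usage sketch for the preprocessor introduced above (not part of the patch;
// assumes the normalize transformer, the "io" import, and
// golang.org/x/text/transform): wrapping an input stream this way gives the
// tokenizer LF-only, NUL-free text, which is how the tokenizer added in the
// next patch constructs its own reader.
func newNormalizedReader(r io.Reader) io.Reader {
	return transform.NewReader(r, new(normalize))
}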
func (t *Token) String() string { if len(t.Value) > 10 { @@ -40,90 +24,6 @@ func (t *Token) String() string { // All tokens ----------------------------------------------------------------- -// The complete list of tokens in CSS3. -const ( - // Scanner flags. - TokenError tokenType = iota - TokenEOF - // From now on, only tokens from the CSS specification. - TokenIdent - TokenAtKeyword - TokenString - TokenHash - TokenNumber - TokenPercentage - TokenDimension - TokenURI - TokenUnicodeRange - TokenCDO - TokenCDC - TokenS // whitespace-token - TokenComment - TokenFunction - TokenIncludes - TokenDashMatch - TokenPrefixMatch - TokenSuffixMatch - TokenSubstringMatch - TokenDelim - TokenBOM - // Added later - TokenBadString - TokenBadURI - TokenColumn - TokenColon - TokenSemicolon - TokenComma - TokenOpenBracket - TokenCloseBracket - TokenOpenParen - TokenCloseParen - TokenOpenBrace - TokenCloseBrace -) - -// backwards compatibility -const TokenChar = TokenDelim - -// tokenNames maps tokenType's to their names. Used for conversion to string. -var tokenNames = map[TokenType]string{ - TokenError: "error", - TokenEOF: "EOF", - TokenIdent: "IDENT", - TokenAtKeyword: "ATKEYWORD", - TokenString: "STRING", - TokenHash: "HASH", - TokenNumber: "NUMBER", - TokenPercentage: "PERCENTAGE", - TokenDimension: "DIMENSION", - TokenURI: "URI", - TokenUnicodeRange: "UNICODE-RANGE", - TokenCDO: "CDO", - TokenCDC: "CDC", - TokenS: "S", - TokenComment: "COMMENT", - TokenFunction: "FUNCTION", - TokenIncludes: "INCLUDES", - TokenDashMatch: "DASHMATCH", - TokenPrefixMatch: "PREFIXMATCH", - TokenSuffixMatch: "SUFFIXMATCH", - TokenSubstringMatch: "SUBSTRINGMATCH", - TokenDelim: "DELIM", - TokenBOM: "BOM", - TokenBadString: "BAD-STRING", - TokenBadURI: "BAD-URI", - TokenColumn: "COLUMN", - TokenColon: "COLON", - TokenSemicolon: "SEMICOLON", - TokenComma: "COMMA", - TokenOpenBracket: "[", - TokenCloseBracket: "]", - TokenOpenParen: "(", - TokenCloseParen: ")", - TokenOpenBrace: "{", - TokenCloseBrace: "}", -} - // Macros and productions ----------------------------------------------------- // http://www.w3.org/TR/css3-syntax/#tokenization @@ -217,7 +117,7 @@ func init() { // Scanner -------------------------------------------------------------------- // New returns a new CSS scanner for the given input. -func New(input string) *Scanner { +func New(r *bufio.Reader) *Scanner { // Normalize newlines. input = strings.Replace(input, "\r\n", "\n", -1) return &Scanner{ @@ -227,15 +127,6 @@ func New(input string) *Scanner { } } -// Scanner scans an input and emits tokens following the CSS3 specification. -type Scanner struct { - input string - pos int - row int - col int - err *Token -} - // Next returns the next token from the input. // // At the end of the input the token type is TokenEOF. diff --git a/scanner/token.go b/scanner/token.go new file mode 100644 index 0000000..3a7dd27 --- /dev/null +++ b/scanner/token.go @@ -0,0 +1,228 @@ +// Copyright 2018 Kane York. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package scanner + +// TokenType identifies the type of lexical tokens. +type TokenType int + +// String returns a string representation of the token type. +func (t TokenType) String() string { + return tokenNames[t] +} + +// Stop tokens are TokenError, TokenEOF, TokenBadEscape, +// TokenBadString, TokenBadURI. A consumer that does not want to tolerate +// parsing errors should stop parsing when this returns true. 
+func (t TokenType) StopToken() bool { + return t == TokenError || t == TokenEOF || t == TokenBadEscape || t == + TokenBadString || t == TokenBadURI +} + +// Simple tokens TODO figure out a useful definition for this. +func (t TokenType) SimpleToken() bool { + if t.StopToken() { + return false + } + if t == TokenHash || t == TokenNumber || t == TokenPercentage || t == TokenDimension || t == TokenUnicodeRange { + return false + } + return true +} + +// Token represents a token in the CSS syntax. +type Token struct { + Type TokenType + String string + // Extra data for the token beyond a simple string. + // Will always be a pointer to a "Token*Extra" type in this package. + Extra TokenExtra +} + +// The complete list of tokens in CSS Syntax Level 3. +const ( + // Scanner flags. + TokenError tokenType = iota + TokenEOF + // From now on, only tokens from the CSS specification. + TokenIdent + TokenAtKeyword + TokenString + TokenHash + TokenNumber + TokenPercentage + TokenDimension + TokenURI + TokenUnicodeRange + TokenCDO + TokenCDC + // Whitespace + TokenS + // CSS Syntax Level 3 removes comments from the token stream, but they are + // preserved here. + TokenComment + TokenFunction + TokenIncludes + TokenDashMatch + TokenPrefixMatch + TokenSuffixMatch + TokenSubstringMatch + TokenColumn + TokenDelim + // Error tokens + TokenBadString + TokenBadURI + TokenBadEscape // a '\' right before a newline + // Single-character tokens + TokenColon + TokenSemicolon + TokenComma + TokenOpenBracket + TokenCloseBracket + TokenOpenParen + TokenCloseParen + TokenOpenBrace + TokenCloseBrace +) + +// backwards compatibility +const TokenChar = TokenDelim + +// tokenNames maps tokenType's to their names. Used for conversion to string. +var tokenNames = map[TokenType]string{ + TokenError: "error", + TokenEOF: "EOF", + TokenIdent: "IDENT", + TokenAtKeyword: "ATKEYWORD", + TokenString: "STRING", + TokenHash: "HASH", + TokenNumber: "NUMBER", + TokenPercentage: "PERCENTAGE", + TokenDimension: "DIMENSION", + TokenURI: "URI", + TokenUnicodeRange: "UNICODE-RANGE", + TokenCDO: "CDO", + TokenCDC: "CDC", + TokenS: "S", + TokenComment: "COMMENT", + TokenFunction: "FUNCTION", + TokenIncludes: "INCLUDES", + TokenDashMatch: "DASHMATCH", + TokenPrefixMatch: "PREFIXMATCH", + TokenSuffixMatch: "SUFFIXMATCH", + TokenSubstringMatch: "SUBSTRINGMATCH", + TokenDelim: "DELIM", + TokenBOM: "BOM", + TokenBadString: "BAD-STRING", + TokenBadURI: "BAD-URI", + TokenBadEscape: "BAD-ESCAPE", + TokenColumn: "COLUMN", + TokenColon: "COLON", + TokenSemicolon: "SEMICOLON", + TokenComma: "COMMA", + TokenOpenBracket: "LEFT-BRACKET", // [] + TokenCloseBracket: "RIGHT-BRACKET", + TokenOpenParen: "LEFT-PAREN", // () + TokenCloseParen: "RIGHT-PAREN", + TokenOpenBrace: "LEFT-BRACE", // {} + TokenCloseBrace: "RIGHT-BRACE", +} + +// TokenExtra fills the .Extra field of a token. Consumers should perform a +// type cast to the proper type to inspect its data. +type TokenExtra interface { + String() string +} + +// TokenExtraTypeLookup provides a handy check for whether a given token type +// should contain extra data. +var TokenExtraTypeLookup = map[TokenType]interface{}{ + TokenError: &TokenExtraError{}, + TokenBadEscape: &TokenExtraError{}, + TokenBadString: &TokenExtraError{}, + TokenBadURI: &TokenExtraError{}, + TokenHash: &TokenExtraHash{}, + TokenNumber: &TokenExtraNumeric{}, + TokenPercentage: &TokenExtraNumeric{}, + TokenDimension: &TokenExtraNumeric{}, + TokenUnicodeRange: &TokenExtraUnicodeRange{}, +} + +// TokenExtraHash is attached to TokenHash. 
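// Sketch of how the Extra field is meant to be consumed (not from the patch):
// the concrete type behind the TokenExtra interface is fixed per token type,
// as recorded in TokenExtraTypeLookup, so a type assertion recovers the
// structured data. The helper name is hypothetical.
func dimensionUnit(tok Token) (string, bool) {
	if tok.Type != TokenDimension {
		return "", false
	}
	num, ok := tok.Extra.(*TokenExtraNumeric)
	if !ok || num == nil {
		return "", false
	}
	return num.Dimension, true
}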
+type TokenExtraHash struct { + IsIdentifier bool +} + +func (e *TokenExtraHash) String() string { + if e == nil || !e.IsIdentifier { + return "unrestricted" + } else { + return "id" + } +} + +// TokenExtraNumeric is attached to TokenNumber, TokenPercentage, and +// TokenDimension. +type TokenExtraNumeric struct { + NonInteger bool + Dimension string +} + +func (e *TokenExtraNumeric) String() string { + if e == nil { + return "" + } + if e.Dimension != "" { + return e.Dimension + } + return "" +} + +// TokenExtraUnicodeRange is attached to a TokenUnicodeRange. +type TokenExtraUnicodeRange struct { + Start rune + End rune +} + +func (e *TokenExtraUnicodeRange) String() string { + if e == nil { + panic("TokenExtraUnicodeRange: unexpected nil pointer value") + } + + if e.Start == e.End { + return fmt.Sprintf("%0X", e.Start) + } else { + return fmt.Sprintf("%0X-%0X", e.Start, e.End) + } +} + +// TokenExtraError is attached to a TokenError and contains the same value as +// Tokenizer.Err(). See also the ParseError type and ParseError.Recoverable(). +type TokenExtraError struct { + Err error +} + +// String returns the error text. +func (e *TokenExtraError) String() string { + return e.Err.Error() +} + +// Error implements error. +func (e *TokenExtraError) Error() string { + return e.Err.Error() +} + +// Cause implements errors.Causer. +func (e *TokenExtraError) Cause() error { + return e.Err +} + +// Returns the ParseError object, if present. +func (e *TokenExtraError) ParseError() *ParseError { + pe, ok := e.Err.(*ParseError) + if !ok { + return nil + } + return pe +} diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go new file mode 100644 index 0000000..0f023fc --- /dev/null +++ b/scanner/tokenizer.go @@ -0,0 +1,335 @@ +package scanner + +import ( + "bufio" + stdErrors "errors" + "golang.org/x/text/transform" +) + +var ( + ErrBadEscape = &ParseError{Type: TokenBadEscape, Message: "bad escape (backslash-newline) in input"} +) + +// Tokenizer scans an input and emits tokens following the CSS Syntax Level 3 +// specification. +type Tokenizer struct { + r *bufio.Reader + err error + peek [3]byte + + tok Token +} + +// Construct a Tokenizer from the given input. Input need not be normalized. +func NewTokenizer(r io.Reader) *Tokenizer { + return &Tokenizer{ + r: bufio.NewReader(transform.NewReader(r, new(normalize))), + } +} + +// Scan for the next token. If the tokenizer is in an error state, no input will be consumed. See .AcknowledgeError(). +func (z *Tokenizer) Scan() { + defer func() { + rec := recover() + if rErr, ok := rec.(error); ok { + z.err = rErr + } else if rec != nil { + panic(rec) + } + }() + + if z.err != nil { + z.tok = z.next() + } +} + +// Return the current token. +func (z *Tokenizer) Token() Token { + return t.tok +} + +func (z *Tokenizer) Err() error { + return t.err +} + +// Acknowledge a returned error token. This can only be called to clear TokenBadString, TokenBadURI, and TokenEscape. +func (z *Tokenizer) AcknowledgeError() { + parseErr, ok := t.err.(*ParseError) + if !ok { + panic("cssparse: AcknowledgeError() called for a foreign error") + } +} + +// repeek reads the next 3 bytes into the tokenizer. 
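// Consumer-loop sketch (not part of the patch; assumes the Scan/Token/
// AcknowledgeError API above, with the error-clearing behaviour completed
// later in the series): collect tokens until EOF or a hard error, clearing
// recoverable syntax errors so tokenization can continue.
func allTokens(z *Tokenizer) []Token {
	var out []Token
	for {
		z.Scan()
		tok := z.Token()
		out = append(out, tok)
		switch tok.Type {
		case TokenEOF, TokenError:
			return out
		case TokenBadString, TokenBadURI, TokenBadEscape:
			// recoverable per the spec; acknowledge and keep scanning
			z.AcknowledgeError()
		}
	}
}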
+func (z *Tokenizer) repeek() { + by, err := z.r.Peek(3) + if err != nil { + panic(err) + } + copy(z.peek, by) + + // zero fill on EOF + i := len(by) + for i < 3 { + z.peek[i] = 0 + i++ + } +} + +// up to 2 bytes +func isValidEscape(p []byte) bool { + if len(p) < 2 { + return false + } + if p[0] != '\\' { + return false + } + if p[1] == '\n' { + return false + } + return true +} + +func isNameStart(p byte) bool { + if p > 0x7F { + return true // any high code points + } + if p == '_' { + return true + } + if p >= 'A' && p <= 'Z' { + return true + } + if p >= 'a' && p <= 'z' { + return true + } + return false +} + +func isNameCode(p byte) { + if p > 0x7F { + return true // any high code points + } + if p == '_' || p == '-' { + return true + } + if p >= 'A' && p <= 'Z' { + return true + } + if p >= 'a' && p <= 'z' { + return true + } + if p >= '0' && p <= '9' { + return true + } + return false +} + +// up to 3 bytes +func isStartIdentifier(p []byte) bool { + if p[0] == '-' { + p = p[1:] + } + if isNameStart(p[0]) { + return true + } else if isValidEscape(p) { + return true + } + return false +} + +// up to 3 bytes +func isStartNumber(p []byte) bool { + if p[0] == '+' || p[0] == '-' { + p = p[1:] + } + if p[0] == '.' { + p = p[1:] + } + if p[0] >= '0' && p[0] <= '9' { + return true + } + return false +} + +// repeek must be called before the following: + +func (z *Tokenizer) nextIsEscape() bool { + return isValidEscape(z.peek[:2]) +} + +func (z *Tokenizer) nextStartsIdentifier() bool { + return isStartIdentifier(z.peek[:3]) +} + +func (z *Tokenizer) nextIsNumber() bool { + return isStartNumber(z.peek[:3]) +} + +func (z *Tokenizer) nextCompare(vs string) bool { + return string(z.peek[:len(vs)]) == vs +} + +var premadeTokens = map[byte]Token{ + '$': Token{ + Type: TokenSuffixMatch, + String: "$=", + }, + '*': Token{ + Type: TokenSubstringMatch, + String: "*=", + }, + '^': Token{ + Type: TokenPrefixMatch, + String: "^=", + }, + '~': Token{ + Type: TokenIncludeMatch, + String: "~=", + }, + '(': Token{Type: TokenOpenParen, String: "("}, + ')': Token{Type: TokenCloseParen, String: ")"}, + '[': Token{Type: TokenOpenBracket, String: "["}, + ']': Token{Type: TokenCloseBracket, String: "]"}, + '{': Token{Type: TokenOpenBrace, String: "{"}, + '}': Token{Type: TokenCloseBrace, String: "}"}, + ':': Token{Type: TokenColon, String: ":"}, + ';': Token{Type: TokenSemicolon, String: ";"}, + ',': Token{Type: TokenComma, String: ","}, + + '\\': Token{Type: TokenBadEscape, String: "\\"}, + + 'A': Token{Type: TokenDashMatch, String: "|="}, + 'B': Token{Type: TokenColumn, String: "||"}, + 'C': Token{Type: TokenCDC, String: "-->"}, + 'O': Token{Type: TokenCDO, String: "", TokenCDC, "-->") checkMatch(" \n \t \n", TokenS, " \n \t \n") checkMatch("/* foo */", TokenComment, "/* foo */") - checkMatch("bar(", TokenFunction, "bar(") + checkMatch("bar(", TokenFunction, "bar") checkMatch("~=", TokenIncludes, "~=") checkMatch("|=", TokenDashMatch, "|=") checkMatch("^=", TokenPrefixMatch, "^=") checkMatch("$=", TokenSuffixMatch, "$=") checkMatch("*=", TokenSubstringMatch, "*=") checkMatch("{", TokenChar, "{") - checkMatch("\uFEFF", TokenBOM, "\uFEFF") + // checkMatch("\uFEFF", TokenBOM, "\uFEFF") checkMatch(`╯︵┻━┻"stuff"`, TokenIdent, "╯︵┻━┻", TokenString, `"stuff"`) } diff --git a/scanner/token.go b/scanner/token.go index 11270ad..ab5b377 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -1,9 +1,12 @@ // Copyright 2018 Kane York. +// Copyright 2012 The Gorilla Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package scanner +import "fmt" + // TokenType identifies the type of lexical tokens. type TokenType int @@ -31,10 +34,21 @@ func (t TokenType) SimpleToken() bool { return true } +// ParseError represents a CSS syntax error. +type ParseError struct { + Type TokenType + Message string + Loc int +} + +func (e *ParseError) Error() string { + return e.Message +} + // Token represents a token in the CSS syntax. type Token struct { - Type TokenType - String string + Type TokenType + Value string // Extra data for the token beyond a simple string. // Will always be a pointer to a "Token*Extra" type in this package. Extra TokenExtra @@ -43,7 +57,7 @@ type Token struct { // The complete list of tokens in CSS Syntax Level 3. const ( // Scanner flags. - TokenError tokenType = iota + TokenError TokenType = iota TokenEOF // From now on, only tokens from the CSS specification. TokenIdent @@ -113,7 +127,6 @@ var tokenNames = map[TokenType]string{ TokenSuffixMatch: "SUFFIXMATCH", TokenSubstringMatch: "SUBSTRINGMATCH", TokenDelim: "DELIM", - TokenBOM: "BOM", TokenBadString: "BAD-STRING", TokenBadURI: "BAD-URI", TokenBadEscape: "BAD-ESCAPE", @@ -192,9 +205,9 @@ func (e *TokenExtraUnicodeRange) String() string { } if e.Start == e.End { - return fmt.Sprintf("%0X", e.Start) + return fmt.Sprintf("U+%04X", e.Start) } else { - return fmt.Sprintf("%0X-%0X", e.Start, e.End) + return fmt.Sprintf("U+%04X-%04X", e.Start, e.End) } } diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go index 716a96f..24e8720 100644 --- a/scanner/tokenizer.go +++ b/scanner/tokenizer.go @@ -1,13 +1,21 @@ +// Copyright (c) 2018 Kane York. Licensed under 2-Clause BSD. + package scanner import ( "bufio" - stdErrors "errors" + "bytes" + "fmt" + "io" + "strconv" + "strings" + "unicode/utf8" + "golang.org/x/text/transform" ) var ( - ErrBadEscape = &ParseError{Type: TokenBadEscape, Message: "bad escape (backslash-newline) in input"} + errBadEscape = &ParseError{Type: TokenBadEscape, Message: "bad escape (backslash-newline) in input"} ) // Tokenizer scans an input and emits tokens following the CSS Syntax Level 3 @@ -27,46 +35,73 @@ func NewTokenizer(r io.Reader) *Tokenizer { } } -// Scan for the next token. If the tokenizer is in an error state, no input will be consumed. See .AcknowledgeError(). +// Scan for the next token. If the tokenizer is in an error state, no input +// will be consumed, and .AcknowledgeError() should be called instead. func (z *Tokenizer) Scan() { defer func() { rec := recover() if rErr, ok := rec.(error); ok { + // we only ever panic(err) z.err = rErr + z.tok = Token{ + Type: TokenError, + Extra: &TokenExtraError{Err: z.err}, + } } else if rec != nil { panic(rec) } }() - if z.err != nil { - z.tok = z.next() + if z.err == nil { + z.tok = z.consume() + } else if z.err == io.EOF { + z.tok = Token{ + Type: TokenEOF, + } + } else { + z.tok = Token{ + Type: TokenError, + Value: z.err.Error(), + Extra: &TokenExtraError{Err: z.err}, + } } } // Return the current token. func (z *Tokenizer) Token() Token { - return t.tok + return z.tok } +// Combines the calls to Scan() and Token(). +func (z *Tokenizer) Next() Token { + z.Scan() + return z.tok +} + +// Err returns the last error to be encountered and not cleared. func (z *Tokenizer) Err() error { - return t.err + return z.err } -// Acknowledge a returned error token. This can only be called to clear TokenBadString, TokenBadURI, and TokenEscape. 
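// Sketch (not from the patch): recovering the structured ParseError carried by
// an error token's Extra field, via the TokenExtraError.ParseError() helper
// defined earlier. The function name is hypothetical.
func syntaxErrorMessage(tok Token) (string, bool) {
	ee, ok := tok.Extra.(*TokenExtraError)
	if !ok {
		return "", false
	}
	if pe := ee.ParseError(); pe != nil {
		return pe.Message, true
	}
	return ee.Error(), true
}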
+// Acknowledge a returned error token. This can only be called to clear +// TokenBadString, TokenBadURI, and TokenBadEscape. Using it for non-parsing +// errors will panic. func (z *Tokenizer) AcknowledgeError() { - parseErr, ok := t.err.(*ParseError) + _, ok := z.err.(*ParseError) if !ok { - panic("cssparse: AcknowledgeError() called for a foreign error") + panic("cssparse: AcknowledgeError() called for a foreign (non-syntax) error") } + z.err = nil } -// repeek reads the next 3 bytes into the tokenizer. +// repeek reads the next 3 bytes into the tokenizer. on EOF, the bytes are +// filled with zeroes. (Null bytes in the input are preprocessed into U+FFFD.) func (z *Tokenizer) repeek() { by, err := z.r.Peek(3) - if err != nil { + if err != nil && err != io.EOF { panic(err) } - copy(z.peek, by) + copy(z.peek[:], by) // zero fill on EOF i := len(by) @@ -93,7 +128,7 @@ func isValidEscape(p []byte) bool { // §4.3.9 func isNameStart(p byte) bool { - if p > 0x7F { + if p >= utf8.RuneSelf { return true // any high code points } if p == '_' { @@ -108,8 +143,8 @@ func isNameStart(p byte) bool { return false } -func isNameCode(p byte) { - if p > 0x7F { +func isNameCode(p byte) bool { + if p >= utf8.RuneSelf { return true // any high code points } if p == '_' || p == '-' { @@ -127,6 +162,19 @@ func isNameCode(p byte) { return false } +func isHexDigit(p byte) bool { + if p >= 'A' && p <= 'F' { + return true + } + if p >= 'a' && p <= 'f' { + return true + } + if p >= '0' && p <= '9' { + return true + } + return false +} + // up to 3 bytes func isStartIdentifier(p []byte) bool { if p[0] == '-' { @@ -175,53 +223,48 @@ func (z *Tokenizer) nextCompare(vs string) bool { var premadeTokens = map[byte]Token{ '$': Token{ - Type: TokenSuffixMatch, - String: "$=", + Type: TokenSuffixMatch, + Value: "$=", }, '*': Token{ - Type: TokenSubstringMatch, - String: "*=", + Type: TokenSubstringMatch, + Value: "*=", }, '^': Token{ - Type: TokenPrefixMatch, - String: "^=", + Type: TokenPrefixMatch, + Value: "^=", }, '~': Token{ - Type: TokenIncludeMatch, - String: "~=", + Type: TokenIncludes, + Value: "~=", }, - '(': Token{Type: TokenOpenParen, String: "("}, - ')': Token{Type: TokenCloseParen, String: ")"}, - '[': Token{Type: TokenOpenBracket, String: "["}, - ']': Token{Type: TokenCloseBracket, String: "]"}, - '{': Token{Type: TokenOpenBrace, String: "{"}, - '}': Token{Type: TokenCloseBrace, String: "}"}, - ':': Token{Type: TokenColon, String: ":"}, - ';': Token{Type: TokenSemicolon, String: ";"}, - ',': Token{Type: TokenComma, String: ","}, - - '\\': Token{Type: TokenBadEscape, String: "\\"}, - - 'A': Token{Type: TokenDashMatch, String: "|="}, - 'B': Token{Type: TokenColumn, String: "||"}, - 'C': Token{Type: TokenCDC, String: "-->"}, - 'O': Token{Type: TokenCDO, String: ""}, + 'O': Token{Type: TokenCDO, Value: "") { z.r.Discard(2) return premadeTokens['C'] } + z.readByte() // re-read, fall down to TokenDelim case '.': + z.unreadByte() z.repeek() if z.nextIsNumber() { - z.r.UnreadByte() return z.consumeNumeric() } + z.readByte() // re-read, fall down to TokenDelim case '/': z.repeek() if z.peek[0] == '*' { + z.r.Discard(1) return z.consumeComment() } case '<': @@ -299,25 +344,23 @@ func (z *Tokenizer) consume() Token { if z.nextStartsIdentifier() { s := z.consumeName() return Token{ - Type: TokenAtKeyword, - String: s, + Type: TokenAtKeyword, + Value: s, } } case '\\': + z.unreadByte() z.repeek() - if z.peek[0] != '\n' { + if z.nextIsEscape() { // input stream has a valid escape - z.r.UnreadByte() return 
z.consumeIdentish() } - z.err = ErrBadEscape + z.readByte() + z.err = errBadEscape return premadeTokens['\\'] case 'U', 'u': z.repeek() - if z.peek[0] == '+' && ((z.peek[1] >= '0' && z.peek[1] <= '9') || - (z.peek[1] >= 'A' && z.peek[1] <= 'F') || - (z.peek[1] >= 'a' && z.peek[1] <= 'f') || - (z.peek[1] == '?')) { + if z.peek[0] == '+' && (isHexDigit(z.peek[1]) || (z.peek[1] == '?')) { z.r.Discard(1) // (!) only discard the plus sign return z.consumeUnicodeRange() } @@ -325,23 +368,27 @@ func (z *Tokenizer) consume() Token { } if '0' <= ch && ch <= '9' { - z.r.UnreadByte() + z.unreadByte() return z.consumeNumeric() } if isNameStart(ch) { - z.r.UnreadByte() + z.unreadByte() return z.consumeIdentish() } return Token{ - Type: TokenDelim, - String: string(rune(ch)), + Type: TokenDelim, + Value: string(rune(ch)), } } // return the next byte, with 0 on EOF and panicing on other errors func (z *Tokenizer) nextByte() byte { + if z.err == io.EOF { + return 0 + } by, err := z.r.ReadByte() if err == io.EOF { + z.err = io.EOF return 0 } else if err != nil { panic(err) @@ -349,6 +396,69 @@ func (z *Tokenizer) nextByte() byte { return by } +func (z *Tokenizer) unreadByte() { + if z.err == io.EOF { + // don't unread after EOF + return + } + z.r.UnreadByte() +} + +func isWhitespace(r rune) bool { + return r == ' ' || r == '\t' || r == '\n' +} + +func isNotWhitespace(r rune) bool { + return !isWhitespace(r) +} + +func (z *Tokenizer) consumeWhitespace(ch byte) Token { + const wsBufSize = 32 + + sawNewline := false + if ch == '\n' { + sawNewline = true + } + + for { + // Consume whitespace in chunks of up to wsBufSize + buf, err := z.r.Peek(wsBufSize) + if err != nil && err != io.EOF { + panic(err) + } + if len(buf) == 0 { + break // Reached EOF + } + // find first non-whitespace char, discard up to there + idx := bytes.IndexFunc(buf, isNotWhitespace) + if idx == 0 { + break // Nothing to trim + } + if idx == -1 { + idx = len(buf) // Entire buffer is spaces + } + if /* const */ ch != 0 { + // only check for newlines when we're actually outputting a token + nlIdx := bytes.IndexByte(buf[:idx], '\n') + if nlIdx != -1 { + sawNewline = true + } + } + z.r.Discard(idx) + } + + if sawNewline { + return Token{ + Type: TokenS, + Value: "\n", + } + } + return Token{ + Type: TokenS, + Value: " ", + } +} + // 4.3.2 func (z *Tokenizer) consumeNumeric() Token { repr, notInteger := z.consumeNumericInner() @@ -356,9 +466,9 @@ func (z *Tokenizer) consumeNumeric() Token { NonInteger: notInteger, } t := Token{ - Type: TokenNumeric, - String: string(repr), - Extra: e, + Type: TokenNumber, + Value: string(repr), + Extra: e, } z.repeek() if z.nextStartsIdentifier() { @@ -373,26 +483,342 @@ func (z *Tokenizer) consumeNumeric() Token { // §4.3.3 func (z *Tokenizer) consumeIdentish() Token { + s := z.consumeName() + z.repeek() + if z.peek[0] == '(' { + z.r.Discard(1) + if strings.EqualFold(s, "url") { + return z.consumeURL() + } + return Token{ + Type: TokenFunction, + Value: s, + } + } + return Token{ + Type: TokenIdent, + Value: s, + } } // §4.3.4 func (z *Tokenizer) consumeString(delim byte) Token { + var frag []byte + var by byte + for { + by = z.nextByte() + if by == delim || by == 0 { + // end of string, EOF + return Token{ + Type: TokenString, + Value: string(frag), + } + } else if by == '\n' { + z.unreadByte() + z.err = &ParseError{ + Type: TokenBadString, + Message: "unterminated string", + } + return Token{ + Type: TokenBadString, + Value: string(frag), + Extra: &TokenExtraError{Err: z.err}, + } + } else if by == '\\' { + 
z.repeek() + if z.peek[0] == 0 { + // escape @ EOF, ignore. + } else if z.peek[0] == '\n' { + // valid escaped newline, ignore. + z.r.Discard(1) + } else if true { + // stream will always contain a valid escape here + cp := z.consumeEscapedCP() + var tmp [utf8.UTFMax]byte + n := utf8.EncodeRune(tmp[:], cp) + frag = append(frag, tmp[:n]...) + } + } else { + frag = append(frag, by) + } + } } // §4.3.5 +// reader must be in the "url(" state func (z *Tokenizer) consumeURL() Token { + z.consumeWhitespace(0) + z.repeek() + if z.peek[0] == 0 { + return Token{ + Type: TokenURI, + Value: "", + } + } else if z.peek[0] == '\'' || z.peek[0] == '"' { + t := z.consumeString(z.peek[0]) + if t.Type == TokenBadString { + t.Type = TokenBadURI + t.Value += z.consumeBadURL() + z.err = &ParseError{ + Type: TokenBadURI, + Message: "unterminated string in url()", + } + t.Extra = &TokenExtraError{ + Err: z.err, + } + return t + } + t.Type = TokenURI + z.consumeWhitespace(0) + z.repeek() + if z.peek[0] == ')' || z.peek[0] == 0 { + z.nextByte() + return t + } + t.Type = TokenBadURI + t.Value += z.consumeBadURL() + z.err = &ParseError{ + Type: TokenBadURI, + Message: "url() with string missing close parenthesis", + } + t.Extra = &TokenExtraError{ + Err: z.err, + } + return t + } + var frag []byte + var by byte + for { + by = z.nextByte() + if by == ')' || by == 0 { + return Token{Type: TokenURI, Value: string(frag)} + } else if isWhitespace(rune(by)) { + z.consumeWhitespace(0) + z.repeek() + if z.peek[0] == ')' || z.peek[0] == 0 { + z.nextByte() // ')' + return Token{Type: TokenURI, Value: string(frag)} + } + z.err = &ParseError{ + Type: TokenBadURI, + Message: "bare url() with internal whitespace", + } + return Token{ + Type: TokenBadURI, + Value: string(frag) + z.consumeBadURL(), + Extra: &TokenExtraError{Err: z.err}, + } + } else if by == '\'' || by == '"' || by == '(' { + z.err = &ParseError{ + Type: TokenBadURI, + Message: fmt.Sprintf("bare url() with illegal character '%c'", by), + } + return Token{ + Type: TokenBadURI, + Value: string(frag) + z.consumeBadURL(), + Extra: &TokenExtraError{Err: z.err}, + } + } else if (0 <= by && by <= 0x08) || (0x0B == by) || (0x0E <= by && by <= 0x1F) || (0x7F == by) { + z.err = &ParseError{ + Type: TokenBadURI, + Message: fmt.Sprintf("bare url() with unprintable character '%d'", by), + } + return Token{ + Type: TokenBadURI, + Value: string(frag) + z.consumeBadURL(), + Extra: &TokenExtraError{Err: z.err}, + } + } else if by == '\\' { + z.repeek() + if z.peek[0] != '\n' && z.peek[0] != 0 { + cp := z.consumeEscapedCP() + var tmp [utf8.UTFMax]byte + n := utf8.EncodeRune(tmp[:], cp) + frag = append(frag, tmp[:n]...) + } else { + z.err = &ParseError{ + Type: TokenBadURI, + Message: fmt.Sprintf("bare url() with invalid escape"), + } + return Token{ + Type: TokenBadURI, + Value: string(frag) + z.consumeBadURL(), + Extra: &TokenExtraError{Err: z.err}, + } + } + } else { + frag = append(frag, by) + } + } } // §4.3.6 func (z *Tokenizer) consumeUnicodeRange() Token { + var sdigits [6]byte + var by byte + haveQuestionMarks := false + i := 0 + for { + by = z.nextByte() + if i >= 6 { + break // weird condition so that unreadByte() works + } + if by == '?' 
{ + sdigits[i] = by + haveQuestionMarks = true + i++ + } else if !haveQuestionMarks && isHexDigit(by) { + sdigits[i] = by + i++ + } else { + break + } + } + z.unreadByte() + slen := i + var edigits [6]byte + var elen int + z.repeek() + + if haveQuestionMarks { + copy(edigits[:slen], sdigits[:slen]) + elen = slen + for idx := range sdigits { + if sdigits[idx] == '?' { + sdigits[idx] = '0' + edigits[idx] = 'F' + } + } + } else if z.peek[0] == '-' && isHexDigit(z.peek[1]) { + z.nextByte() // '-' + i = 0 + for { + by = z.nextByte() + if i < 6 && isHexDigit(by) { + edigits[i] = by + i++ + } else { + break + } + } + z.unreadByte() + elen = i + } else { + copy(edigits[:], sdigits[:]) + elen = slen + } + + startCP, err := strconv.ParseInt(string(sdigits[:slen]), 16, 22) + if err != nil { + panic(fmt.Sprintf("ParseInt failure: %s", err)) + } + endCP, err := strconv.ParseInt(string(edigits[:elen]), 16, 22) + if err != nil { + panic(fmt.Sprintf("ParseInt failure: %s", err)) + } + e := &TokenExtraUnicodeRange{ + Start: rune(startCP), + End: rune(endCP), + } + return Token{ + Type: TokenUnicodeRange, + Value: e.String(), + Extra: e, + } +} + +func (z *Tokenizer) consumeComment() Token { + var frag []byte + var by byte + for { + by = z.nextByte() + if by == '*' { + z.repeek() + if z.peek[0] == '/' { + z.nextByte() // '/' + return Token{ + Type: TokenComment, + Value: "/*" + string(frag) + "*/", + } + } + } else if by == 0 { + return Token{ + Type: TokenComment, + Value: "/*" + string(frag) + "*/", + } + } + frag = append(frag, by) + } } // §4.3.7 +// after the "\" func (z *Tokenizer) consumeEscapedCP() rune { + by := z.nextByte() + if by == 0 { + return utf8.RuneError + } else if isHexDigit(by) { + var digits = make([]byte, 6) + digits[0] = by + i := 1 + // (!) weird looping condition so that we UnreadByte() at the end + for { + by = z.nextByte() + if i < 6 && isHexDigit(by) { + digits[i] = by + i++ + } else { + break + } + } + + if isNotWhitespace(rune(by)) && by != 0 { + z.unreadByte() + } + digits = digits[:i] + // 16 = hex, 22 = bit width of unicode + cpi, err := strconv.ParseInt(string(digits), 16, 22) + if err != nil || cpi == 0 || cpi > utf8.MaxRune { + return utf8.RuneError + } + return rune(cpi) + } else { + z.unreadByte() + ru, _, err := z.r.ReadRune() + if err == io.EOF { + z.err = io.EOF + return utf8.RuneError + } else if err != nil { + z.err = err + panic(err) + } else { + return ru + } + } } // §4.3.11 func (z *Tokenizer) consumeName() string { + var frag []byte + var by byte + for { + by = z.nextByte() + if by == '\\' { + z.repeek() + if z.peek[0] != '\n' && z.peek[0] != 0 { + cp := z.consumeEscapedCP() + var tmp [utf8.UTFMax]byte + n := utf8.EncodeRune(tmp[:], cp) + frag = append(frag, tmp[:n]...) 
+ continue + } + } else if isNameCode(by) { + frag = append(frag, by) + continue + } + + return string(frag) + } } // §4.3.12 @@ -409,7 +835,7 @@ func (z *Tokenizer) consumeNumericInner() (repr []byte, notInteger bool) { } if by != 0 { // don't attempt to unread EOF - z.r.UnreadByte() + z.unreadByte() } } @@ -425,9 +851,9 @@ func (z *Tokenizer) consumeNumericInner() (repr []byte, notInteger bool) { z.repeek() } // [eE][+-]?[0-9] - if z.peek[0] == 'e' || z.peek[0] == 'E' { + if (z.peek[0] == 'e') || (z.peek[0] == 'E') { var n int - if z.peek[1] == '+' && z.peek[1] == '-' && ('0' <= z.peek[2] && z.peek[2] <= '9') { + if (z.peek[1] == '+' || z.peek[1] == '-') && ('0' <= z.peek[2] && z.peek[2] <= '9') { n = 3 } else if '0' <= z.peek[1] && z.peek[1] <= '9' { n = 2 @@ -446,4 +872,23 @@ func (z *Tokenizer) consumeNumericInner() (repr []byte, notInteger bool) { // §4.3.14 func (z *Tokenizer) consumeBadURL() string { + var frag []byte + var by byte + for { + by = z.nextByte() + if by == ')' || by == 0 { + return string(frag) + } else if by == '\\' { + z.repeek() + if z.peek[0] != '\n' { + // Allow for escaped right paren "\)" + cp := z.consumeEscapedCP() + var tmp [utf8.UTFMax]byte + n := utf8.EncodeRune(tmp[:], cp) + frag = append(frag, tmp[:n]...) + continue + } + } + frag = append(frag, by) + } } From 7690df407b02a60495b9aa1397d402a3c90fe108 Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 14:17:46 -0700 Subject: [PATCH 07/33] fix all usages of "starts with a valid escape": cannot unread after peeking --- scanner/tokenizer.go | 47 +++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go index 24e8720..e30de15 100644 --- a/scanner/tokenizer.go +++ b/scanner/tokenizer.go @@ -305,7 +305,7 @@ func (z *Tokenizer) consume() Token { if z.nextIsNumber() { return z.consumeNumeric() } - z.readByte() // re-read, fall down to TokenDelim + z.nextByte() // re-read, fall down to TokenDelim case '-': z.unreadByte() z.repeek() @@ -319,14 +319,14 @@ func (z *Tokenizer) consume() Token { z.r.Discard(2) return premadeTokens['C'] } - z.readByte() // re-read, fall down to TokenDelim + z.nextByte() // re-read, fall down to TokenDelim case '.': z.unreadByte() z.repeek() if z.nextIsNumber() { return z.consumeNumeric() } - z.readByte() // re-read, fall down to TokenDelim + z.nextByte() // re-read, fall down to TokenDelim case '/': z.repeek() if z.peek[0] == '*' { @@ -355,7 +355,7 @@ func (z *Tokenizer) consume() Token { // input stream has a valid escape return z.consumeIdentish() } - z.readByte() + z.nextByte() z.err = errBadEscape return premadeTokens['\\'] case 'U', 'u': @@ -494,10 +494,11 @@ func (z *Tokenizer) consumeIdentish() Token { Type: TokenFunction, Value: s, } - } - return Token{ - Type: TokenIdent, - Value: s, + } else { + return Token{ + Type: TokenIdent, + Value: s, + } } } @@ -525,14 +526,18 @@ func (z *Tokenizer) consumeString(delim byte) Token { Extra: &TokenExtraError{Err: z.err}, } } else if by == '\\' { + z.unreadByte() z.repeek() - if z.peek[0] == 0 { + if z.peek[1] == 0 { // escape @ EOF, ignore. - } else if z.peek[0] == '\n' { + z.nextByte() // '\' + } else if z.peek[1] == '\n' { // valid escaped newline, ignore. 
- z.r.Discard(1) + z.nextByte() // '\' + z.nextByte() // newline } else if true { // stream will always contain a valid escape here + z.nextByte() // '\' cp := z.consumeEscapedCP() var tmp [utf8.UTFMax]byte n := utf8.EncodeRune(tmp[:], cp) @@ -629,8 +634,10 @@ func (z *Tokenizer) consumeURL() Token { Extra: &TokenExtraError{Err: z.err}, } } else if by == '\\' { + z.unreadByte() z.repeek() - if z.peek[0] != '\n' && z.peek[0] != 0 { + if z.nextIsEscape() { + z.nextByte() // '\' cp := z.consumeEscapedCP() var tmp [utf8.UTFMax]byte n := utf8.EncodeRune(tmp[:], cp) @@ -804,20 +811,25 @@ func (z *Tokenizer) consumeName() string { for { by = z.nextByte() if by == '\\' { + z.unreadByte() z.repeek() - if z.peek[0] != '\n' && z.peek[0] != 0 { + if z.nextIsEscape() { + z.nextByte() cp := z.consumeEscapedCP() var tmp [utf8.UTFMax]byte n := utf8.EncodeRune(tmp[:], cp) frag = append(frag, tmp[:n]...) continue + } else { + return string(frag) } } else if isNameCode(by) { frag = append(frag, by) continue + } else { + z.unreadByte() + return string(frag) } - - return string(frag) } } @@ -879,8 +891,10 @@ func (z *Tokenizer) consumeBadURL() string { if by == ')' || by == 0 { return string(frag) } else if by == '\\' { + z.unreadByte() z.repeek() - if z.peek[0] != '\n' { + if z.nextIsEscape() { + z.nextByte() // '\' // Allow for escaped right paren "\)" cp := z.consumeEscapedCP() var tmp [utf8.UTFMax]byte @@ -888,6 +902,7 @@ func (z *Tokenizer) consumeBadURL() string { frag = append(frag, tmp[:n]...) continue } + z.nextByte() // '\' } frag = append(frag, by) } From 1080914cd3fe669d75ac9209c633080413e23f5c Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 14:30:00 -0700 Subject: [PATCH 08/33] Change test data, make more fixes --- scanner/scanner_test.go | 52 +++++++++++++++++++++++++++++------------ scanner/tokenizer.go | 10 ++++---- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index a349e16..46d9fe3 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -5,10 +5,23 @@ package scanner import ( + "bytes" "strings" "testing" ) +func Fuzz(b []byte) int { + tz := NewTokenizer(bytes.NewReader(b)) + for { + tt := tz.Next() + fmt.Printf("%v\n", tt) + if tt.Type.StopToken() { + break + } + } + return 1 +} + func TestMatchers(t *testing.T) { // Just basic checks, not exhaustive at all. 
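	// An extended check sketched here as a comment (not part of the patch):
	// with the new API a dimension's unit lives in Extra rather than in Value,
	// e.g. for the "42px" case exercised below:
	//
	//	tz := NewTokenizer(strings.NewReader("42px"))
	//	tok := tz.Next()
	//	// tok.Type == TokenDimension, tok.Value == "42"
	//	// tok.Extra.(*TokenExtraNumeric).Dimension == "px"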
checkMatch := func(s string, ttList ...interface{}) { @@ -54,31 +67,40 @@ func TestMatchers(t *testing.T) { checkMatch("42%", TokenPercentage, "42") checkMatch("4.2%", TokenPercentage, "4.2") checkMatch(".42%", TokenPercentage, ".42") - checkMatch("42px", TokenDimension, "42px") - checkMatch("url(http://domain.com)", TokenURI, "url(http://domain.com)") - checkMatch("url( http://domain.com/uri/between/space )", TokenURI, "url( http://domain.com/uri/between/space )") - checkMatch("url('http://domain.com/uri/between/single/quote')", TokenURI, "url('http://domain.com/uri/between/single/quote')") - checkMatch(`url("http://domain.com/uri/between/double/quote")`, TokenURI, `url("http://domain.com/uri/between/double/quote")`) - checkMatch("url(http://domain.com/?parentheses=%28)", TokenURI, "url(http://domain.com/?parentheses=%28)") - checkMatch("url( http://domain.com/?parentheses=%28&between=space )", TokenURI, "url( http://domain.com/?parentheses=%28&between=space )") - checkMatch("url('http://domain.com/uri/(parentheses)/between/single/quote')", TokenURI, "url('http://domain.com/uri/(parentheses)/between/single/quote')") - checkMatch(`url("http://domain.com/uri/(parentheses)/between/double/quote")`, TokenURI, `url("http://domain.com/uri/(parentheses)/between/double/quote")`) + checkMatch("42px", TokenDimension, "42") // TODO check the dimension stored in .Extra + checkMatch("url(http://domain.com)", TokenURI, "http://domain.com") + checkMatch("url( http://domain.com/uri/between/space )", TokenURI, "http://domain.com/uri/between/space") + checkMatch("url('http://domain.com/uri/between/single/quote')", TokenURI, "http://domain.com/uri/between/single/quote") + checkMatch(`url("http://domain.com/uri/between/double/quote")`, TokenURI, `http://domain.com/uri/between/double/quote`) + checkMatch("url(http://domain.com/?parentheses=%28)", TokenURI, "http://domain.com/?parentheses=%28") + checkMatch("url( http://domain.com/?parentheses=%28&between=space )", TokenURI, "http://domain.com/?parentheses=%28&between=space") + checkMatch("url('http://domain.com/uri/(parentheses)/between/single/quote')", TokenURI, "http://domain.com/uri/(parentheses)/between/single/quote") + checkMatch(`url("http://domain.com/uri/(parentheses)/between/double/quote")`, TokenURI, `http://domain.com/uri/(parentheses)/between/double/quote`) + checkMatch(`url(http://domain.com/uri/\(bare%20escaped\)/parentheses)`, TokenURI, `http://domain.com/uri/(bare%20escaped)/parentheses`) checkMatch("url(http://domain.com/uri/1)url(http://domain.com/uri/2)", - TokenURI, "url(http://domain.com/uri/1)", - TokenURI, "url(http://domain.com/uri/2)", + TokenURI, "http://domain.com/uri/1", + TokenURI, "http://domain.com/uri/2", + ) + checkMatch("url(http://domain.com/uri/1) url(http://domain.com/uri/2)", + TokenURI, "http://domain.com/uri/1", + TokenS, " ", + TokenURI, "http://domain.com/uri/2", ) checkMatch("U+0042", TokenUnicodeRange, "U+0042") checkMatch("", TokenCDC, "-->") - checkMatch(" \n \t \n", TokenS, " \n \t \n") - checkMatch("/* foo */", TokenComment, "/* foo */") + checkMatch(" \n \t \n", TokenS, "\n") // TODO - whitespace preservation + checkMatch("/**/", TokenComment, "") + checkMatch("/*foo*/", TokenComment, "foo") + checkMatch("/* foo */", TokenComment, " foo ") checkMatch("bar(", TokenFunction, "bar") checkMatch("~=", TokenIncludes, "~=") checkMatch("|=", TokenDashMatch, "|=") + checkMatch("||", TokenColumn, "||") checkMatch("^=", TokenPrefixMatch, "^=") checkMatch("$=", TokenSuffixMatch, "$=") checkMatch("*=", TokenSubstringMatch, 
"*=") - checkMatch("{", TokenChar, "{") + checkMatch("{", TokenOpenBrace, "{") // checkMatch("\uFEFF", TokenBOM, "\uFEFF") - checkMatch(`╯︵┻━┻"stuff"`, TokenIdent, "╯︵┻━┻", TokenString, `"stuff"`) + checkMatch(`╯︵┻━┻"stuff"`, TokenIdent, "╯︵┻━┻", TokenString, "stuff") } diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go index e30de15..04dd726 100644 --- a/scanner/tokenizer.go +++ b/scanner/tokenizer.go @@ -316,7 +316,7 @@ func (z *Tokenizer) consume() Token { return z.consumeIdentish() } if z.nextCompare("-->") { - z.r.Discard(2) + z.r.Discard(3) return premadeTokens['C'] } z.nextByte() // re-read, fall down to TokenDelim @@ -560,7 +560,9 @@ func (z *Tokenizer) consumeURL() Token { Value: "", } } else if z.peek[0] == '\'' || z.peek[0] == '"' { - t := z.consumeString(z.peek[0]) + delim := z.peek[0] + z.nextByte() + t := z.consumeString(delim) if t.Type == TokenBadString { t.Type = TokenBadURI t.Value += z.consumeBadURL() @@ -745,13 +747,13 @@ func (z *Tokenizer) consumeComment() Token { z.nextByte() // '/' return Token{ Type: TokenComment, - Value: "/*" + string(frag) + "*/", + Value: string(frag), } } } else if by == 0 { return Token{ Type: TokenComment, - Value: "/*" + string(frag) + "*/", + Value: string(frag), } } frag = append(frag, by) From b99c1dd6dbf19025c0c0edbeb7c1e9f075f49140 Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 14:30:15 -0700 Subject: [PATCH 09/33] add fuzzing corpus from existing testdata --- scanner/testdata/fuzz/corpus/test-1 | 1 + scanner/testdata/fuzz/corpus/test-10 | 1 + scanner/testdata/fuzz/corpus/test-11 | 1 + scanner/testdata/fuzz/corpus/test-12 | 1 + scanner/testdata/fuzz/corpus/test-13 | 1 + scanner/testdata/fuzz/corpus/test-14 | 1 + scanner/testdata/fuzz/corpus/test-15 | 1 + scanner/testdata/fuzz/corpus/test-16 | 1 + scanner/testdata/fuzz/corpus/test-17 | 1 + scanner/testdata/fuzz/corpus/test-18 | 1 + scanner/testdata/fuzz/corpus/test-19 | 1 + scanner/testdata/fuzz/corpus/test-2 | 1 + scanner/testdata/fuzz/corpus/test-20 | 1 + scanner/testdata/fuzz/corpus/test-21 | 1 + scanner/testdata/fuzz/corpus/test-22 | 1 + scanner/testdata/fuzz/corpus/test-23 | 1 + scanner/testdata/fuzz/corpus/test-24 | 1 + scanner/testdata/fuzz/corpus/test-25 | 1 + scanner/testdata/fuzz/corpus/test-26 | 1 + scanner/testdata/fuzz/corpus/test-27 | 1 + scanner/testdata/fuzz/corpus/test-28 | 1 + scanner/testdata/fuzz/corpus/test-29 | 1 + scanner/testdata/fuzz/corpus/test-3 | 1 + scanner/testdata/fuzz/corpus/test-30 | 1 + scanner/testdata/fuzz/corpus/test-31 | 1 + scanner/testdata/fuzz/corpus/test-32 | 1 + scanner/testdata/fuzz/corpus/test-33 | 1 + scanner/testdata/fuzz/corpus/test-34 | 1 + scanner/testdata/fuzz/corpus/test-35 | 1 + scanner/testdata/fuzz/corpus/test-36 | 1 + scanner/testdata/fuzz/corpus/test-37 | 2 ++ scanner/testdata/fuzz/corpus/test-38 | 1 + scanner/testdata/fuzz/corpus/test-39 | 1 + scanner/testdata/fuzz/corpus/test-4 | 1 + scanner/testdata/fuzz/corpus/test-40 | 1 + scanner/testdata/fuzz/corpus/test-41 | 1 + scanner/testdata/fuzz/corpus/test-42 | 1 + scanner/testdata/fuzz/corpus/test-43 | 1 + scanner/testdata/fuzz/corpus/test-44 | 1 + scanner/testdata/fuzz/corpus/test-45 | 1 + scanner/testdata/fuzz/corpus/test-46 | 1 + scanner/testdata/fuzz/corpus/test-47 | 1 + scanner/testdata/fuzz/corpus/test-48 | 1 + scanner/testdata/fuzz/corpus/test-49 | 1 + scanner/testdata/fuzz/corpus/test-5 | 1 + scanner/testdata/fuzz/corpus/test-6 | 1 + scanner/testdata/fuzz/corpus/test-7 | 1 + scanner/testdata/fuzz/corpus/test-8 | 1 + scanner/testdata/fuzz/corpus/test-9 
| 1 + 49 files changed, 50 insertions(+) create mode 100644 scanner/testdata/fuzz/corpus/test-1 create mode 100644 scanner/testdata/fuzz/corpus/test-10 create mode 100644 scanner/testdata/fuzz/corpus/test-11 create mode 100644 scanner/testdata/fuzz/corpus/test-12 create mode 100644 scanner/testdata/fuzz/corpus/test-13 create mode 100644 scanner/testdata/fuzz/corpus/test-14 create mode 100644 scanner/testdata/fuzz/corpus/test-15 create mode 100644 scanner/testdata/fuzz/corpus/test-16 create mode 100644 scanner/testdata/fuzz/corpus/test-17 create mode 100644 scanner/testdata/fuzz/corpus/test-18 create mode 100644 scanner/testdata/fuzz/corpus/test-19 create mode 100644 scanner/testdata/fuzz/corpus/test-2 create mode 100644 scanner/testdata/fuzz/corpus/test-20 create mode 100644 scanner/testdata/fuzz/corpus/test-21 create mode 100644 scanner/testdata/fuzz/corpus/test-22 create mode 100644 scanner/testdata/fuzz/corpus/test-23 create mode 100644 scanner/testdata/fuzz/corpus/test-24 create mode 100644 scanner/testdata/fuzz/corpus/test-25 create mode 100644 scanner/testdata/fuzz/corpus/test-26 create mode 100644 scanner/testdata/fuzz/corpus/test-27 create mode 100644 scanner/testdata/fuzz/corpus/test-28 create mode 100644 scanner/testdata/fuzz/corpus/test-29 create mode 100644 scanner/testdata/fuzz/corpus/test-3 create mode 100644 scanner/testdata/fuzz/corpus/test-30 create mode 100644 scanner/testdata/fuzz/corpus/test-31 create mode 100644 scanner/testdata/fuzz/corpus/test-32 create mode 100644 scanner/testdata/fuzz/corpus/test-33 create mode 100644 scanner/testdata/fuzz/corpus/test-34 create mode 100644 scanner/testdata/fuzz/corpus/test-35 create mode 100644 scanner/testdata/fuzz/corpus/test-36 create mode 100644 scanner/testdata/fuzz/corpus/test-37 create mode 100644 scanner/testdata/fuzz/corpus/test-38 create mode 100644 scanner/testdata/fuzz/corpus/test-39 create mode 100644 scanner/testdata/fuzz/corpus/test-4 create mode 100644 scanner/testdata/fuzz/corpus/test-40 create mode 100644 scanner/testdata/fuzz/corpus/test-41 create mode 100644 scanner/testdata/fuzz/corpus/test-42 create mode 100644 scanner/testdata/fuzz/corpus/test-43 create mode 100644 scanner/testdata/fuzz/corpus/test-44 create mode 100644 scanner/testdata/fuzz/corpus/test-45 create mode 100644 scanner/testdata/fuzz/corpus/test-46 create mode 100644 scanner/testdata/fuzz/corpus/test-47 create mode 100644 scanner/testdata/fuzz/corpus/test-48 create mode 100644 scanner/testdata/fuzz/corpus/test-49 create mode 100644 scanner/testdata/fuzz/corpus/test-5 create mode 100644 scanner/testdata/fuzz/corpus/test-6 create mode 100644 scanner/testdata/fuzz/corpus/test-7 create mode 100644 scanner/testdata/fuzz/corpus/test-8 create mode 100644 scanner/testdata/fuzz/corpus/test-9 diff --git a/scanner/testdata/fuzz/corpus/test-1 b/scanner/testdata/fuzz/corpus/test-1 new file mode 100644 index 0000000..85df507 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-1 @@ -0,0 +1 @@ +abcd \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-10 b/scanner/testdata/fuzz/corpus/test-10 new file mode 100644 index 0000000..1340eb3 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-10 @@ -0,0 +1 @@ +#name \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-11 b/scanner/testdata/fuzz/corpus/test-11 new file mode 100644 index 0000000..5dbaeac --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-11 @@ -0,0 +1 @@ +##name \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-12 
b/scanner/testdata/fuzz/corpus/test-12 new file mode 100644 index 0000000..88cde2c --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-12 @@ -0,0 +1 @@ +42'' \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-13 b/scanner/testdata/fuzz/corpus/test-13 new file mode 100644 index 0000000..947355b --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-13 @@ -0,0 +1 @@ ++42 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-14 b/scanner/testdata/fuzz/corpus/test-14 new file mode 100644 index 0000000..67f7ad0 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-14 @@ -0,0 +1 @@ +-42 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-15 b/scanner/testdata/fuzz/corpus/test-15 new file mode 100644 index 0000000..8012ebb --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-15 @@ -0,0 +1 @@ +4.2 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-16 b/scanner/testdata/fuzz/corpus/test-16 new file mode 100644 index 0000000..0401f1e --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-16 @@ -0,0 +1 @@ +.42 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-17 b/scanner/testdata/fuzz/corpus/test-17 new file mode 100644 index 0000000..f8c987b --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-17 @@ -0,0 +1 @@ ++.42 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-18 b/scanner/testdata/fuzz/corpus/test-18 new file mode 100644 index 0000000..3273e87 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-18 @@ -0,0 +1 @@ +-.42 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-19 b/scanner/testdata/fuzz/corpus/test-19 new file mode 100644 index 0000000..67a9ae6 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-19 @@ -0,0 +1 @@ +42% \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-2 b/scanner/testdata/fuzz/corpus/test-2 new file mode 100644 index 0000000..af3501d --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-2 @@ -0,0 +1 @@ +"abcd" \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-20 b/scanner/testdata/fuzz/corpus/test-20 new file mode 100644 index 0000000..d44e379 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-20 @@ -0,0 +1 @@ +4.2% \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-21 b/scanner/testdata/fuzz/corpus/test-21 new file mode 100644 index 0000000..61542cd --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-21 @@ -0,0 +1 @@ +.42% \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-22 b/scanner/testdata/fuzz/corpus/test-22 new file mode 100644 index 0000000..9996f64 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-22 @@ -0,0 +1 @@ +42px \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-23 b/scanner/testdata/fuzz/corpus/test-23 new file mode 100644 index 0000000..6b16595 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-23 @@ -0,0 +1 @@ +url(http://domain.com) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-24 b/scanner/testdata/fuzz/corpus/test-24 new file mode 100644 index 0000000..849a2f0 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-24 @@ -0,0 +1 @@ +url( http://domain.com/uri/between/space ) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-25 b/scanner/testdata/fuzz/corpus/test-25 new file mode 100644 index 0000000..9efe089 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-25 @@ -0,0 +1 @@ 
+url('http://domain.com/uri/between/single/quote') \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-26 b/scanner/testdata/fuzz/corpus/test-26 new file mode 100644 index 0000000..5d37d41 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-26 @@ -0,0 +1 @@ +url("http://domain.com/uri/between/double/quote") \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-27 b/scanner/testdata/fuzz/corpus/test-27 new file mode 100644 index 0000000..4b67378 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-27 @@ -0,0 +1 @@ +url(http://domain.com/?parentheses=%28) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-28 b/scanner/testdata/fuzz/corpus/test-28 new file mode 100644 index 0000000..7b0f6cb --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-28 @@ -0,0 +1 @@ +url( http://domain.com/?parentheses=%28&between=space ) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-29 b/scanner/testdata/fuzz/corpus/test-29 new file mode 100644 index 0000000..e548025 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-29 @@ -0,0 +1 @@ +url('http://domain.com/uri/(parentheses)/between/single/quote') \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-3 b/scanner/testdata/fuzz/corpus/test-3 new file mode 100644 index 0000000..7d12177 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-3 @@ -0,0 +1 @@ +"ab'cd" \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-30 b/scanner/testdata/fuzz/corpus/test-30 new file mode 100644 index 0000000..4280336 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-30 @@ -0,0 +1 @@ +url("http://domain.com/uri/(parentheses)/between/double/quote") \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-31 b/scanner/testdata/fuzz/corpus/test-31 new file mode 100644 index 0000000..5416922 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-31 @@ -0,0 +1 @@ +url(http://domain.com/uri/\(bare%20escaped\)/parentheses) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-32 b/scanner/testdata/fuzz/corpus/test-32 new file mode 100644 index 0000000..43d5b7d --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-32 @@ -0,0 +1 @@ +url(http://domain.com/uri/1)url(http://domain.com/uri/2) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-33 b/scanner/testdata/fuzz/corpus/test-33 new file mode 100644 index 0000000..7871a01 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-33 @@ -0,0 +1 @@ +url(http://domain.com/uri/1) url(http://domain.com/uri/2) \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-34 b/scanner/testdata/fuzz/corpus/test-34 new file mode 100644 index 0000000..335d730 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-34 @@ -0,0 +1 @@ +U+0042 \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-35 b/scanner/testdata/fuzz/corpus/test-35 new file mode 100644 index 0000000..3e4e3d6 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-35 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-37 b/scanner/testdata/fuzz/corpus/test-37 new file mode 100644 index 0000000..65ec83f --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-37 @@ -0,0 +1,2 @@ + + diff --git a/scanner/testdata/fuzz/corpus/test-38 b/scanner/testdata/fuzz/corpus/test-38 new file mode 100644 index 0000000..7068cde --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-38 @@ -0,0 +1 @@ +/**/ \ No newline at end of file diff 
--git a/scanner/testdata/fuzz/corpus/test-39 b/scanner/testdata/fuzz/corpus/test-39 new file mode 100644 index 0000000..112b37c --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-39 @@ -0,0 +1 @@ +/*foo*/ \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-4 b/scanner/testdata/fuzz/corpus/test-4 new file mode 100644 index 0000000..cf25faa --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-4 @@ -0,0 +1 @@ +"ab\"cd" \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-40 b/scanner/testdata/fuzz/corpus/test-40 new file mode 100644 index 0000000..785ae54 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-40 @@ -0,0 +1 @@ +/* foo */ \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-41 b/scanner/testdata/fuzz/corpus/test-41 new file mode 100644 index 0000000..adaa030 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-41 @@ -0,0 +1 @@ +bar( \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-42 b/scanner/testdata/fuzz/corpus/test-42 new file mode 100644 index 0000000..c926849 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-42 @@ -0,0 +1 @@ +~= \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-43 b/scanner/testdata/fuzz/corpus/test-43 new file mode 100644 index 0000000..279d9d3 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-43 @@ -0,0 +1 @@ +|= \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-44 b/scanner/testdata/fuzz/corpus/test-44 new file mode 100644 index 0000000..27cc728 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-44 @@ -0,0 +1 @@ +|| \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-45 b/scanner/testdata/fuzz/corpus/test-45 new file mode 100644 index 0000000..49bae17 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-45 @@ -0,0 +1 @@ +^= \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-46 b/scanner/testdata/fuzz/corpus/test-46 new file mode 100644 index 0000000..3b0d355 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-46 @@ -0,0 +1 @@ +$= \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-47 b/scanner/testdata/fuzz/corpus/test-47 new file mode 100644 index 0000000..d2215e3 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-47 @@ -0,0 +1 @@ +*= \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-48 b/scanner/testdata/fuzz/corpus/test-48 new file mode 100644 index 0000000..81750b9 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-48 @@ -0,0 +1 @@ +{ \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-49 b/scanner/testdata/fuzz/corpus/test-49 new file mode 100644 index 0000000..e9a4a3c --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-49 @@ -0,0 +1 @@ +╯︵┻━┻"stuff" \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-5 b/scanner/testdata/fuzz/corpus/test-5 new file mode 100644 index 0000000..bf1efad --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-5 @@ -0,0 +1 @@ +"ab\\cd" \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-6 b/scanner/testdata/fuzz/corpus/test-6 new file mode 100644 index 0000000..62fe39b --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-6 @@ -0,0 +1 @@ +'abcd' \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-7 b/scanner/testdata/fuzz/corpus/test-7 new file mode 100644 index 0000000..099e0f4 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-7 @@ -0,0 +1 @@ +'ab"cd' \ No newline at 
end of file diff --git a/scanner/testdata/fuzz/corpus/test-8 b/scanner/testdata/fuzz/corpus/test-8 new file mode 100644 index 0000000..db5af35 --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-8 @@ -0,0 +1 @@ +'ab\'cd' \ No newline at end of file diff --git a/scanner/testdata/fuzz/corpus/test-9 b/scanner/testdata/fuzz/corpus/test-9 new file mode 100644 index 0000000..17559ae --- /dev/null +++ b/scanner/testdata/fuzz/corpus/test-9 @@ -0,0 +1 @@ +'ab\\cd' \ No newline at end of file From aa841cee2102e74e4855521dfbf8fa9fbf6f65de Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 14:36:57 -0700 Subject: [PATCH 10/33] widen ParseInt calls to accept too-big codepoints --- scanner/fuzz.go | 16 ++++++++++++++++ scanner/scanner_test.go | 15 ++------------- scanner/tokenizer.go | 7 ++++--- 3 files changed, 22 insertions(+), 16 deletions(-) create mode 100644 scanner/fuzz.go diff --git a/scanner/fuzz.go b/scanner/fuzz.go new file mode 100644 index 0000000..5b770be --- /dev/null +++ b/scanner/fuzz.go @@ -0,0 +1,16 @@ +package scanner + +import "bytes" +import "fmt" + +func Fuzz(b []byte) int { + tz := NewTokenizer(bytes.NewReader(b)) + for { + tt := tz.Next() + fmt.Printf("%v\n", tt) + if tt.Type.StopToken() { + break + } + } + return 1 +} diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 46d9fe3..f2d011f 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -5,23 +5,10 @@ package scanner import ( - "bytes" "strings" "testing" ) -func Fuzz(b []byte) int { - tz := NewTokenizer(bytes.NewReader(b)) - for { - tt := tz.Next() - fmt.Printf("%v\n", tt) - if tt.Type.StopToken() { - break - } - } - return 1 -} - func TestMatchers(t *testing.T) { // Just basic checks, not exhaustive at all. checkMatch := func(s string, ttList ...interface{}) { @@ -87,6 +74,8 @@ func TestMatchers(t *testing.T) { TokenURI, "http://domain.com/uri/2", ) checkMatch("U+0042", TokenUnicodeRange, "U+0042") + checkMatch("U+FFFFFF", TokenUnicodeRange, "U+FFFFFF") + checkMatch("U+??????", TokenUnicodeRange, "U+0000-FFFFFF") checkMatch("", TokenCDC, "-->") checkMatch(" \n \t \n", TokenS, "\n") // TODO - whitespace preservation diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go index 04dd726..5cdc61e 100644 --- a/scanner/tokenizer.go +++ b/scanner/tokenizer.go @@ -717,11 +717,12 @@ func (z *Tokenizer) consumeUnicodeRange() Token { elen = slen } - startCP, err := strconv.ParseInt(string(sdigits[:slen]), 16, 22) + // 16 = hex, 32 = int32 + startCP, err := strconv.ParseInt(string(sdigits[:slen]), 16, 32) if err != nil { panic(fmt.Sprintf("ParseInt failure: %s", err)) } - endCP, err := strconv.ParseInt(string(edigits[:elen]), 16, 22) + endCP, err := strconv.ParseInt(string(edigits[:elen]), 16, 32) if err != nil { panic(fmt.Sprintf("ParseInt failure: %s", err)) } @@ -786,7 +787,7 @@ func (z *Tokenizer) consumeEscapedCP() rune { } digits = digits[:i] // 16 = hex, 22 = bit width of unicode - cpi, err := strconv.ParseInt(string(digits), 16, 22) + cpi, err := strconv.ParseInt(string(digits), 16, 32) if err != nil || cpi == 0 || cpi > utf8.MaxRune { return utf8.RuneError } From b5986f0bb76b18615e247c8d33d12c3a7b5b2ed6 Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 15:54:50 -0700 Subject: [PATCH 11/33] Add round-tripping test --- scanner/fuzz.go | 83 +++++++++++++- scanner/scanner_test.go | 14 +++ scanner/token.go | 243 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 328 insertions(+), 12 deletions(-) diff --git a/scanner/fuzz.go b/scanner/fuzz.go index 
5b770be..036bb97 100644 --- a/scanner/fuzz.go +++ b/scanner/fuzz.go @@ -1,16 +1,91 @@ package scanner -import "bytes" -import "fmt" +import ( + "bytes" + "fmt" + "io" + "reflect" +) func Fuzz(b []byte) int { + var tokens []Token + tz := NewTokenizer(bytes.NewReader(b)) for { tt := tz.Next() - fmt.Printf("%v\n", tt) - if tt.Type.StopToken() { + fmt.Printf("[OT] %v\n", tt) + if tt.Type == TokenError { + // We should not have reading errors + panic(tt) + } else if tt.Type == TokenEOF { break + } else { + tokens = append(tokens, tt) + } + } + + // Render and retokenize + + var wr TokenRenderer + var rerenderBuf bytes.Buffer + success := false + defer func() { + if !success { + fmt.Println("RERENDER BUFFER:", rerenderBuf.String()) + } + }() + pr, pw := io.Pipe() + defer pr.Close() + + go func() { + writeTarget := io.MultiWriter(pw, &rerenderBuf) + for _, v := range tokens { + wr.WriteTokenTo(writeTarget, v) + } + pw.Close() + }() + + tz = NewTokenizer(pr) + i := 0 + for { + for i < len(tokens) && tokens[i].Type == TokenComment { + i++ + } + tt := tz.Next() + fmt.Printf("[RT] %v\n", tt) + if tt.Type == TokenComment { + // Ignore comments while comparing + continue + } + if tt.Type == TokenError { + panic(tt) + } + if tt.Type == TokenEOF { + if i != len(tokens) { + panic(fmt.Sprintf("unexpected EOF: got EOF from retokenizer, but original token stream is at %d/%d\n%v", i, len(tokens), tokens)) + } else { + break + } + } + if i == len(tokens) { + panic(fmt.Sprintf("expected EOF: reached end of original token stream but got %v from retokenizer\n%v", tt, tokens)) + } + + ot := tokens[i] + if tt.Type != ot.Type { + panic(fmt.Sprintf("retokenizer gave %v, expected %v (.Type not equal)\n%v", tt, ot, tokens)) + } + if tt.Value != ot.Value { + panic(fmt.Sprintf("retokenizer gave %v, expected %v (.Value not equal)\n%v", tt, ot, tokens)) + } + if TokenExtraTypeLookup[tt.Type] != nil { + if !reflect.DeepEqual(tt, ot) { + panic(fmt.Sprintf("retokenizer gave %v, expected %v (.Extra not equal)\n%v", tt, ot, tokens)) + } } + i++ + continue } + success = true return 1 } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index f2d011f..f92be94 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -31,6 +31,8 @@ func TestMatchers(t *testing.T) { t.Errorf("double missing EOF after token %s, got %+v", s, tok) } } + + Fuzz([]byte(s)) } checkMatch("abcd", TokenIdent, "abcd") @@ -92,4 +94,16 @@ func TestMatchers(t *testing.T) { checkMatch("{", TokenOpenBrace, "{") // checkMatch("\uFEFF", TokenBOM, "\uFEFF") checkMatch(`╯︵┻━┻"stuff"`, TokenIdent, "╯︵┻━┻", TokenString, "stuff") + + checkMatch("foo { bar: rgb(255, 0, 127); }", + TokenIdent, "foo", TokenS, " ", + TokenOpenBrace, "{", TokenS, " ", + TokenIdent, "bar", TokenColon, ":", TokenS, " ", + TokenFunction, "rgb", + TokenNumber, "255", TokenComma, ",", TokenS, " ", + TokenNumber, "0", TokenComma, ",", TokenS, " ", + TokenNumber, "127", TokenCloseParen, ")", + TokenSemicolon, ";", TokenS, " ", + TokenCloseBrace, "}", + ) } diff --git a/scanner/token.go b/scanner/token.go index ab5b377..ce5233d 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -5,7 +5,12 @@ package scanner -import "fmt" +import ( + "bytes" + "fmt" + "io" + "strings" +) // TokenType identifies the type of lexical tokens. type TokenType int @@ -61,6 +66,8 @@ const ( TokenEOF // From now on, only tokens from the CSS specification. 
TokenIdent + TokenFunction + TokenDelim // Single character TokenAtKeyword TokenString TokenHash @@ -76,19 +83,19 @@ const ( // CSS Syntax Level 3 removes comments from the token stream, but they are // preserved here. TokenComment - TokenFunction + + // Error tokens + TokenBadString + TokenBadURI + TokenBadEscape // a '\' right before a newline + + // Fixed-string tokens TokenIncludes TokenDashMatch TokenPrefixMatch TokenSuffixMatch TokenSubstringMatch TokenColumn - TokenDelim - // Error tokens - TokenBadString - TokenBadURI - TokenBadEscape // a '\' right before a newline - // Single-character tokens TokenColon TokenSemicolon TokenComma @@ -240,3 +247,223 @@ func (e *TokenExtraError) ParseError() *ParseError { } return pe } + +func escapeIdentifier(s string) string { + // TODO + return s +} + +func escapeDimension(s string) string { + if strings.HasPrefix(s, "e") || strings.HasPrefix(s, "E") { + return "\\" + escapeIdentifier(s) + } + return escapeIdentifier(s) +} + +var escapeStringReplacer = strings.NewReplacer("\"", "\\\"", "\n", "\\0A ", "\\", "\\\\") + +func (t *Token) Render() string { + var buf bytes.Buffer + t.WriteTo(&buf) + return buf.String() +} + +func (t *Token) WriteTo(w io.Writer) { + switch t.Type { + case TokenError: + return + case TokenEOF: + return + case TokenIdent: + fmt.Fprint(w, escapeIdentifier(t.Value)) + case TokenAtKeyword: + fmt.Fprint(w, "@", escapeIdentifier(t.Value)) + case TokenDelim: + if t.Value == "\\" { + fmt.Fprint(w, "\\\n") + } else { + fmt.Fprint(w, t.Value) + } + case TokenHash: + io.WriteString(w, "#") + fmt.Fprint(w, escapeIdentifier(t.Value)) + case TokenPercentage: + fmt.Fprint(w, t.Value, "%") + case TokenDimension: + e := t.Extra.(*TokenExtraNumeric) + fmt.Fprint(w, t.Value, e.Dimension) + case TokenString: + io.WriteString(w, "\"") + escapeStringReplacer.WriteString(w, t.Value) + io.WriteString(w, "\"") + case TokenURI: + io.WriteString(w, "url(\"") + escapeStringReplacer.WriteString(w, t.Value) + io.WriteString(w, "\")") + case TokenUnicodeRange: + fmt.Fprint(w, t.Extra.String()) + case TokenComment: + io.WriteString(w, "/*") + io.WriteString(w, t.Value) + io.WriteString(w, "/*") + case TokenFunction: + fmt.Fprint(w, t.Value, "(") + + case TokenBadEscape, TokenBadString, TokenBadURI: + fmt.Fprint(w, t.Value) + default: + fmt.Fprint(w, t.Value) + } +} + +// TokenRenderer takes care of the comment insertion rules for serialization. 
+type TokenRenderer struct { + lastToken Token +} + +func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) { + var prevKey, curKey interface{} + if r.lastToken.Type == TokenDelim { + prevKey = r.lastToken.Value[0] + } else { + prevKey = r.lastToken.Type + } + if t.Type == TokenDelim { + curKey = t.Value[0] + } else { + curKey = t.Type + } + + m1, ok := commentInsertionRules[prevKey] + if ok { + if m1[curKey] { + io.WriteString(w, "/**/") + } + } + + t.WriteTo(w) + r.lastToken = t +} + +var commentInsertionThruCDC = map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenURI: true, + TokenBadURI: true, + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + TokenUnicodeRange: true, + TokenCDC: true, + '-': true, + '(': false, +} + +var commentInsertionRules = map[interface{}]map[interface{}]bool{ + TokenIdent: map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenURI: true, + TokenBadURI: true, + '-': true, + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + TokenUnicodeRange: true, + TokenCDC: true, + '(': true, + }, + TokenAtKeyword: commentInsertionThruCDC, + TokenHash: commentInsertionThruCDC, + TokenDimension: commentInsertionThruCDC, + '#': map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenURI: true, + TokenBadURI: true, + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + TokenUnicodeRange: true, + TokenCDC: false, + '-': true, + '(': false, + }, + '-': map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenURI: true, + TokenBadURI: true, + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + TokenUnicodeRange: true, + TokenCDC: false, + '-': false, + '(': false, + }, + TokenNumber: map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenURI: true, + TokenBadURI: true, + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + TokenUnicodeRange: true, + TokenCDC: false, + '-': false, + '(': false, + }, + '@': map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenURI: true, + TokenBadURI: true, + TokenNumber: false, + TokenPercentage: false, + TokenDimension: false, + TokenUnicodeRange: true, + TokenCDC: false, + '-': true, + '(': false, + }, + TokenUnicodeRange: map[interface{}]bool{ + TokenIdent: true, + TokenFunction: true, + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + TokenUnicodeRange: false, + '?': true, + }, + '.': map[interface{}]bool{ + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + }, + '+': map[interface{}]bool{ + TokenNumber: true, + TokenPercentage: true, + TokenDimension: true, + }, + '$': map[interface{}]bool{ + '=': true, + }, + '*': map[interface{}]bool{ + '=': true, + }, + '^': map[interface{}]bool{ + '=': true, + }, + '~': map[interface{}]bool{ + '=': true, + }, + '|': map[interface{}]bool{ + '=': true, + '|': true, + }, + '/': map[interface{}]bool{ + '*': true, + }, +} From 6e71edb8690e6322b52b34d0125b93743a9cec10 Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 16:04:38 -0700 Subject: [PATCH 12/33] Fix: was discarding the leading 'u' --- scanner/scanner_test.go | 2 ++ scanner/tokenizer.go | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index f92be94..e4f9222 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -106,4 +106,6 @@ func TestMatchers(t *testing.T) { TokenSemicolon, ";", TokenS, " ", TokenCloseBrace, "}", ) + 
// Fuzzing results + checkMatch("ur(0", TokenFunction, "ur", TokenNumber, "0") } diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go index 5cdc61e..927d59d 100644 --- a/scanner/tokenizer.go +++ b/scanner/tokenizer.go @@ -359,9 +359,10 @@ func (z *Tokenizer) consume() Token { z.err = errBadEscape return premadeTokens['\\'] case 'U', 'u': + z.unreadByte() z.repeek() - if z.peek[0] == '+' && (isHexDigit(z.peek[1]) || (z.peek[1] == '?')) { - z.r.Discard(1) // (!) only discard the plus sign + if z.peek[1] == '+' && (isHexDigit(z.peek[2]) || (z.peek[2] == '?')) { + z.r.Discard(2) // (!) only discard the U+ return z.consumeUnicodeRange() } break From c5a4afb97fa722b95c380e9fb2f4f8ca653ba25c Mon Sep 17 00:00:00 2001 From: Kane York Date: Wed, 14 Mar 2018 17:03:31 -0700 Subject: [PATCH 13/33] Fix more fuzzer findings --- scanner/fuzz.go | 4 +- scanner/scanner_test.go | 59 +++++++++++++------- scanner/token.go | 117 +++++++++++++++++++++++++++++++--------- scanner/tokenizer.go | 47 ++++++++++------ 4 files changed, 164 insertions(+), 63 deletions(-) diff --git a/scanner/fuzz.go b/scanner/fuzz.go index 036bb97..9eb7146 100644 --- a/scanner/fuzz.go +++ b/scanner/fuzz.go @@ -75,11 +75,11 @@ func Fuzz(b []byte) int { if tt.Type != ot.Type { panic(fmt.Sprintf("retokenizer gave %v, expected %v (.Type not equal)\n%v", tt, ot, tokens)) } - if tt.Value != ot.Value { + if tt.Value != ot.Value && !tt.Type.StopToken() { panic(fmt.Sprintf("retokenizer gave %v, expected %v (.Value not equal)\n%v", tt, ot, tokens)) } if TokenExtraTypeLookup[tt.Type] != nil { - if !reflect.DeepEqual(tt, ot) { + if !reflect.DeepEqual(tt, ot) && !tt.Type.StopToken() { panic(fmt.Sprintf("retokenizer gave %v, expected %v (.Extra not equal)\n%v", tt, ot, tokens)) } } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index e4f9222..adee06a 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -5,6 +5,7 @@ package scanner import ( + "reflect" "strings" "testing" ) @@ -18,11 +19,24 @@ func TestMatchers(t *testing.T) { for i < len(ttList) { tt := ttList[i].(TokenType) tVal := ttList[i+1].(string) + var tExtra TokenExtra + if TokenExtraTypeLookup[tt] != nil { + tExtra = ttList[i+2].(TokenExtra) + } if tok := tz.Next(); tok.Type != tt || tok.Value != tVal { t.Errorf("did not match: %s (got %s, wanted %s): %v", s, tok.Value, tVal, tok) + } else if tExtra != nil && !reflect.DeepEqual(tok.Extra, tExtra) { + if tt.StopToken() && tt != TokenError && tt != TokenEOF { + // mismatch ok + } else { + t.Errorf("did not match .Extra: %s (got %#v, wanted %#v): %v", s, tok.Extra, tExtra, tok) + } } i += 2 + if TokenExtraTypeLookup[tt] != nil { + i++ + } } if tok := tz.Next(); tok.Type != TokenEOF { @@ -44,19 +58,21 @@ func TestMatchers(t *testing.T) { checkMatch(`'ab"cd'`, TokenString, `ab"cd`) checkMatch(`'ab\'cd'`, TokenString, `ab'cd`) checkMatch(`'ab\\cd'`, TokenString, `ab\cd`) - checkMatch("#name", TokenHash, "name") - checkMatch("##name", TokenDelim, "#", TokenHash, "name") - checkMatch("42''", TokenNumber, "42", TokenString, "") - checkMatch("+42", TokenNumber, "+42") - checkMatch("-42", TokenNumber, "-42") - checkMatch("4.2", TokenNumber, "4.2") - checkMatch(".42", TokenNumber, ".42") - checkMatch("+.42", TokenNumber, "+.42") - checkMatch("-.42", TokenNumber, "-.42") - checkMatch("42%", TokenPercentage, "42") - checkMatch("4.2%", TokenPercentage, "4.2") - checkMatch(".42%", TokenPercentage, ".42") - checkMatch("42px", TokenDimension, "42") // TODO check the dimension stored in .Extra + checkMatch("#name", 
TokenHash, "name", &TokenExtraHash{IsIdentifier: true}) + checkMatch("##name", TokenDelim, "#", TokenHash, "name", &TokenExtraHash{IsIdentifier: true}) + checkMatch("42''", TokenNumber, "42", &TokenExtraNumeric{}, TokenString, "") + checkMatch("+42", TokenNumber, "+42", &TokenExtraNumeric{}) + checkMatch("-42", TokenNumber, "-42", &TokenExtraNumeric{}) + checkMatch("42.", TokenNumber, "42", &TokenExtraNumeric{}, TokenDelim, ".") + checkMatch("42.0", TokenNumber, "42.0", &TokenExtraNumeric{NonInteger: true}) + checkMatch("4.2", TokenNumber, "4.2", &TokenExtraNumeric{NonInteger: true}) + checkMatch(".42", TokenNumber, ".42", &TokenExtraNumeric{NonInteger: true}) + checkMatch("+.42", TokenNumber, "+.42", &TokenExtraNumeric{NonInteger: true}) + checkMatch("-.42", TokenNumber, "-.42", &TokenExtraNumeric{NonInteger: true}) + checkMatch("42%", TokenPercentage, "42", &TokenExtraNumeric{}) + checkMatch("4.2%", TokenPercentage, "4.2", &TokenExtraNumeric{NonInteger: true}) + checkMatch(".42%", TokenPercentage, ".42", &TokenExtraNumeric{NonInteger: true}) + checkMatch("42px", TokenDimension, "42", &TokenExtraNumeric{Dimension: "px"}) // TODO check the dimension stored in .Extra checkMatch("url(http://domain.com)", TokenURI, "http://domain.com") checkMatch("url( http://domain.com/uri/between/space )", TokenURI, "http://domain.com/uri/between/space") checkMatch("url('http://domain.com/uri/between/single/quote')", TokenURI, "http://domain.com/uri/between/single/quote") @@ -75,9 +91,9 @@ func TestMatchers(t *testing.T) { TokenS, " ", TokenURI, "http://domain.com/uri/2", ) - checkMatch("U+0042", TokenUnicodeRange, "U+0042") - checkMatch("U+FFFFFF", TokenUnicodeRange, "U+FFFFFF") - checkMatch("U+??????", TokenUnicodeRange, "U+0000-FFFFFF") + checkMatch("U+0042", TokenUnicodeRange, "U+0042", &TokenExtraUnicodeRange{Start: 0x42, End: 0x42}) + checkMatch("U+FFFFFF", TokenUnicodeRange, "U+FFFFFF", &TokenExtraUnicodeRange{Start: 0xFFFFFF, End: 0xFFFFFF}) + checkMatch("U+??????", TokenUnicodeRange, "U+0000-FFFFFF", &TokenExtraUnicodeRange{Start: 0, End: 0xFFFFFF}) checkMatch("", TokenCDC, "-->") checkMatch(" \n \t \n", TokenS, "\n") // TODO - whitespace preservation @@ -100,12 +116,15 @@ func TestMatchers(t *testing.T) { TokenOpenBrace, "{", TokenS, " ", TokenIdent, "bar", TokenColon, ":", TokenS, " ", TokenFunction, "rgb", - TokenNumber, "255", TokenComma, ",", TokenS, " ", - TokenNumber, "0", TokenComma, ",", TokenS, " ", - TokenNumber, "127", TokenCloseParen, ")", + TokenNumber, "255", &TokenExtraNumeric{}, TokenComma, ",", TokenS, " ", + TokenNumber, "0", &TokenExtraNumeric{}, TokenComma, ",", TokenS, " ", + TokenNumber, "127", &TokenExtraNumeric{}, TokenCloseParen, ")", TokenSemicolon, ";", TokenS, " ", TokenCloseBrace, "}", ) // Fuzzing results - checkMatch("ur(0", TokenFunction, "ur", TokenNumber, "0") + checkMatch("ur(0", TokenFunction, "ur", TokenNumber, "0", &TokenExtraNumeric{}) + checkMatch("1\\15", TokenDimension, "1", &TokenExtraNumeric{Dimension: "\x15"}) + checkMatch("url(0t')", TokenBadURI, "0t", &TokenExtraError{}) + checkMatch("uri/", TokenIdent, "uri", TokenDelim, "/") } diff --git a/scanner/token.go b/scanner/token.go index ce5233d..49cab7c 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "strings" + "unicode/utf8" ) // TokenType identifies the type of lexical tokens. 
@@ -194,10 +195,7 @@ func (e *TokenExtraNumeric) String() string { if e == nil { return "" } - if e.Dimension != "" { - return e.Dimension - } - return "" + return e.Dimension } // TokenExtraUnicodeRange is attached to a TokenUnicodeRange. @@ -248,19 +246,81 @@ func (e *TokenExtraError) ParseError() *ParseError { return pe } -func escapeIdentifier(s string) string { - // TODO - return s -} +func escapeIdentifier(s string) string { return escapeIdent(s, 0) } +func escapeDimension(s string) string { return escapeIdent(s, 2) } + +func escapeIdent(s string, mode int) string { + if s == "" { + return "" + } + var buf bytes.Buffer + buf.Grow(len(s)) + anyChanges := false + + // Handle first character + // dashes allowed at start only for TokenIdent-ish + // eE not allowed at start for Dimension + if !isNameStart(s[0]) && s[0] != '-' && s[0] != 'e' && s[0] != 'E' { + buf.WriteByte('\\') + buf.WriteByte(s[0]) + anyChanges = true + } else if s[0] == 'e' || s[0] == 'E' { + if mode == 2 { + buf.WriteByte('\\') + anyChanges = true + } + buf.WriteByte(s[0]) + } else if s[0] == '-' { + if len(s) == 1 { + return "\\-" + } else if isNameStart(s[1]) { + buf.WriteByte('-') + } else { + buf.WriteString("\\-") + } + } else { + buf.WriteByte(s[0]) + } + // Write the rest of the name + for i := 1; i < len(s); i++ { + if !isNameCode(s[i]) { + fmt.Fprintf(&buf, "\\%X", s[i]) + anyChanges = true + } else { + buf.WriteByte(s[i]) + } + } -func escapeDimension(s string) string { - if strings.HasPrefix(s, "e") || strings.HasPrefix(s, "E") { - return "\\" + escapeIdentifier(s) + if !anyChanges { + return s } - return escapeIdentifier(s) + return buf.String() } -var escapeStringReplacer = strings.NewReplacer("\"", "\\\"", "\n", "\\0A ", "\\", "\\\\") +func escapeString(s string) string { + var buf bytes.Buffer + buf.WriteByte('"') + for i := 0; i < len(s); i++ { + switch s[i] { + case '"': + buf.WriteString("\\\"") + continue + case '\n': + buf.WriteString("\\0A ") + continue + case '\\': + buf.WriteString("\\\\") + continue + } + if s[i] < utf8.RuneSelf && isNonPrintable(s[i]) { + fmt.Fprintf(&buf, "\\%X", s[i]) + continue + } + buf.WriteByte(s[i]) + } + buf.WriteByte('"') + return buf.String() +} func (t *Token) Render() string { var buf bytes.Buffer @@ -291,26 +351,35 @@ func (t *Token) WriteTo(w io.Writer) { fmt.Fprint(w, t.Value, "%") case TokenDimension: e := t.Extra.(*TokenExtraNumeric) - fmt.Fprint(w, t.Value, e.Dimension) + fmt.Fprint(w, t.Value, escapeDimension(e.Dimension)) case TokenString: - io.WriteString(w, "\"") - escapeStringReplacer.WriteString(w, t.Value) - io.WriteString(w, "\"") + io.WriteString(w, escapeString(t.Value)) case TokenURI: - io.WriteString(w, "url(\"") - escapeStringReplacer.WriteString(w, t.Value) - io.WriteString(w, "\")") + io.WriteString(w, "url(") + io.WriteString(w, escapeString(t.Value)) + io.WriteString(w, ")") case TokenUnicodeRange: - fmt.Fprint(w, t.Extra.String()) + io.WriteString(w, t.Extra.String()) case TokenComment: io.WriteString(w, "/*") io.WriteString(w, t.Value) io.WriteString(w, "/*") case TokenFunction: - fmt.Fprint(w, t.Value, "(") + io.WriteString(w, t.Value) + io.WriteString(w, "(") - case TokenBadEscape, TokenBadString, TokenBadURI: - fmt.Fprint(w, t.Value) + case TokenBadEscape: + io.WriteString(w, "\\\n") + case TokenBadString: + io.WriteString(w, "\"") + io.WriteString(w, t.Value) + io.WriteString(w, "\n") + case TokenBadURI: + io.WriteString(w, "url(") + str := escapeString(t.Value) + str = strings.TrimSuffix(str, "\"") + io.WriteString(w, str) + 
io.WriteString(w, "\n)") default: fmt.Fprint(w, t.Value) } diff --git a/scanner/tokenizer.go b/scanner/tokenizer.go index 927d59d..6c4babd 100644 --- a/scanner/tokenizer.go +++ b/scanner/tokenizer.go @@ -25,9 +25,17 @@ type Tokenizer struct { err error peek [3]byte + ErrorMode int + tok Token } +const ( + // Default error mode - tokenization errors are represented as special tokens in the stream, and I/O errors are TokenError. + ErrorModeTokens = iota + ErrorModeFatal +) + // Construct a Tokenizer from the given input. Input need not be normalized. func NewTokenizer(r io.Reader) *Tokenizer { return &Tokenizer{ @@ -89,7 +97,8 @@ func (z *Tokenizer) Err() error { func (z *Tokenizer) AcknowledgeError() { _, ok := z.err.(*ParseError) if !ok { - panic("cssparse: AcknowledgeError() called for a foreign (non-syntax) error") + // TODO ErrorMode + return } z.err = nil } @@ -203,6 +212,10 @@ func isStartNumber(p []byte) bool { return false } +func isNonPrintable(by byte) bool { + return (0 <= by && by <= 0x08) || (0x0B == by) || (0x0E <= by && by <= 0x1F) || (0x7F == by) +} + // repeek must be called before the following: func (z *Tokenizer) nextIsEscape() bool { @@ -356,7 +369,7 @@ func (z *Tokenizer) consume() Token { return z.consumeIdentish() } z.nextByte() - z.err = errBadEscape + // z.err = errBadEscape return premadeTokens['\\'] case 'U', 'u': z.unreadByte() @@ -517,14 +530,14 @@ func (z *Tokenizer) consumeString(delim byte) Token { } } else if by == '\n' { z.unreadByte() - z.err = &ParseError{ + /* z.err = */ er := &ParseError{ Type: TokenBadString, Message: "unterminated string", } return Token{ Type: TokenBadString, Value: string(frag), - Extra: &TokenExtraError{Err: z.err}, + Extra: &TokenExtraError{Err: er}, } } else if by == '\\' { z.unreadByte() @@ -567,12 +580,12 @@ func (z *Tokenizer) consumeURL() Token { if t.Type == TokenBadString { t.Type = TokenBadURI t.Value += z.consumeBadURL() - z.err = &ParseError{ + /* z.err = */ pe := &ParseError{ Type: TokenBadURI, Message: "unterminated string in url()", } t.Extra = &TokenExtraError{ - Err: z.err, + Err: pe, } return t } @@ -585,12 +598,12 @@ func (z *Tokenizer) consumeURL() Token { } t.Type = TokenBadURI t.Value += z.consumeBadURL() - z.err = &ParseError{ + /* z.err = */ pe := &ParseError{ Type: TokenBadURI, Message: "url() with string missing close parenthesis", } t.Extra = &TokenExtraError{ - Err: z.err, + Err: pe, } return t } @@ -607,34 +620,34 @@ func (z *Tokenizer) consumeURL() Token { z.nextByte() // ')' return Token{Type: TokenURI, Value: string(frag)} } - z.err = &ParseError{ + /* z.err = */ pe := &ParseError{ Type: TokenBadURI, Message: "bare url() with internal whitespace", } return Token{ Type: TokenBadURI, Value: string(frag) + z.consumeBadURL(), - Extra: &TokenExtraError{Err: z.err}, + Extra: &TokenExtraError{Err: pe}, } } else if by == '\'' || by == '"' || by == '(' { - z.err = &ParseError{ + /* z.err = */ pe := &ParseError{ Type: TokenBadURI, Message: fmt.Sprintf("bare url() with illegal character '%c'", by), } return Token{ Type: TokenBadURI, Value: string(frag) + z.consumeBadURL(), - Extra: &TokenExtraError{Err: z.err}, + Extra: &TokenExtraError{Err: pe}, } - } else if (0 <= by && by <= 0x08) || (0x0B == by) || (0x0E <= by && by <= 0x1F) || (0x7F == by) { - z.err = &ParseError{ + } else if isNonPrintable(by) { + /* z.err = */ pe := &ParseError{ Type: TokenBadURI, Message: fmt.Sprintf("bare url() with unprintable character '%d'", by), } return Token{ Type: TokenBadURI, Value: string(frag) + z.consumeBadURL(), - Extra: 
&TokenExtraError{Err: z.err}, + Extra: &TokenExtraError{Err: pe}, } } else if by == '\\' { z.unreadByte() @@ -646,14 +659,14 @@ func (z *Tokenizer) consumeURL() Token { n := utf8.EncodeRune(tmp[:], cp) frag = append(frag, tmp[:n]...) } else { - z.err = &ParseError{ + /* z.err = */ pe := &ParseError{ Type: TokenBadURI, Message: fmt.Sprintf("bare url() with invalid escape"), } return Token{ Type: TokenBadURI, Value: string(frag) + z.consumeBadURL(), - Extra: &TokenExtraError{Err: z.err}, + Extra: &TokenExtraError{Err: pe}, } } } else { From 4c0a5effa1e3a1cb0d73c155e28fae2526b904ec Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 14:32:44 -0700 Subject: [PATCH 14/33] More fixes from fuzzing --- scanner/scanner_test.go | 14 +++++++++++++- scanner/token.go | 13 ++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index adee06a..1a3b9c4 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -23,7 +23,9 @@ func TestMatchers(t *testing.T) { if TokenExtraTypeLookup[tt] != nil { tExtra = ttList[i+2].(TokenExtra) } - if tok := tz.Next(); tok.Type != tt || tok.Value != tVal { + if tok := tz.Next(); tok.Type != tt { + t.Errorf("did not match: %s (got %v, wanted %v)", s, tok, tt) + } else if tok.Value != tVal { t.Errorf("did not match: %s (got %s, wanted %s): %v", s, tok.Value, tVal, tok) } else if tExtra != nil && !reflect.DeepEqual(tok.Extra, tExtra) { if tt.StopToken() && tt != TokenError && tt != TokenEOF { @@ -127,4 +129,14 @@ func TestMatchers(t *testing.T) { checkMatch("1\\15", TokenDimension, "1", &TokenExtraNumeric{Dimension: "\x15"}) checkMatch("url(0t')", TokenBadURI, "0t", &TokenExtraError{}) checkMatch("uri/", TokenIdent, "uri", TokenDelim, "/") + checkMatch("\x00", TokenIdent, "\uFFFD") + checkMatch("a\\0", TokenIdent, "a\uFFFD") + checkMatch("b\\\\0", TokenIdent, "b\\0") + checkMatch("00\\d", TokenDimension, "00", &TokenExtraNumeric{Dimension: "\r"}) + // note: \f is form feed, which is 0x0C + checkMatch("\\0\\0\\C\\\f\\\\0", + TokenIdent, "\uFFFD\uFFFD\x0C\x0C\\0") + // String running to EOF is success, not badstring + checkMatch("\"a0\\d", TokenString, "a0\x0D") + checkMatch("\"a0\r", TokenBadString, "a0", &TokenExtraError{}, TokenS, "\n") } diff --git a/scanner/token.go b/scanner/token.go index 49cab7c..ed8c303 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -261,8 +261,12 @@ func escapeIdent(s string, mode int) string { // dashes allowed at start only for TokenIdent-ish // eE not allowed at start for Dimension if !isNameStart(s[0]) && s[0] != '-' && s[0] != 'e' && s[0] != 'E' { - buf.WriteByte('\\') - buf.WriteByte(s[0]) + if isNonPrintable(s[0]) || s[0] == '\r' || s[0] == '\n' { + fmt.Fprintf(&buf, "\\%X ", s[0]) + } else { + buf.WriteByte('\\') + buf.WriteByte(s[0]) + } anyChanges = true } else if s[0] == 'e' || s[0] == 'E' { if mode == 2 { @@ -284,7 +288,7 @@ func escapeIdent(s string, mode int) string { // Write the rest of the name for i := 1; i < len(s); i++ { if !isNameCode(s[i]) { - fmt.Fprintf(&buf, "\\%X", s[i]) + fmt.Fprintf(&buf, "\\%X ", s[i]) anyChanges = true } else { buf.WriteByte(s[i]) @@ -308,6 +312,9 @@ func escapeString(s string) string { case '\n': buf.WriteString("\\0A ") continue + case '\r': + buf.WriteString("\\0D ") + continue case '\\': buf.WriteString("\\\\") continue From 4c09d638c884748f2c0873b5bf4e07b64e9aed0e Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 14:48:01 -0700 Subject: [PATCH 15/33] Fix '5e', '#123', and comments --- 
scanner/scanner_test.go | 2 + scanner/token.go | 83 +++++++++++++++++++++++++++++------------ 2 files changed, 62 insertions(+), 23 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 1a3b9c4..89b1cca 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -62,6 +62,7 @@ func TestMatchers(t *testing.T) { checkMatch(`'ab\\cd'`, TokenString, `ab\cd`) checkMatch("#name", TokenHash, "name", &TokenExtraHash{IsIdentifier: true}) checkMatch("##name", TokenDelim, "#", TokenHash, "name", &TokenExtraHash{IsIdentifier: true}) + checkMatch("#123", TokenHash, "123", &TokenExtraHash{IsIdentifier: false}) checkMatch("42''", TokenNumber, "42", &TokenExtraNumeric{}, TokenString, "") checkMatch("+42", TokenNumber, "+42", &TokenExtraNumeric{}) checkMatch("-42", TokenNumber, "-42", &TokenExtraNumeric{}) @@ -139,4 +140,5 @@ func TestMatchers(t *testing.T) { // String running to EOF is success, not badstring checkMatch("\"a0\\d", TokenString, "a0\x0D") checkMatch("\"a0\r", TokenBadString, "a0", &TokenExtraError{}, TokenS, "\n") + checkMatch("5e", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e"}) } diff --git a/scanner/token.go b/scanner/token.go index ed8c303..e4929e7 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -247,8 +247,30 @@ func (e *TokenExtraError) ParseError() *ParseError { } func escapeIdentifier(s string) string { return escapeIdent(s, 0) } +func escapeHashName(s string) string { return escapeIdent(s, 1) } func escapeDimension(s string) string { return escapeIdent(s, 2) } +func needsHexEscaping(c byte, mode int) bool { + if c < 0x20 { + return true + } + if c >= utf8.RuneSelf { + return false + } + if mode == 2 { + if c == 'e' || c == 'E' { + return true + } + } + if c == '\\' { + return true + } + if isNameCode(c) { + return false + } + return true +} + func escapeIdent(s string, mode int) string { if s == "" { return "" @@ -257,36 +279,46 @@ func escapeIdent(s string, mode int) string { buf.Grow(len(s)) anyChanges := false + var i int + // Handle first character // dashes allowed at start only for TokenIdent-ish // eE not allowed at start for Dimension - if !isNameStart(s[0]) && s[0] != '-' && s[0] != 'e' && s[0] != 'E' { - if isNonPrintable(s[0]) || s[0] == '\r' || s[0] == '\n' { - fmt.Fprintf(&buf, "\\%X ", s[0]) + if mode != 1 { + if !isNameStart(s[0]) && s[0] != '-' && s[0] != 'e' && s[0] != 'E' { + if needsHexEscaping(s[0], mode) { + fmt.Fprintf(&buf, "\\%X ", s[0]) + anyChanges = true + } else { + buf.WriteByte('\\') + buf.WriteByte(s[0]) + anyChanges = true + } + } else if s[0] == 'e' || s[0] == 'E' { + if mode == 2 { + fmt.Fprintf(&buf, "\\%X ", s[0]) + anyChanges = true + } else { + buf.WriteByte(s[0]) + } + } else if s[0] == '-' { + if len(s) == 1 { + return "\\-" + } else if isNameStart(s[1]) { + buf.WriteByte('-') + } else { + buf.WriteString("\\-") + anyChanges = true + } } else { - buf.WriteByte('\\') buf.WriteByte(s[0]) } - anyChanges = true - } else if s[0] == 'e' || s[0] == 'E' { - if mode == 2 { - buf.WriteByte('\\') - anyChanges = true - } - buf.WriteByte(s[0]) - } else if s[0] == '-' { - if len(s) == 1 { - return "\\-" - } else if isNameStart(s[1]) { - buf.WriteByte('-') - } else { - buf.WriteString("\\-") - } + i = 1 } else { - buf.WriteByte(s[0]) + i = 0 } // Write the rest of the name - for i := 1; i < len(s); i++ { + for ; i < len(s); i++ { if !isNameCode(s[i]) { fmt.Fprintf(&buf, "\\%X ", s[i]) anyChanges = true @@ -352,8 +384,13 @@ func (t *Token) WriteTo(w io.Writer) { fmt.Fprint(w, t.Value) } case TokenHash: + 
e := t.Extra.(*TokenExtraHash) io.WriteString(w, "#") - fmt.Fprint(w, escapeIdentifier(t.Value)) + if e.IsIdentifier { + fmt.Fprint(w, escapeIdentifier(t.Value)) + } else { + fmt.Fprint(w, escapeHashName(t.Value)) + } case TokenPercentage: fmt.Fprint(w, t.Value, "%") case TokenDimension: @@ -370,7 +407,7 @@ func (t *Token) WriteTo(w io.Writer) { case TokenComment: io.WriteString(w, "/*") io.WriteString(w, t.Value) - io.WriteString(w, "/*") + io.WriteString(w, "*/") case TokenFunction: io.WriteString(w, t.Value) io.WriteString(w, "(") From 3c8aa10b959a2ae758a60d6b5a163f8003438a2e Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 14:48:58 -0700 Subject: [PATCH 16/33] fixup: add more comment tests --- scanner/fuzz.go | 1 + scanner/scanner_test.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/scanner/fuzz.go b/scanner/fuzz.go index 9eb7146..d29b52f 100644 --- a/scanner/fuzz.go +++ b/scanner/fuzz.go @@ -8,6 +8,7 @@ import ( ) func Fuzz(b []byte) int { + fmt.Printf("=== Start fuzz test ===\n%s\n", b) var tokens []Token tz := NewTokenizer(bytes.NewReader(b)) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 89b1cca..c09315e 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -101,6 +101,8 @@ func TestMatchers(t *testing.T) { checkMatch("-->", TokenCDC, "-->") checkMatch(" \n \t \n", TokenS, "\n") // TODO - whitespace preservation checkMatch("/**/", TokenComment, "") + checkMatch("/***/", TokenComment, "*") + checkMatch("/**", TokenComment, "*") checkMatch("/*foo*/", TokenComment, "foo") checkMatch("/* foo */", TokenComment, " foo ") checkMatch("bar(", TokenFunction, "bar") From d163d68db23f24d3713352ee89ff2d328015b6f7 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 14:50:55 -0700 Subject: [PATCH 17/33] add tests for '5e', '5e-', '5e-3' --- scanner/scanner_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index c09315e..389a302 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -76,6 +76,11 @@ func TestMatchers(t *testing.T) { checkMatch("4.2%", TokenPercentage, "4.2", &TokenExtraNumeric{NonInteger: true}) checkMatch(".42%", TokenPercentage, ".42", &TokenExtraNumeric{NonInteger: true}) checkMatch("42px", TokenDimension, "42", &TokenExtraNumeric{Dimension: "px"}) // TODO check the dimension stored in .Extra + + checkMatch("5e", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e"}) + checkMatch("5e-", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e-"}) + checkMatch("5e-3", TokenNumber, "5e-3", &TokenExtraNumeric{NonInteger: true}) + checkMatch("url(http://domain.com)", TokenURI, "http://domain.com") checkMatch("url( http://domain.com/uri/between/space )", TokenURI, "http://domain.com/uri/between/space") checkMatch("url('http://domain.com/uri/between/single/quote')", TokenURI, "http://domain.com/uri/between/single/quote") @@ -142,5 +147,4 @@ func TestMatchers(t *testing.T) { // String running to EOF is success, not badstring checkMatch("\"a0\\d", TokenString, "a0\x0D") checkMatch("\"a0\r", TokenBadString, "a0", &TokenExtraError{}, TokenS, "\n") - checkMatch("5e", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e"}) } From f065792a9707741a7896f8af92a4c913e5232d15 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 14:58:03 -0700 Subject: [PATCH 18/33] fix missing space after hex escape --- scanner/scanner_test.go | 1 + scanner/token.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/scanner/scanner_test.go b/scanner/scanner_test.go index 389a302..97c1155 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -80,6 +80,7 @@ func TestMatchers(t *testing.T) { checkMatch("5e", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e"}) checkMatch("5e-", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e-"}) checkMatch("5e-3", TokenNumber, "5e-3", &TokenExtraNumeric{NonInteger: true}) + checkMatch("5e-\xf1", TokenDimension, "5", &TokenExtraNumeric{Dimension: "e-\xf1"}) checkMatch("url(http://domain.com)", TokenURI, "http://domain.com") checkMatch("url( http://domain.com/uri/between/space )", TokenURI, "http://domain.com/uri/between/space") diff --git a/scanner/token.go b/scanner/token.go index e4929e7..a10817b 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -352,7 +352,7 @@ func escapeString(s string) string { continue } if s[i] < utf8.RuneSelf && isNonPrintable(s[i]) { - fmt.Fprintf(&buf, "\\%X", s[i]) + fmt.Fprintf(&buf, "\\%X ", s[i]) continue } buf.WriteByte(s[i]) From 0386e01b94b3b2aa0c2ed95581c0606c1e212cf4 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 15:02:40 -0700 Subject: [PATCH 19/33] call escapeIdentifer() for TokenFunction --- scanner/scanner_test.go | 1 + scanner/token.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 97c1155..4b6d190 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -148,4 +148,5 @@ func TestMatchers(t *testing.T) { // String running to EOF is success, not badstring checkMatch("\"a0\\d", TokenString, "a0\x0D") checkMatch("\"a0\r", TokenBadString, "a0", &TokenExtraError{}, TokenS, "\n") + checkMatch("\\fun(", TokenFunction, "\x0fun") } diff --git a/scanner/token.go b/scanner/token.go index a10817b..7d9e068 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -409,7 +409,7 @@ func (t *Token) WriteTo(w io.Writer) { io.WriteString(w, t.Value) io.WriteString(w, "*/") case TokenFunction: - io.WriteString(w, t.Value) + io.WriteString(w, escapeIdentifier(t.Value)) io.WriteString(w, "(") case TokenBadEscape: From b5c30c672a9800a104c29ac8780067448177530f Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 15:11:11 -0700 Subject: [PATCH 20/33] Fuzz fixes for bad-string --- scanner/scanner_test.go | 1 + scanner/token.go | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index 4b6d190..f583b36 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -149,4 +149,5 @@ func TestMatchers(t *testing.T) { checkMatch("\"a0\\d", TokenString, "a0\x0D") checkMatch("\"a0\r", TokenBadString, "a0", &TokenExtraError{}, TokenS, "\n") checkMatch("\\fun(", TokenFunction, "\x0fun") + checkMatch("\"abc\\\"def\nghi", TokenBadString, "abc\"def", &TokenExtraError{}, TokenS, "\n", TokenIdent, "ghi") } diff --git a/scanner/token.go b/scanner/token.go index 7d9e068..bcec079 100644 --- a/scanner/token.go +++ b/scanner/token.go @@ -333,14 +333,20 @@ func escapeIdent(s string, mode int) string { return buf.String() } -func escapeString(s string) string { +func escapeString(s string, delim byte) string { var buf bytes.Buffer - buf.WriteByte('"') + if delim != 0 { + buf.WriteByte(delim) + } for i := 0; i < len(s); i++ { switch s[i] { case '"': buf.WriteString("\\\"") continue + case delim: + buf.WriteByte('\\') + buf.WriteByte(delim) + continue case '\n': buf.WriteString("\\0A ") continue @@ -357,7 +363,9 @@ func escapeString(s 
string) string { } buf.WriteByte(s[i]) } - buf.WriteByte('"') + if delim != 0 { + buf.WriteByte(delim) + } return buf.String() } @@ -397,10 +405,10 @@ func (t *Token) WriteTo(w io.Writer) { e := t.Extra.(*TokenExtraNumeric) fmt.Fprint(w, t.Value, escapeDimension(e.Dimension)) case TokenString: - io.WriteString(w, escapeString(t.Value)) + io.WriteString(w, escapeString(t.Value, '"')) case TokenURI: io.WriteString(w, "url(") - io.WriteString(w, escapeString(t.Value)) + io.WriteString(w, escapeString(t.Value, '"')) io.WriteString(w, ")") case TokenUnicodeRange: io.WriteString(w, t.Extra.String()) @@ -416,11 +424,11 @@ func (t *Token) WriteTo(w io.Writer) { io.WriteString(w, "\\\n") case TokenBadString: io.WriteString(w, "\"") - io.WriteString(w, t.Value) + io.WriteString(w, escapeString(t.Value, 0)) io.WriteString(w, "\n") case TokenBadURI: - io.WriteString(w, "url(") - str := escapeString(t.Value) + io.WriteString(w, "url(\"") + str := escapeString(t.Value, 0) str = strings.TrimSuffix(str, "\"") io.WriteString(w, str) io.WriteString(w, "\n)") From 87fb86e0b4bcb334a1aed976a8e16a40fbf27fbe Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 15:37:29 -0700 Subject: [PATCH 21/33] Rename package, update documentation --- scanner/doc.go | 33 -- scanner/scanner.go | 287 ------------------ {scanner => tokenizer}/crlf.go | 2 +- tokenizer/doc.go | 52 ++++ {scanner => tokenizer}/fuzz.go | 4 +- {scanner => tokenizer}/scanner_test.go | 4 +- .../testdata/fuzz/corpus/test-1 | 0 .../testdata/fuzz/corpus/test-10 | 0 .../testdata/fuzz/corpus/test-11 | 0 .../testdata/fuzz/corpus/test-12 | 0 .../testdata/fuzz/corpus/test-13 | 0 .../testdata/fuzz/corpus/test-14 | 0 .../testdata/fuzz/corpus/test-15 | 0 .../testdata/fuzz/corpus/test-16 | 0 .../testdata/fuzz/corpus/test-17 | 0 .../testdata/fuzz/corpus/test-18 | 0 .../testdata/fuzz/corpus/test-19 | 0 .../testdata/fuzz/corpus/test-2 | 0 .../testdata/fuzz/corpus/test-20 | 0 .../testdata/fuzz/corpus/test-21 | 0 .../testdata/fuzz/corpus/test-22 | 0 .../testdata/fuzz/corpus/test-23 | 0 .../testdata/fuzz/corpus/test-24 | 0 .../testdata/fuzz/corpus/test-25 | 0 .../testdata/fuzz/corpus/test-26 | 0 .../testdata/fuzz/corpus/test-27 | 0 .../testdata/fuzz/corpus/test-28 | 0 .../testdata/fuzz/corpus/test-29 | 0 .../testdata/fuzz/corpus/test-3 | 0 .../testdata/fuzz/corpus/test-30 | 0 .../testdata/fuzz/corpus/test-31 | 0 .../testdata/fuzz/corpus/test-32 | 0 .../testdata/fuzz/corpus/test-33 | 0 .../testdata/fuzz/corpus/test-34 | 0 .../testdata/fuzz/corpus/test-35 | 0 .../testdata/fuzz/corpus/test-36 | 0 .../testdata/fuzz/corpus/test-37 | 0 .../testdata/fuzz/corpus/test-38 | 0 .../testdata/fuzz/corpus/test-39 | 0 .../testdata/fuzz/corpus/test-4 | 0 .../testdata/fuzz/corpus/test-40 | 0 .../testdata/fuzz/corpus/test-41 | 0 .../testdata/fuzz/corpus/test-42 | 0 .../testdata/fuzz/corpus/test-43 | 0 .../testdata/fuzz/corpus/test-44 | 0 .../testdata/fuzz/corpus/test-45 | 0 .../testdata/fuzz/corpus/test-46 | 0 .../testdata/fuzz/corpus/test-47 | 0 .../testdata/fuzz/corpus/test-48 | 0 .../testdata/fuzz/corpus/test-49 | 0 .../testdata/fuzz/corpus/test-5 | 0 .../testdata/fuzz/corpus/test-6 | 0 .../testdata/fuzz/corpus/test-7 | 0 .../testdata/fuzz/corpus/test-8 | 0 .../testdata/fuzz/corpus/test-9 | 0 {scanner => tokenizer}/token.go | 2 +- {scanner => tokenizer}/tokenizer.go | 2 +- 57 files changed, 61 insertions(+), 325 deletions(-) delete mode 100644 scanner/doc.go delete mode 100644 scanner/scanner.go rename {scanner => tokenizer}/crlf.go (98%) create mode 100644 tokenizer/doc.go 
rename {scanner => tokenizer}/fuzz.go (97%) rename {scanner => tokenizer}/scanner_test.go (98%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-1 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-10 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-11 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-12 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-13 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-14 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-15 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-16 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-17 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-18 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-19 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-2 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-20 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-21 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-22 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-23 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-24 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-25 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-26 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-27 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-28 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-29 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-3 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-30 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-31 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-32 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-33 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-34 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-35 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-36 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-37 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-38 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-39 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-4 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-40 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-41 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-42 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-43 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-44 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-45 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-46 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-47 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-48 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-49 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-5 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-6 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-7 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-8 (100%) rename {scanner => tokenizer}/testdata/fuzz/corpus/test-9 (100%) rename {scanner => tokenizer}/token.go (99%) rename {scanner => tokenizer}/tokenizer.go (99%) diff --git a/scanner/doc.go b/scanner/doc.go deleted file mode 100644 index f19850e..0000000 --- a/scanner/doc.go +++ /dev/null @@ -1,33 
+0,0 @@ -// Copyright 2012 The Gorilla Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* -Package gorilla/css/scanner generates tokens for a CSS3 input. - -It follows the CSS3 specification located at: - - http://www.w3.org/TR/css3-syntax/ - -To use it, create a new scanner for a given CSS string and call Next() until -the token returned has type TokenEOF or TokenError: - - s := scanner.New(myCSS) - for { - token := s.Next() - if token.Type == scanner.TokenEOF || token.Type == scanner.TokenError { - break - } - // Do something with the token... - } - -Following the CSS3 specification, an error can only occur when the scanner -finds an unclosed quote or unclosed comment. In these cases the text becomes -"untokenizable". Everything else is tokenizable and it is up to a parser -to make sense of the token stream (or ignore nonsensical token sequences). - -Note: the scanner doesn't perform lexical analysis or, in other words, it -doesn't care about the token context. It is intended to be used by a -lexer or parser. -*/ -package scanner diff --git a/scanner/scanner.go b/scanner/scanner.go deleted file mode 100644 index 7f034e2..0000000 --- a/scanner/scanner.go +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright (c) 2018 Kane York. Licensed under 2-Clause BSD. - -//+build ignore - -// Copyright 2012 The Gorilla Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package scanner - -import ( - "bufio" - "fmt" - "regexp" - "strings" - "unicode" - "unicode/utf8" -) - -// String returns a string representation of the token. -func (t *Token) String() string { - if len(t.Value) > 10 { - return fmt.Sprintf("%s (line: %d, column: %d): %.10q...", - t.Type, t.Line, t.Column, t.Value) - } - return fmt.Sprintf("%s (line: %d, column: %d): %q", - t.Type, t.Line, t.Column, t.Value) -} - -// All tokens ----------------------------------------------------------------- - -// Macros and productions ----------------------------------------------------- -// http://www.w3.org/TR/css3-syntax/#tokenization - -var macroRegexp = regexp.MustCompile(`\{[a-z]+\}`) - -// macros maps macro names to patterns to be expanded. 
-var macros = map[string]string{ - // must be escaped: `\.+*?()|[]{}^$` - "ident": `-?{nmstart}{nmchar}*`, - "name": `{nmchar}+`, - "nmstart": `[a-zA-Z_]|{nonascii}|{escape}`, - "nonascii": "[\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]", - "unicode": `\\[0-9a-fA-F]{1,6}{wc}?`, - "escape": "{unicode}|\\\\[\u0020-\u007E\u0080-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]", - "nmchar": `[a-zA-Z0-9_-]|{nonascii}|{escape}`, - "num": `[0-9]*\.[0-9]+|[0-9]+`, - "string": `"(?:{stringchar}|')*"|'(?:{stringchar}|")*'`, - "stringchar": `{urlchar}|[ ]|\\{nl}`, - "nl": `[\n\r\f]|\r\n`, - "w": `{wc}*`, - "wc": `[\t\n\f\r ]`, - - // urlchar should accept [(ascii characters minus those that need escaping)|{nonascii}|{escape}] - // ASCII characters range = `[\u0020-\u007e]` - // Skip space \u0020 = `[\u0021-\u007e]` - // Skip quotation mark \0022 = `[\u0021\u0023-\u007e]` - // Skip apostrophe \u0027 = `[\u0021\u0023-\u0026\u0028-\u007e]` - // Skip reverse solidus \u005c = `[\u0021\u0023-\u0026\u0028-\u005b\u005d\u007e]` - // Finally, the left square bracket (\u005b) and right (\u005d) needs escaping themselves - "urlchar": "[\u0021\u0023-\u0026\u0028-\\\u005b\\\u005d-\u007E]|{nonascii}|{escape}", -} - -// productions maps the list of tokens to patterns to be expanded. -var productions = map[TokenType]string{ - // Unused regexps (matched using other methods) are commented out. - TokenIdent: `{ident}`, - TokenAtKeyword: `@{ident}`, - TokenString: `{string}`, - TokenHash: `#{name}`, - TokenNumber: `{num}`, - TokenPercentage: `{num}%`, - TokenDimension: `{num}{ident}`, - TokenURI: `url\({w}(?:{string}|{urlchar}*?){w}\)`, - TokenUnicodeRange: `U\+[0-9A-F\?]{1,6}(?:-[0-9A-F]{1,6})?`, - //TokenCDO: ``, - TokenS: `{wc}+`, - TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`, - TokenFunction: `{ident}\(`, - //TokenIncludes: `~=`, - //TokenDashMatch: `\|=`, - //TokenPrefixMatch: `\^=`, - //TokenSuffixMatch: `\$=`, - //TokenSubstringMatch: `\*=`, - //TokenChar: `[^"']`, - //TokenBOM: "\uFEFF", -} - -// matchers maps the list of tokens to compiled regular expressions. -// -// The map is filled on init() using the macros and productions defined in -// the CSS specification. -var matchers = map[TokenType]*regexp.Regexp{} - -// matchOrder is the order to test regexps when first-char shortcuts -// can't be used. -var matchOrder = []TokenType{ - TokenURI, - TokenFunction, - TokenUnicodeRange, - TokenIdent, - TokenDimension, - TokenPercentage, - TokenNumber, - TokenCDC, -} - -func init() { - // replace macros and compile regexps for productions. - replaceMacro := func(s string) string { - return "(?:" + macros[s[1:len(s)-1]] + ")" - } - for t, s := range productions { - for macroRegexp.MatchString(s) { - s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro) - } - matchers[t] = regexp.MustCompile("^(?:" + s + ")") - } -} - -// Scanner -------------------------------------------------------------------- - -type Scanner struct { - input string - row int - col int - pos int -} - -// New returns a new CSS scanner for the given input. -func New(r *bufio.Reader) *Scanner { - // Normalize newlines. - input = strings.Replace(input, "\r\n", "\n", -1) - return &Scanner{ - input: input, - row: 1, - col: 1, - } -} - -// Next returns the next token from the input. -// -// At the end of the input the token type is TokenEOF. -// -// If the input can't be tokenized the token type is TokenError. This occurs -// in case of unclosed quotation marks or comments. 
-func (s *Scanner) Next() *Token { - if s.err != nil { - return s.err - } - if s.pos >= len(s.input) { - s.err = &Token{TokenEOF, "", s.row, s.col} - return s.err - } - if s.pos == 0 { - // Test BOM only once, at the beginning of the file. - if strings.HasPrefix(s.input, "\uFEFF") { - return s.emitSimple(TokenBOM, "\uFEFF") - } - } - // There's a lot we can guess based on the first byte so we'll take a - // shortcut before testing multiple regexps. - input := s.input[s.pos:] - switch input[0] { - case '\t', '\n', '\f', '\r', ' ': - // Whitespace. - return s.emitToken(TokenS, matchers[TokenS].FindString(input)) - case '.': - // Dot is too common to not have a quick check. - // We'll test if this is a Char; if it is followed by a number it is a - // dimension/percentage/number, and this will be matched later. - if len(input) > 1 && !unicode.IsDigit(rune(input[1])) { - return s.emitSimple(TokenChar, ".") - } - case '#': - // Another common one: Hash or Char. - if match := matchers[TokenHash].FindString(input); match != "" { - return s.emitToken(TokenHash, match) - } - return s.emitSimple(TokenChar, "#") - case '@': - // Another common one: AtKeyword or Char. - if match := matchers[TokenAtKeyword].FindString(input); match != "" { - return s.emitSimple(TokenAtKeyword, match) - } - return s.emitSimple(TokenChar, "@") - case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}': - // More common chars. - return s.emitSimple(TokenChar, string(input[0])) - case '"', '\'': - // String or error. - match := matchers[TokenString].FindString(input) - if match != "" { - return s.emitToken(TokenString, match) - } - - s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col} - return s.err - case '/': - // Comment, error or Char. - if len(input) > 1 && input[1] == '*' { - match := matchers[TokenComment].FindString(input) - if match != "" { - return s.emitToken(TokenComment, match) - } else { - s.err = &Token{TokenError, "unclosed comment", s.row, s.col} - return s.err - } - } - return s.emitSimple(TokenChar, "/") - case '~': - // Includes or Char. - return s.emitPrefixOrChar(TokenIncludes, "~=") - case '|': - // DashMatch or Char. - return s.emitPrefixOrChar(TokenDashMatch, "|=") - case '^': - // PrefixMatch or Char. - return s.emitPrefixOrChar(TokenPrefixMatch, "^=") - case '$': - // SuffixMatch or Char. - return s.emitPrefixOrChar(TokenSuffixMatch, "$=") - case '*': - // SubstringMatch or Char. - return s.emitPrefixOrChar(TokenSubstringMatch, "*=") - case '<': - // CDO or Char. - return s.emitPrefixOrChar(TokenCDO, "`, + TokenS: `{wc}+`, + TokenComment: `/\*[^\*]*[\*]+(?:[^/][^\*]*[\*]+)*/`, + TokenFunction: `{ident}\(`, + //TokenIncludes: `~=`, + //TokenDashMatch: `\|=`, + //TokenPrefixMatch: `\^=`, + //TokenSuffixMatch: `\$=`, + //TokenSubstringMatch: `\*=`, + //TokenChar: `[^"']`, + //TokenBOM: "\uFEFF", +} + +// matchers maps the list of tokens to compiled regular expressions. +// +// The map is filled on init() using the macros and productions defined in +// the CSS specification. +var matchers = map[tokenType]*regexp.Regexp{} + +// matchOrder is the order to test regexps when first-char shortcuts +// can't be used. +var matchOrder = []tokenType{ + TokenURI, + TokenFunction, + TokenUnicodeRange, + TokenIdent, + TokenDimension, + TokenPercentage, + TokenNumber, + TokenCDC, +} + +func init() { + // replace macros and compile regexps for productions. 
+ replaceMacro := func(s string) string { + return "(?:" + macros[s[1:len(s)-1]] + ")" + } + for t, s := range productions { + for macroRegexp.MatchString(s) { + s = macroRegexp.ReplaceAllStringFunc(s, replaceMacro) + } + matchers[t] = regexp.MustCompile("^(?:" + s + ")") + } +} + +// Scanner -------------------------------------------------------------------- + +// New returns a new CSS scanner for the given input. +func New(input string) *Scanner { + // Normalize newlines. + input = strings.Replace(input, "\r\n", "\n", -1) + return &Scanner{ + input: input, + row: 1, + col: 1, + } +} + +// Scanner scans an input and emits tokens following the CSS3 specification. +type Scanner struct { + input string + pos int + row int + col int + err *Token +} + +// Next returns the next token from the input. +// +// At the end of the input the token type is TokenEOF. +// +// If the input can't be tokenized the token type is TokenError. This occurs +// in case of unclosed quotation marks or comments. +func (s *Scanner) Next() *Token { + if s.err != nil { + return s.err + } + if s.pos >= len(s.input) { + s.err = &Token{TokenEOF, "", s.row, s.col} + return s.err + } + if s.pos == 0 { + // Test BOM only once, at the beginning of the file. + if strings.HasPrefix(s.input, "\uFEFF") { + return s.emitSimple(TokenBOM, "\uFEFF") + } + } + // There's a lot we can guess based on the first byte so we'll take a + // shortcut before testing multiple regexps. + input := s.input[s.pos:] + switch input[0] { + case '\t', '\n', '\f', '\r', ' ': + // Whitespace. + return s.emitToken(TokenS, matchers[TokenS].FindString(input)) + case '.': + // Dot is too common to not have a quick check. + // We'll test if this is a Char; if it is followed by a number it is a + // dimension/percentage/number, and this will be matched later. + if len(input) > 1 && !unicode.IsDigit(rune(input[1])) { + return s.emitSimple(TokenChar, ".") + } + case '#': + // Another common one: Hash or Char. + if match := matchers[TokenHash].FindString(input); match != "" { + return s.emitToken(TokenHash, match) + } + return s.emitSimple(TokenChar, "#") + case '@': + // Another common one: AtKeyword or Char. + if match := matchers[TokenAtKeyword].FindString(input); match != "" { + return s.emitSimple(TokenAtKeyword, match) + } + return s.emitSimple(TokenChar, "@") + case ':', ',', ';', '%', '&', '+', '=', '>', '(', ')', '[', ']', '{', '}': + // More common chars. + return s.emitSimple(TokenChar, string(input[0])) + case '"', '\'': + // String or error. + match := matchers[TokenString].FindString(input) + if match != "" { + return s.emitToken(TokenString, match) + } + + s.err = &Token{TokenError, "unclosed quotation mark", s.row, s.col} + return s.err + case '/': + // Comment, error or Char. + if len(input) > 1 && input[1] == '*' { + match := matchers[TokenComment].FindString(input) + if match != "" { + return s.emitToken(TokenComment, match) + } else { + s.err = &Token{TokenError, "unclosed comment", s.row, s.col} + return s.err + } + } + return s.emitSimple(TokenChar, "/") + case '~': + // Includes or Char. + return s.emitPrefixOrChar(TokenIncludes, "~=") + case '|': + // DashMatch or Char. + return s.emitPrefixOrChar(TokenDashMatch, "|=") + case '^': + // PrefixMatch or Char. + return s.emitPrefixOrChar(TokenPrefixMatch, "^=") + case '$': + // SuffixMatch or Char. + return s.emitPrefixOrChar(TokenSuffixMatch, "$=") + case '*': + // SubstringMatch or Char. + return s.emitPrefixOrChar(TokenSubstringMatch, "*=") + case '<': + // CDO or Char. 
+ return s.emitPrefixOrChar(TokenCDO, "", TokenCDC, "-->") + checkMatch(" \n \t \n", TokenS, " \n \t \n") + checkMatch("/* foo */", TokenComment, "/* foo */") + checkMatch("bar(", TokenFunction, "bar(") + checkMatch("~=", TokenIncludes, "~=") + checkMatch("|=", TokenDashMatch, "|=") + checkMatch("^=", TokenPrefixMatch, "^=") + checkMatch("$=", TokenSuffixMatch, "$=") + checkMatch("*=", TokenSubstringMatch, "*=") + checkMatch("{", TokenChar, "{") + checkMatch("\uFEFF", TokenBOM, "\uFEFF") + checkMatch(`╯︵┻━┻"stuff"`, TokenIdent, "╯︵┻━┻", TokenString, `"stuff"`) +} From 08b0d9cf2c02d3f34245e635da4264c8919afa38 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 15:40:27 -0700 Subject: [PATCH 23/33] Ignore fuzz results --- tokenizer/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 tokenizer/.gitignore diff --git a/tokenizer/.gitignore b/tokenizer/.gitignore new file mode 100644 index 0000000..0bc5f58 --- /dev/null +++ b/tokenizer/.gitignore @@ -0,0 +1 @@ +testdata/fuzz From ff8d7b8edd3e5f148db6eed491e65b36dcc0aa48 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 15:46:27 -0700 Subject: [PATCH 24/33] Remove failing "--\--" test, add test for #2 --- tokenizer/doc.go | 2 +- tokenizer/scanner_test.go | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tokenizer/doc.go b/tokenizer/doc.go index d8c693d..2cd6d19 100644 --- a/tokenizer/doc.go +++ b/tokenizer/doc.go @@ -49,4 +49,4 @@ Note: the scanner doesn't perform lexical analysis or, in other words, it doesn't care about the token context. It is intended to be used by a lexer or parser. */ -package tokenzier +package tokenizer diff --git a/tokenizer/scanner_test.go b/tokenizer/scanner_test.go index f57ef2d..6d49f3f 100644 --- a/tokenizer/scanner_test.go +++ b/tokenizer/scanner_test.go @@ -151,5 +151,8 @@ func TestMatchers(t *testing.T) { checkMatch("\"a0\r", TokenBadString, "a0", &TokenExtraError{}, TokenS, "\n") checkMatch("\\fun(", TokenFunction, "\x0fun") checkMatch("\"abc\\\"def\nghi", TokenBadString, "abc\"def", &TokenExtraError{}, TokenS, "\n", TokenIdent, "ghi") - checkMatch("---\\\x18-00", TokenDelim, "-", TokenDelim, "-", TokenIdent, "-\x18-00") + // checkMatch("---\\\x18-00", TokenDelim, "-", TokenDelim, "-", TokenIdent, "-\x18-00") + Fuzz([]byte( + `#sw_tfbb,#id_d{display:none}.sw_pref{border-style:solid;border-width:7px 0 7px 10px;vertical-align:bottom}#b_tween{margin-top:-28px}#b_tween>span{line-height:30px}#b_tween .ftrH{line-height:30px;height:30px}input{font:inherit;font-size:100%}.b_searchboxForm{font:18px/normal 'Segoe UI',Arial,Helvetica,Sans-Serif}.b_beta{font:11px/normal Arial,Helvetica,Sans-Serif}.b_scopebar,.id_button{line-height:30px}.sa_ec{font:13px Arial,Helvetica,Sans-Serif}#sa_ul .sa_hd{font-size:11px;line-height:16px}#sw_as strong{font-family:'Segoe UI Semibold',Arial,Helvetica,Sans-Serif}#id_h{background-color:transparent!important;position:relativ e!important;float:right;height:35px!important;width:280px!important}.sw_pref{margin:0 15px 3px 0}#id_d{left:auto;right:26px;top:35px!important}.id_avatar{vertical-align:middle;margin:10px 0 10px 10px}`), + ) } From df4d3f6b5fbeb7de7d30dae58c3b62ee09e24341 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 16:04:31 -0700 Subject: [PATCH 25/33] Improve documentation, delete unused methods --- tokenizer/fuzz.go | 1 + tokenizer/token.go | 49 ++++++++++++++++++++++-------------------- tokenizer/tokenizer.go | 29 ++++++++++--------------- 3 files changed, 38 insertions(+), 41 deletions(-) diff --git 
a/tokenizer/fuzz.go b/tokenizer/fuzz.go index d8e512b..e074e2e 100644 --- a/tokenizer/fuzz.go +++ b/tokenizer/fuzz.go @@ -9,6 +9,7 @@ import ( "reflect" ) +// Entry point for fuzz testing. func Fuzz(b []byte) int { fmt.Printf("=== Start fuzz test ===\n%s\n", b) var tokens []Token diff --git a/tokenizer/token.go b/tokenizer/token.go index 5e6ec52..302f114 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -29,17 +29,6 @@ func (t TokenType) StopToken() bool { TokenBadString || t == TokenBadURI } -// Simple tokens TODO figure out a useful definition for this. -func (t TokenType) SimpleToken() bool { - if t.StopToken() { - return false - } - if t == TokenHash || t == TokenNumber || t == TokenPercentage || t == TokenDimension || t == TokenUnicodeRange { - return false - } - return true -} - // ParseError represents a CSS syntax error. type ParseError struct { Type TokenType @@ -47,16 +36,20 @@ type ParseError struct { Loc int } +// implements error func (e *ParseError) Error() string { return e.Message } // Token represents a token in the CSS syntax. type Token struct { - Type TokenType + Type TokenType + // A string representation of the token value that depends on the type. + // For example, for a TokenURI, the Value is the URI itself. For a + // TokenPercentage, the Value is the number without the percent sign. Value string - // Extra data for the token beyond a simple string. - // Will always be a pointer to a "Token*Extra" type in this package. + // Extra data for the token beyond a simple string. Will always be a + // pointer to a "TokenExtra*" type in this package. Extra TokenExtra } @@ -65,25 +58,27 @@ const ( // Scanner flags. TokenError TokenType = iota TokenEOF - // From now on, only tokens from the CSS specification. + + // Tokens TokenIdent TokenFunction + TokenURI TokenDelim // Single character TokenAtKeyword TokenString + TokenS // Whitespace + // CSS Syntax Level 3 removes comments from the token stream, but they are + // preserved here. + TokenComment + + // Extra data: TokenExtraHash TokenHash + // Extra data: TokenExtraNumeric TokenNumber TokenPercentage TokenDimension - TokenURI + // Extra data: TokenExtraUnicodeRange TokenUnicodeRange - TokenCDO - TokenCDC - // Whitespace - TokenS - // CSS Syntax Level 3 removes comments from the token stream, but they are - // preserved here. - TokenComment // Error tokens TokenBadString @@ -106,6 +101,8 @@ const ( TokenCloseParen TokenOpenBrace TokenCloseBrace + TokenCDO + TokenCDC ) // backwards compatibility @@ -369,12 +366,14 @@ func escapeString(s string, delim byte) string { return buf.String() } +// Attempt to turn the token back into a CSS string. (Wrapper around WriteTo.) func (t *Token) Render() string { var buf bytes.Buffer t.WriteTo(&buf) return buf.String() } +// Attempt to turn the token back into a CSS string. func (t *Token) WriteTo(w io.Writer) { switch t.Type { case TokenError: @@ -438,10 +437,14 @@ func (t *Token) WriteTo(w io.Writer) { } // TokenRenderer takes care of the comment insertion rules for serialization. +// This type is mostly intended for the fuzz test and not for general +// consumption, but it can be used for that. type TokenRenderer struct { lastToken Token } +// Write a token to the given io.Writer, potentially inserting an empty comment +// in front based on what the previous token was. 
func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) { var prevKey, curKey interface{} if r.lastToken.Type == TokenDelim { diff --git a/tokenizer/tokenizer.go b/tokenizer/tokenizer.go index 0ecf20e..fc67a13 100644 --- a/tokenizer/tokenizer.go +++ b/tokenizer/tokenizer.go @@ -25,18 +25,22 @@ type Tokenizer struct { err error peek [3]byte - ErrorMode int + // ErrorMode int tok Token } +/* const ( // Default error mode - tokenization errors are represented as special tokens in the stream, and I/O errors are TokenError. ErrorModeTokens = iota ErrorModeFatal ) +*/ -// Construct a Tokenizer from the given input. Input need not be normalized. +// Construct a Tokenizer from the given input. Input need not be 'normalized' +// according to the spec (newlines changed to \n, zero bytes changed to +// U+FFFD). func NewTokenizer(r io.Reader) *Tokenizer { return &Tokenizer{ r: bufio.NewReader(transform.NewReader(r, new(normalize))), @@ -44,7 +48,7 @@ func NewTokenizer(r io.Reader) *Tokenizer { } // Scan for the next token. If the tokenizer is in an error state, no input -// will be consumed, and .AcknowledgeError() should be called instead. +// will be consumed. func (z *Tokenizer) Scan() { defer func() { rec := recover() @@ -75,34 +79,23 @@ func (z *Tokenizer) Scan() { } } -// Return the current token. +// Get the most recently scanned token. func (z *Tokenizer) Token() Token { return z.tok } -// Combines the calls to Scan() and Token(). +// Scan for the next token and return it. func (z *Tokenizer) Next() Token { z.Scan() return z.tok } -// Err returns the last error to be encountered and not cleared. +// Err returns the last input reading error to be encountered. It is filled +// when TokenError is returned. func (z *Tokenizer) Err() error { return z.err } -// Acknowledge a returned error token. This can only be called to clear -// TokenBadString, TokenBadURI, and TokenBadEscape. Using it for non-parsing -// errors will panic. -func (z *Tokenizer) AcknowledgeError() { - _, ok := z.err.(*ParseError) - if !ok { - // TODO ErrorMode - return - } - z.err = nil -} - // repeek reads the next 3 bytes into the tokenizer. on EOF, the bytes are // filled with zeroes. (Null bytes in the input are preprocessed into U+FFFD.) func (z *Tokenizer) repeek() { From 2689bbfa6e605ff7190fbfbebabb52aabe0e29e7 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 16:05:59 -0700 Subject: [PATCH 26/33] tighten signature of TokenExtraTypeLookup --- tokenizer/token.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokenizer/token.go b/tokenizer/token.go index 302f114..0471985 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -155,7 +155,7 @@ type TokenExtra interface { // TokenExtraTypeLookup provides a handy check for whether a given token type // should contain extra data. 
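For orientation, a minimal consumer of the Tokenizer API documented above (NewTokenizer, Next, Err, and the StopToken helper all appear in these diffs; the CSS literal and the printing are illustrative only):

	package main

	import (
		"fmt"
		"strings"

		"github.com/gorilla/css/tokenizer"
	)

	func main() {
		tz := tokenizer.NewTokenizer(strings.NewReader("a { color: red }"))
		for {
			tok := tz.Next()
			if tok.Type.StopToken() {
				// Stop tokens cover EOF, I/O errors, and the bad-* tokens.
				if tok.Type == tokenizer.TokenError {
					fmt.Println("read error:", tz.Err())
				}
				break
			}
			fmt.Println(tok.Type, tok.Value)
		}
	}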
-var TokenExtraTypeLookup = map[TokenType]interface{}{ +var TokenExtraTypeLookup = map[TokenType]TokenExtra{ TokenError: &TokenExtraError{}, TokenBadEscape: &TokenExtraError{}, TokenBadString: &TokenExtraError{}, From 5f3baa3f2cb22dbf7703eaf23a32fc10b5a65410 Mon Sep 17 00:00:00 2001 From: Kane York Date: Tue, 20 Mar 2018 16:08:26 -0700 Subject: [PATCH 27/33] improve documentation of TokenExtra.String() --- tokenizer/token.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tokenizer/token.go b/tokenizer/token.go index 0471985..8d860e8 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -172,6 +172,7 @@ type TokenExtraHash struct { IsIdentifier bool } +// Returns a descriptive string, either "unrestricted" or "id". func (e *TokenExtraHash) String() string { if e == nil || !e.IsIdentifier { return "unrestricted" @@ -188,6 +189,7 @@ type TokenExtraNumeric struct { Dimension string } +// Returns the Dimension field. func (e *TokenExtraNumeric) String() string { if e == nil { return "" @@ -201,6 +203,7 @@ type TokenExtraUnicodeRange struct { End rune } +// Returns a valid CSS representation of the token. func (e *TokenExtraUnicodeRange) String() string { if e == nil { panic("TokenExtraUnicodeRange: unexpected nil pointer value") @@ -219,7 +222,7 @@ type TokenExtraError struct { Err error } -// String returns the error text. +// Returns Err.Error(). func (e *TokenExtraError) String() string { return e.Err.Error() } From ad83c8e4820580972e7d149365cbfb7cc11100a2 Mon Sep 17 00:00:00 2001 From: riking Date: Sat, 24 Mar 2018 19:15:43 -0700 Subject: [PATCH 28/33] Change Token.WriteTo to standard signature --- tokenizer/token.go | 111 +++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 35 deletions(-) diff --git a/tokenizer/token.go b/tokenizer/token.go index 8d860e8..98f983c 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -369,86 +369,116 @@ func escapeString(s string, delim byte) string { return buf.String() } -// Attempt to turn the token back into a CSS string. (Wrapper around WriteTo.) +// Return the CSS source representation of the token. (Wrapper around +// WriteTo.) func (t *Token) Render() string { var buf bytes.Buffer - t.WriteTo(&buf) + _, _ = t.WriteTo(&buf) return buf.String() } -// Attempt to turn the token back into a CSS string. -func (t *Token) WriteTo(w io.Writer) { +func stickyWriteString(n *int, err *error, w io.Writer, s string) { + n2, err2 := io.WriteString(w, s) + *n += n2 + if err2 != nil { + if *err != nil { + *err = err2 + } + } +} + +// Write the CSS source representation of the token to the provided writer. If +// you are attempting to render a series of tokens, see the TokenRenderer type +// to handle comment insertion rules. +// +// Tokens with type TokenError do not write anything. 
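As a concrete (though invented) data point for Render and the WriteTo switch that follows, a hash token built by hand should round-trip back to CSS source; the fmt and tokenizer imports are assumed:

	tok := tokenizer.Token{
		Type:  tokenizer.TokenHash,
		Value: "nav",
		Extra: &tokenizer.TokenExtraHash{IsIdentifier: true},
	}
	fmt.Println(tok.Render()) // expected to print "#nav"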
+func (t *Token) WriteTo(w io.Writer) (n int, err error) { switch t.Type { case TokenError: return case TokenEOF: return case TokenIdent: - fmt.Fprint(w, escapeIdentifier(t.Value)) + return io.WriteString(w, escapeIdentifier(t.Value)) case TokenAtKeyword: - fmt.Fprint(w, "@", escapeIdentifier(t.Value)) + stickyWriteString(&n, &err, w, "@") + stickyWriteString(&n, &err, w, escapeIdentifier(t.Value)) + return case TokenDelim: if t.Value == "\\" { - fmt.Fprint(w, "\\\n") + // nb: should not happen, this is actually TokenBadEscape + return io.WriteString(w, "\\\n") } else { - fmt.Fprint(w, t.Value) + return io.WriteString(w, t.Value) } case TokenHash: e := t.Extra.(*TokenExtraHash) io.WriteString(w, "#") if e.IsIdentifier { - fmt.Fprint(w, escapeIdentifier(t.Value)) + return io.WriteString(w, escapeIdentifier(t.Value)) } else { - fmt.Fprint(w, escapeHashName(t.Value)) + return io.WriteString(w, escapeHashName(t.Value)) } case TokenPercentage: - fmt.Fprint(w, t.Value, "%") + stickyWriteString(&n, &err, w, t.Value) + stickyWriteString(&n, &err, w, "%") + return case TokenDimension: e := t.Extra.(*TokenExtraNumeric) - fmt.Fprint(w, t.Value, escapeDimension(e.Dimension)) + stickyWriteString(&n, &err, w, t.Value) + stickyWriteString(&n, &err, w, escapeDimension(e.Dimension)) + return case TokenString: - io.WriteString(w, escapeString(t.Value, '"')) + return io.WriteString(w, escapeString(t.Value, '"')) case TokenURI: - io.WriteString(w, "url(") - io.WriteString(w, escapeString(t.Value, '"')) - io.WriteString(w, ")") + stickyWriteString(&n, &err, w, "url(") + stickyWriteString(&n, &err, w, escapeString(t.Value, '"')) + stickyWriteString(&n, &err, w, ")") + return case TokenUnicodeRange: - io.WriteString(w, t.Extra.String()) + return io.WriteString(w, t.Extra.String()) case TokenComment: - io.WriteString(w, "/*") - io.WriteString(w, t.Value) - io.WriteString(w, "*/") + stickyWriteString(&n, &err, w, "/*") + stickyWriteString(&n, &err, w, t.Value) + stickyWriteString(&n, &err, w, "*/") + return case TokenFunction: - io.WriteString(w, escapeIdentifier(t.Value)) - io.WriteString(w, "(") - + stickyWriteString(&n, &err, w, escapeIdentifier(t.Value)) + stickyWriteString(&n, &err, w, "(") + return case TokenBadEscape: - io.WriteString(w, "\\\n") + return io.WriteString(w, "\\\n") case TokenBadString: - io.WriteString(w, "\"") - io.WriteString(w, escapeString(t.Value, 0)) - io.WriteString(w, "\n") + stickyWriteString(&n, &err, w, "\"") + stickyWriteString(&n, &err, w, escapeString(t.Value, 0)) + stickyWriteString(&n, &err, w, "\n") + return case TokenBadURI: - io.WriteString(w, "url(\"") + stickyWriteString(&n, &err, w, "url(\"") str := escapeString(t.Value, 0) str = strings.TrimSuffix(str, "\"") - io.WriteString(w, str) - io.WriteString(w, "\n)") + stickyWriteString(&n, &err, w, str) + stickyWriteString(&n, &err, w, "\n)") + return default: - fmt.Fprint(w, t.Value) + return io.WriteString(w, t.Value) } } // TokenRenderer takes care of the comment insertion rules for serialization. // This type is mostly intended for the fuzz test and not for general -// consumption, but it can be used for that. +// consumption, but it can be used by consumers that want to re-render a parse +// stream. type TokenRenderer struct { lastToken Token } // Write a token to the given io.Writer, potentially inserting an empty comment // in front based on what the previous token was. 
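A short sketch of how a consumer might drive TokenRenderer over a previously collected token slice, mirroring the way fuzz.go uses it (the renderAll helper name is invented; imports of io and the tokenizer package are assumed):

	func renderAll(w io.Writer, tokens []tokenizer.Token) error {
		var r tokenizer.TokenRenderer // zero value is ready to use
		for _, tok := range tokens {
			if _, err := r.WriteTokenTo(w, tok); err != nil {
				return err
			}
		}
		return nil
	}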
-func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) { +// +// In the event of a writing error, the TokenRenderer is left in an +// indeterminate state. (TODO: maybe fix that?) +func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) (n int, err error) { var prevKey, curKey interface{} if r.lastToken.Type == TokenDelim { prevKey = r.lastToken.Value[0] @@ -464,14 +494,25 @@ func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) { m1, ok := commentInsertionRules[prevKey] if ok { if m1[curKey] { - io.WriteString(w, "/**/") + n2, err2 := io.WriteString(w, "/**/") + if err2 != nil { + return n2, err2 + } else if n2 != 4 { + return n2, io.ErrShortWrite + } else { + n += n2 + } } } - t.WriteTo(w) + n2, err2 := t.WriteTo(w) r.lastToken = t + n += n2 + return n, err2 } +// CSS Syntax Level 3 - Section 9 + var commentInsertionThruCDC = map[interface{}]bool{ TokenIdent: true, TokenFunction: true, From f4312d7e5e4c10630716a9f44c3bd12f69dbe81a Mon Sep 17 00:00:00 2001 From: riking Date: Sat, 24 Mar 2018 19:17:49 -0700 Subject: [PATCH 29/33] Update README, update tokenizer docs --- README.md | 6 +++++- tokenizer/doc.go | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index c8eee22..345d1b9 100644 --- a/README.md +++ b/README.md @@ -2,4 +2,8 @@ css === [![GoDoc](https://godoc.org/github.com/gorilla/css?status.svg)](https://godoc.org/github.com/gorilla/css) [![Build Status](https://travis-ci.org/gorilla/css.png?branch=master)](https://travis-ci.org/gorilla/css) -A CSS3 tokenizer based on https://www.w3.org/TR/css-syntax-3/#tokenizer-algorithms +A CSS3 tokenizer. + +This repository contains two packages. The 'scanner' package is based on an older version of the CSS specification, and is kept around for compatibility with existing code. + +The 'tokenizer' package is based on the CSS Syntax Level 3 specification at . diff --git a/tokenizer/doc.go b/tokenizer/doc.go index 2cd6d19..8da676c 100644 --- a/tokenizer/doc.go +++ b/tokenizer/doc.go @@ -8,9 +8,9 @@ Package gorilla/css/tokenizer generates tokens for a CSS3 input. It follows the CSS3 specification located at: - http://www.w3.org/TR/css3-syntax/ + http://www.w3.org/TR/css-syntax-3/#tokenizer-algorithms -To use it, create a new scanner for a given CSS input and call Next() until +To use it, create a new tokenizer for a given CSS input and call Next() until the token returned is a "stop token": s := tokenizer.New(strings.NewReader(myCSS)) @@ -22,8 +22,8 @@ the token returned is a "stop token": // Do something with the token... } -If the consumer wants to accept malformed input, change the check to the -following instead: +If the consumer wants to accept malformed input, use the following check +instead: token := s.Next() if token.Type == tokenizer.TokenEOF || token.Type == tokenizer.TokenError { @@ -32,21 +32,21 @@ following instead: The three potential tokenization errors are a "bad-escape" (backslash-newline outside a "string" or url() in the input), a "bad-string" (unescaped newline -inside a "string"), and a "bad-url" (a few different cases). Parsers can choose -to abort when seeing one of these errors, or ignore the declaration and attempt -to recover. +inside a "string"), and a "bad-url" (a few different cases). Parsers can +choose to abort when seeing one of these errors, or ignore the declaration and +attempt to recover. -Returned tokens that carry extra information have a non-nil .Extra value. 
For +Returned tokens that carry extra information have a non-nil .Extra value. For TokenError, TokenBadEscape, TokenBadString, and TokenBadURI, the TokenExtraError type carries an `error` with informative text about the nature -of the error. For TokenNumber, TokenPercentage, and TokenDimension, the +of the error. For TokenNumber, TokenPercentage, and TokenDimension, the TokenExtraNumeric specifies whether the number is integral, and for -TokenDimension, contains the unit string (e.g. "px"). For TokenUnicodeRange, +TokenDimension, contains the unit string (e.g. "px"). For TokenUnicodeRange, the TokenExtraUnicodeRange type contains the actual start and end values of the range. -Note: the scanner doesn't perform lexical analysis or, in other words, it -doesn't care about the token context. It is intended to be used by a -lexer or parser. +Note: the tokenizer doesn't perform lexical analysis, it only implements +Section 4 of the CSS Syntax Level 3 specification. See Section 5 for the +parsing rules. */ package tokenizer From 551cdbaf8e5b12cd88988b88afb8a1a659d87fee Mon Sep 17 00:00:00 2001 From: riking Date: Sat, 24 Mar 2018 19:27:34 -0700 Subject: [PATCH 30/33] Suppress output from Fuzz during tests --- tokenizer/fuzz.go | 27 ++++++++++++++++++++------- tokenizer/scanner_test.go | 3 +++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tokenizer/fuzz.go b/tokenizer/fuzz.go index e074e2e..c04dfcb 100644 --- a/tokenizer/fuzz.go +++ b/tokenizer/fuzz.go @@ -9,15 +9,29 @@ import ( "reflect" ) +// Tests should set this to true to suppress fuzzer output except on failure. +var fuzzNoPrint = false + // Entry point for fuzz testing. func Fuzz(b []byte) int { - fmt.Printf("=== Start fuzz test ===\n%s\n", b) - var tokens []Token + success := false + var testLogBuf bytes.Buffer + fuzzPrintf := func(f string, v ...interface{}) { + fmt.Fprintf(&testLogBuf, f, v...) + } + defer func() { + if !success { + fmt.Print(testLogBuf.String()) + } + }() + fuzzPrintf("=== Start fuzz test ===\n%s\n", b) + + var tokens []Token tz := NewTokenizer(bytes.NewReader(b)) for { tt := tz.Next() - fmt.Printf("[OT] %v\n", tt) + fuzzPrintf("[OT] %v\n", tt) if tt.Type == TokenError { // We should not have reading errors panic(tt) @@ -32,17 +46,16 @@ func Fuzz(b []byte) int { var wr TokenRenderer var rerenderBuf bytes.Buffer - success := false defer func() { if !success { - fmt.Println("RERENDER BUFFER:", rerenderBuf.String()) + fuzzPrintf("RE-RENDER BUFFER:\n%s\n", rerenderBuf.String()) } }() pr, pw := io.Pipe() defer pr.Close() go func() { - writeTarget := io.MultiWriter(pw, &rerenderBuf) + writeTarget := io.MultiWriter(&rerenderBuf, pw) for _, v := range tokens { wr.WriteTokenTo(writeTarget, v) } @@ -56,7 +69,7 @@ func Fuzz(b []byte) int { i++ } tt := tz.Next() - fmt.Printf("[RT] %v\n", tt) + fuzzPrintf("[RT] %v\n", tt) if tt.Type == TokenComment { // Ignore comments while comparing continue diff --git a/tokenizer/scanner_test.go b/tokenizer/scanner_test.go index 6d49f3f..b89bcea 100644 --- a/tokenizer/scanner_test.go +++ b/tokenizer/scanner_test.go @@ -12,6 +12,9 @@ import ( ) func TestMatchers(t *testing.T) { + // Fuzzer should not print during routine testing + fuzzNoPrint = true + // Just basic checks, not exhaustive at all. 
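The .Extra contract described in doc.go above is naturally handled with a type switch; a hedged sketch using the extra types and fields from the token.go diffs earlier in this series (the describeExtra helper is invented):

	func describeExtra(tok tokenizer.Token) string {
		switch e := tok.Extra.(type) {
		case *tokenizer.TokenExtraNumeric:
			// TokenNumber, TokenPercentage, TokenDimension; Dimension holds the unit, e.g. "px".
			return "numeric, unit=" + e.Dimension
		case *tokenizer.TokenExtraUnicodeRange:
			// TokenUnicodeRange; String() renders the U+XXXX-YYYY form.
			return "range " + e.String()
		case *tokenizer.TokenExtraHash:
			// TokenHash; String() reports "id" or "unrestricted".
			return "hash, " + e.String()
		case *tokenizer.TokenExtraError:
			// TokenError and the bad-* tokens carry the underlying error.
			return "error: " + e.Err.Error()
		default:
			return "no extra data"
		}
	}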
checkMatch := func(s string, ttList ...interface{}) { tz := NewTokenizer(strings.NewReader(s)) From 05a2682d8a5310c5d0a68787fe8d8079beb6b490 Mon Sep 17 00:00:00 2001 From: riking Date: Sat, 24 Mar 2018 19:33:42 -0700 Subject: [PATCH 31/33] Oops, WriteTo returns int64 not int --- tokenizer/token.go | 53 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/tokenizer/token.go b/tokenizer/token.go index 98f983c..04fcee8 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -377,9 +377,9 @@ func (t *Token) Render() string { return buf.String() } -func stickyWriteString(n *int, err *error, w io.Writer, s string) { +func stickyWriteString(n *int64, err *error, w io.Writer, s string) { n2, err2 := io.WriteString(w, s) - *n += n2 + *n += int64(n2) if err2 != nil { if *err != nil { *err = err2 @@ -392,14 +392,15 @@ func stickyWriteString(n *int, err *error, w io.Writer, s string) { // to handle comment insertion rules. // // Tokens with type TokenError do not write anything. -func (t *Token) WriteTo(w io.Writer) (n int, err error) { +func (t *Token) WriteTo(w io.Writer) (n int64, err error) { switch t.Type { case TokenError: return case TokenEOF: return case TokenIdent: - return io.WriteString(w, escapeIdentifier(t.Value)) + stickyWriteString(&n, &err, w, escapeIdentifier(t.Value)) + return case TokenAtKeyword: stickyWriteString(&n, &err, w, "@") stickyWriteString(&n, &err, w, escapeIdentifier(t.Value)) @@ -407,18 +408,20 @@ func (t *Token) WriteTo(w io.Writer) (n int, err error) { case TokenDelim: if t.Value == "\\" { // nb: should not happen, this is actually TokenBadEscape - return io.WriteString(w, "\\\n") + stickyWriteString(&n, &err, w, "\\\n") } else { - return io.WriteString(w, t.Value) + stickyWriteString(&n, &err, w, t.Value) } + return case TokenHash: e := t.Extra.(*TokenExtraHash) - io.WriteString(w, "#") + stickyWriteString(&n, &err, w, "#") if e.IsIdentifier { - return io.WriteString(w, escapeIdentifier(t.Value)) + stickyWriteString(&n, &err, w, escapeIdentifier(t.Value)) } else { - return io.WriteString(w, escapeHashName(t.Value)) + stickyWriteString(&n, &err, w, escapeHashName(t.Value)) } + return case TokenPercentage: stickyWriteString(&n, &err, w, t.Value) stickyWriteString(&n, &err, w, "%") @@ -429,14 +432,16 @@ func (t *Token) WriteTo(w io.Writer) (n int, err error) { stickyWriteString(&n, &err, w, escapeDimension(e.Dimension)) return case TokenString: - return io.WriteString(w, escapeString(t.Value, '"')) + stickyWriteString(&n, &err, w, escapeString(t.Value, '"')) + return case TokenURI: stickyWriteString(&n, &err, w, "url(") stickyWriteString(&n, &err, w, escapeString(t.Value, '"')) stickyWriteString(&n, &err, w, ")") return case TokenUnicodeRange: - return io.WriteString(w, t.Extra.String()) + stickyWriteString(&n, &err, w, t.Extra.String()) + return case TokenComment: stickyWriteString(&n, &err, w, "/*") stickyWriteString(&n, &err, w, t.Value) @@ -447,7 +452,8 @@ func (t *Token) WriteTo(w io.Writer) (n int, err error) { stickyWriteString(&n, &err, w, "(") return case TokenBadEscape: - return io.WriteString(w, "\\\n") + stickyWriteString(&n, &err, w, "\\\n") + return case TokenBadString: stickyWriteString(&n, &err, w, "\"") stickyWriteString(&n, &err, w, escapeString(t.Value, 0)) @@ -461,7 +467,8 @@ func (t *Token) WriteTo(w io.Writer) (n int, err error) { stickyWriteString(&n, &err, w, "\n)") return default: - return io.WriteString(w, t.Value) + stickyWriteString(&n, &err, w, t.Value) + return } } @@ -475,10 
+482,7 @@ type TokenRenderer struct { // Write a token to the given io.Writer, potentially inserting an empty comment // in front based on what the previous token was. -// -// In the event of a writing error, the TokenRenderer is left in an -// indeterminate state. (TODO: maybe fix that?) -func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) (n int, err error) { +func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) (n int64, err error) { var prevKey, curKey interface{} if r.lastToken.Type == TokenDelim { prevKey = r.lastToken.Value[0] @@ -494,21 +498,18 @@ func (r *TokenRenderer) WriteTokenTo(w io.Writer, t Token) (n int, err error) { m1, ok := commentInsertionRules[prevKey] if ok { if m1[curKey] { - n2, err2 := io.WriteString(w, "/**/") - if err2 != nil { - return n2, err2 - } else if n2 != 4 { - return n2, io.ErrShortWrite - } else { - n += n2 - } + stickyWriteString(&n, &err, w, "/**/") } } n2, err2 := t.WriteTo(w) r.lastToken = t + n += n2 - return n, err2 + if err2 != nil && err == nil { + err = err2 + } + return n, err } // CSS Syntax Level 3 - Section 9 From 35e0c2bce7c8a8c388be18ab91a33dc225cbb372 Mon Sep 17 00:00:00 2001 From: riking Date: Sat, 24 Mar 2018 19:53:01 -0700 Subject: [PATCH 32/33] travis.yml: skip tokenizer package in old versions --- .travis.yml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index fe78007..217dd4d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,12 +3,16 @@ sudo: false matrix: include: - - go: 1.3 - - go: 1.4 - - go: 1.5 - - go: 1.6 - - go: 1.7 - - go: 1.8 + - go: "1.3" + env: SKIP_TOKENIZER=true + - go: "1.4" + env: SKIP_TOKENIZER=true + - go: "1.5" + - go: "1.6" + - go: "1.7" + - go: "1.8" + - go: "1.9" + - go: "1.10" - go: tip allow_failures: - go: tip @@ -17,4 +21,9 @@ script: - go get -t -v ./... - diff -u <(echo -n) <(gofmt -d .) - go vet $(go list ./... | grep -v /vendor/) - - go test -v -race ./... + - > + if [ "$SKIP_TOKENIZER" = "true" ]; then + go test -v -race ./scanner + else + go test -v -race ./... + fi From c37ded0aac8956eb6d21b43690ef40e3e5f09346 Mon Sep 17 00:00:00 2001 From: riking Date: Sat, 24 Mar 2018 19:55:01 -0700 Subject: [PATCH 33/33] travis.yml: Drop go 1.3 and 1.4 support (bufio.Reader.Discard) --- .travis.yml | 11 +---------- README.md | 4 ++-- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 217dd4d..c73651e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,10 +3,6 @@ sudo: false matrix: include: - - go: "1.3" - env: SKIP_TOKENIZER=true - - go: "1.4" - env: SKIP_TOKENIZER=true - go: "1.5" - go: "1.6" - go: "1.7" @@ -21,9 +17,4 @@ script: - go get -t -v ./... - diff -u <(echo -n) <(gofmt -d .) - go vet $(go list ./... | grep -v /vendor/) - - > - if [ "$SKIP_TOKENIZER" = "true" ]; then - go test -v -race ./scanner - else - go test -v -race ./... - fi + - go test -v -race ./... diff --git a/README.md b/README.md index 345d1b9..90e5235 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,6 @@ css A CSS3 tokenizer. -This repository contains two packages. The 'scanner' package is based on an older version of the CSS specification, and is kept around for compatibility with existing code. +This repository contains two packages. The 'scanner' package is based on an older version of the CSS specification, and is kept around for compatibility with existing code. Minimum Go version is 1.3. -The 'tokenizer' package is based on the CSS Syntax Level 3 specification at . 
+The 'tokenizer' package is based on the CSS Syntax Level 3 specification at <https://www.w3.org/TR/css-syntax-3/>. Minimum Go version is 1.5.