123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- #region Copyright and License
- //
- // Fizzler - CSS Selector Engine for Microsoft .NET Framework
- // Copyright (c) 2009 Atif Aziz, Colin Ramsay. All rights reserved.
- //
- // This library is free software; you can redistribute it and/or modify it under
- // the terms of the GNU Lesser General Public License as published by the Free
- // Software Foundation; either version 3 of the License, or (at your option)
- // any later version.
- //
- // This library is distributed in the hope that it will be useful, but WITHOUT
- // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Lesser General Public License
- // along with this library; if not, write to the Free Software Foundation, Inc.,
- // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- //
- #endregion
- #pragma warning disable
- namespace Fizzler
- {
- #region Imports
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.IO;
- using System.Text;
- #endregion
- /// <summary>
- /// Lexer for tokens in CSS selector grammar.
- /// </summary>
- public static class Tokener
- {
/// <summary>
/// Reads the whole content of a text source and tokenizes it.
/// </summary>
/// <param name="reader">Text source; it is read to its end before tokenizing starts.</param>
/// <returns>The token sequence produced by the string overload for the text that was read.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public static IEnumerable<Token> Tokenize(TextReader reader)
{
    // Validated here, in a non-iterator method, so the check fires at call
    // time rather than on first enumeration.
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    var text = reader.ReadToEnd();
    return Tokenize(text);
}
/// <summary>
/// Splits a selector string into tokens, finishing with an end-of-input token.
/// </summary>
/// <param name="input">Text to tokenize; null is treated as the empty string.</param>
/// <returns>A lazily produced sequence of tokens.</returns>
/// <exception cref="FormatException">
/// Raised while enumerating when an invalid identifier, character, hash or
/// string is met in the input.
/// </exception>
public static IEnumerable<Token> Tokenize(string input)
{
    var source = new Reader(input ?? string.Empty);

    for (var current = source.Read(); current != null; current = source.Read())
    {
        //
        // Identifier or function
        //

        if (current == '-' || IsNmStart(current))
        {
            source.Mark();

            // A leading hyphen must be followed by a name-start character.
            if (current == '-' && !IsNmStart(source.Read()))
            {
                throw new FormatException(string.Format("Invalid identifier at position {0}.", source.Position));
            }

            // Swallow the remaining name characters; "next" ends up holding
            // the first character that does not belong to the name.
            char? next;
            do
            {
                next = source.Read();
            }
            while (IsNmChar(next));

            // An opening parenthesis right after the name makes it a function;
            // the parenthesis is consumed but kept out of the token text.
            // Otherwise the terminating character is pushed back.
            yield return next == '('
                ? Token.Function(source.Marked())
                : Token.Ident(source.MarkedWithUnread());
            continue;
        }

        //
        // Integer
        //

        if (IsDigit(current))
        {
            source.Mark();
            while (IsDigit(source.Read())) { /* keep consuming digits */ }
            yield return Token.Integer(source.MarkedWithUnread());
            continue;
        }

        //
        // Whitespace, including that which is coupled with some punctuation
        //

        if (IsS(current))
        {
            var blanks = ParseWhiteSpace(source);
            var follower = source.Read();

            // Whitespace immediately followed by one of these punctuation
            // characters is reported as that punctuation token alone.
            if (follower == ',')
            {
                yield return Token.Comma();
            }
            else if (follower == '+')
            {
                yield return Token.Plus();
            }
            else if (follower == '>')
            {
                yield return Token.Greater();
            }
            else if (follower == '~')
            {
                yield return Token.Tilde();
            }
            else
            {
                source.Unread();
                yield return Token.WhiteSpace(blanks);
            }
            continue;
        }

        //
        // Punctuation, match operators and strings
        //

        var symbol = current.Value;
        switch (symbol)
        {
            case '*': // * or *=
            case '~': // ~ or ~=
            case '|': // | or |=
                if (source.Read() == '=')
                {
                    if (symbol == '*')
                        yield return Token.SubstringMatch();
                    else if (symbol == '|')
                        yield return Token.DashMatch();
                    else
                        yield return Token.Includes();
                }
                else
                {
                    // Not a two-character operator: push the look-ahead back.
                    source.Unread();
                    if (symbol == '~')
                        yield return Token.Tilde();
                    else
                        yield return Token.Char(symbol);
                }
                break;

            case '^': // ^=
            case '$': // $=
                // These two are only valid as the first half of an operator.
                if (source.Read() != '=')
                {
                    throw new FormatException(string.Format("Invalid character at position {0}.", source.Position));
                }
                yield return symbol == '^' ? Token.PrefixMatch() : Token.SuffixMatch();
                break;

            //
            // Single-character punctuation
            //

            case '.':
                yield return Token.Dot();
                break;
            case ':':
                yield return Token.Colon();
                break;
            case ',':
                yield return Token.Comma();
                break;
            case '=':
                yield return Token.Equals();
                break;
            case '[':
                yield return Token.LeftBracket();
                break;
            case ']':
                yield return Token.RightBracket();
                break;
            case ')':
                yield return Token.RightParenthesis();
                break;
            case '+':
                yield return Token.Plus();
                break;
            case '>':
                yield return Token.Greater();
                break;
            case '#':
                yield return Token.Hash(ParseHash(source));
                break;

            //
            // Single- or double-quoted strings
            //

            case '\"':
            case '\'':
                yield return ParseString(source, /* quote */ symbol);
                break;

            default:
                throw new FormatException(string.Format("Invalid character at position {0}.", source.Position));
        }
    }

    yield return Token.Eoi();
}
/// <summary>
/// Consumes a run of whitespace that starts at the reader's current
/// character and returns it, leaving the reader on the last whitespace
/// character of the run.
/// </summary>
private static string ParseWhiteSpace(Reader reader)
{
    Debug.Assert(reader != null);

    reader.Mark();

    // Advance until a non-whitespace character (or end of input) is read.
    char? probe;
    do
    {
        probe = reader.Read();
    }
    while (IsS(probe));

    // The read that ended the loop went one step too far; hand it back.
    return reader.MarkedWithUnread();
}
/// <summary>
/// Reads the name that follows a '#' (the reader's current character) and
/// returns it without the '#'.
/// </summary>
/// <exception cref="FormatException">No name characters follow the '#'.</exception>
private static string ParseHash(Reader reader)
{
    Debug.Assert(reader != null);

    reader.MarkFromNext(); // skipping #

    char? probe;
    do
    {
        probe = reader.Read();
    }
    while (IsNmChar(probe));

    var name = reader.MarkedWithUnread();
    if (name.Length > 0)
    {
        return name;
    }

    throw new FormatException(string.Format("Invalid hash at position {0}.", reader.Position));
}
/// <summary>
/// Reads a quoted string whose opening quote is the reader's current
/// character and returns it as a string token with escapes resolved.
/// </summary>
/// <param name="reader">Reader positioned on the opening quote.</param>
/// <param name="quote">The quote character that terminates the string.</param>
/// <exception cref="FormatException">
/// The input ends before the closing quote, or a backslash is followed by
/// anything other than the quote character or another backslash.
/// </exception>
private static Token ParseString(Reader reader, char quote)
{
    Debug.Assert(reader != null);

    //
    // TODO Support full string syntax!
    //
    // string    {string1}|{string2}
    // string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
    // string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
    // nonascii  [^\0-\177]
    // escape    {unicode}|\\[^\n\r\f0-9a-f]
    // unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
    //

    var openedAt = reader.Position;
    reader.MarkFromNext(); // skipping quote

    // Allocated only when at least one escape sequence is met.
    StringBuilder unescaped = null;

    for (var current = reader.Read(); current != quote; current = reader.Read())
    {
        if (current == null)
        {
            throw new FormatException(string.Format("Unterminated string at position {0}.", openedAt));
        }

        if (current != '\\')
        {
            continue;
        }

        //
        // NOTE: Only escaping of quote and backslash supported!
        //

        var escaped = reader.Read();
        var supported = escaped == quote || escaped == '\\';
        if (!supported)
        {
            throw new FormatException(string.Format("Invalid escape sequence at position {0} in a string at position {1}.", reader.Position, openedAt));
        }

        // Flush the text before the backslash, then restart the mark on the
        // escaped character so that it opens the next chunk.
        unescaped = unescaped ?? new StringBuilder();
        unescaped.Append(reader.MarkedExceptLast());
        reader.Mark();
    }

    var tail = reader.Marked();
    return Token.String(unescaped == null ? tail : unescaped.Append(tail).ToString());
}
/// <summary>
/// Tells whether a character is an ASCII decimal digit; null is never one.
/// </summary>
private static bool IsDigit(char? ch) // [0-9]
{
    if (ch == null)
    {
        return false;
    }

    var c = ch.Value;
    return '0' <= c && c <= '9';
}
/// <summary>
/// Tells whether a character is one of the five whitespace characters
/// recognised by the lexer: space, tab, carriage return, line feed or form feed.
/// </summary>
private static bool IsS(char? ch) // [ \t\r\n\f]
{
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\f':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Tells whether a character may start a name: an underscore, an ASCII
/// letter of either case, or any non-ASCII character (above U+007F).
/// Null (end of input) never qualifies.
/// </summary>
/// <remarks>
/// The {escape} alternative of the grammar is not supported.
/// </remarks>
private static bool IsNmStart(char? ch) // [_a-z]|{nonascii}|{escape}
{
    return ch == '_'
        || (ch >= 'a' && ch <= 'z')
        || (ch >= 'A' && ch <= 'Z')
        || ch > '\u007F'; // {nonascii} = [^\0-\177]; false for null
}
/// <summary>
/// Tells whether a character may appear inside a name: anything accepted
/// as a name start, a hyphen, or an ASCII decimal digit.
/// </summary>
private static bool IsNmChar(char? ch) // [_a-z0-9-]|{nonascii}|{escape}
{
    if (IsNmStart(ch))
    {
        return true;
    }

    if (ch == '-')
    {
        return true;
    }

    return '0' <= ch && ch <= '9';
}
/// <summary>
/// Cursor over a string that supports single-step read and unread, plus
/// marking a start index and extracting the text between the mark and the
/// cursor.
/// </summary>
private sealed class Reader
{
    private readonly string _text;
    private int _cursor = -1; // index of the current character; -1 before the first read
    private int _anchor = -1; // index where the marked text begins

    public Reader(string input)
    {
        _text = input;
    }

    /// <summary>
    /// Character under the cursor, or null when the cursor is before the
    /// start or past the end of the text.
    /// </summary>
    public char? Value
    {
        get
        {
            var inRange = _cursor >= 0 && _cursor < _text.Length;
            if (!inRange)
            {
                return null;
            }
            return _text[_cursor];
        }
    }

    /// <summary>
    /// Cursor index plus one.
    /// </summary>
    public int Position
    {
        get { return _cursor + 1; }
    }

    /// <summary>
    /// Starts the marked text at the current character.
    /// </summary>
    public void Mark()
    {
        _anchor = _cursor;
    }

    /// <summary>
    /// Starts the marked text at the character after the current one.
    /// </summary>
    public void MarkFromNext()
    {
        _anchor = _cursor + 1;
    }

    /// <summary>
    /// Text from the mark up to, but not including, the current character.
    /// </summary>
    public string Marked()
    {
        return Slice(0);
    }

    /// <summary>
    /// Same as <see cref="Marked"/> but ending one character earlier.
    /// </summary>
    public string MarkedExceptLast()
    {
        return Slice(-1);
    }

    /// <summary>
    /// Returns the marked text and then steps the cursor back by one.
    /// </summary>
    public string MarkedWithUnread()
    {
        var text = Marked();
        Unread();
        return text;
    }

    /// <summary>
    /// Moves the cursor one step forward, never beyond one past the last
    /// character, and returns the character now under it (null at the end).
    /// </summary>
    public char? Read()
    {
        _cursor = Math.Min(_cursor + 1, _text.Length);
        return Value;
    }

    /// <summary>
    /// Moves the cursor one step back, never before its initial position.
    /// </summary>
    public void Unread()
    {
        if (_cursor > -1)
        {
            _cursor--;
        }
    }

    // Extracts the text between the mark and the cursor shifted by "offset",
    // clamping the end to the text length; empty when nothing lies between.
    private string Slice(int offset)
    {
        var end = Math.Min(_text.Length, _cursor + offset);
        var length = end - _anchor;
        if (length <= 0)
        {
            return string.Empty;
        }
        return _text.Substring(_anchor, length);
    }
}
- }
- }
- #pragma warning restore
|