// PRSDigital / 3rdpartylibs
							#region Copyright and License
// 
// Fizzler - CSS Selector Engine for Microsoft .NET Framework
// Copyright (c) 2009 Atif Aziz, Colin Ramsay. All rights reserved.
// 
// This library is free software; you can redistribute it and/or modify it under 
// the terms of the GNU Lesser General Public License as published by the Free 
// Software Foundation; either version 3 of the License, or (at your option) 
// any later version.
// 
// This library is distributed in the hope that it will be useful, but WITHOUT 
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 
// details.
// 
// You should have received a copy of the GNU Lesser General Public License 
// along with this library; if not, write to the Free Software Foundation, Inc., 
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
// 
#endregion

#pragma warning disable

namespace Fizzler
{
    #region Imports

    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.IO;
    using System.Text;

    #endregion

    /// <summary>
    /// Lexer for tokens in CSS selector grammar.
    /// </summary>
    public static class Tokener
    {
        /// <summary>
        /// Parses tokens from a given text source.
        /// </summary>
        /// <param name="reader">
        /// Text source; it is read to its end before tokenization begins.
        /// </param>
        /// <returns>
        /// The token sequence produced by the string overload for the text
        /// that was read.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="reader"/> is <c>null</c>.
        /// </exception>
        public static IEnumerable<Token> Tokenize(TextReader reader)
        {
            if (reader != null)
                return Tokenize(reader.ReadToEnd());

            throw new ArgumentNullException("reader");
        }

        /// <summary>
        /// Parses tokens from a given string.
        /// </summary>
        /// <param name="input">
        /// Selector text; <c>null</c> is treated as an empty string.
        /// </param>
        /// <returns>
        /// A lazily evaluated token sequence that always ends with an
        /// end-of-input token.
        /// </returns>
        /// <exception cref="FormatException">
        /// Raised during enumeration when the input contains a malformed
        /// identifier, hash or string, or a character that cannot start a
        /// token.
        /// </exception>
        public static IEnumerable<Token> Tokenize(string input)
        {
            var source = new Reader(input ?? string.Empty);

            for (var current = source.Read(); current != null; current = source.Read())
            {
                //
                // Identifier or function
                //
                if (current == '-' || IsNmStart(current))
                {
                    source.Mark();

                    // A leading dash must be followed by a name-start character.
                    if (current == '-' && !IsNmStart(source.Read()))
                        throw new FormatException(string.Format("Invalid identifier at position {0}.", source.Position));

                    char? stop;
                    do { stop = source.Read(); } while (IsNmChar(stop));

                    // An opening parenthesis is consumed together with the name
                    // (function token); any other terminator is pushed back.
                    if (stop != '(')
                        yield return Token.Ident(source.MarkedWithUnread());
                    else
                        yield return Token.Function(source.Marked());

                    continue;
                }

                //
                // Integer
                //
                if (IsDigit(current))
                {
                    source.Mark();
                    while (IsDigit(source.Read())) { /* NOP */ }
                    yield return Token.Integer(source.MarkedWithUnread());
                    continue;
                }

                //
                // Whitespace, including that which is coupled with some punctuation
                //
                if (IsS(current))
                {
                    var blank = ParseWhiteSpace(source);
                    var follower = source.Read();

                    // Whitespace directly followed by a combinator collapses
                    // into the combinator token alone.
                    if (follower == ',')
                        yield return Token.Comma();
                    else if (follower == '+')
                        yield return Token.Plus();
                    else if (follower == '>')
                        yield return Token.Greater();
                    else if (follower == '~')
                        yield return Token.Tilde();
                    else
                    {
                        source.Unread();
                        yield return Token.WhiteSpace(blank);
                    }

                    continue;
                }

                // The loop condition guarantees a character is present here.
                var symbol = current.Value;

                switch (symbol)
                {
                    case '*': // * or *=
                    case '~': // ~ or ~=
                    case '|': // | or |=
                        if (source.Read() == '=')
                        {
                            if (symbol == '*')
                                yield return Token.SubstringMatch();
                            else if (symbol == '|')
                                yield return Token.DashMatch();
                            else
                                yield return Token.Includes();
                        }
                        else
                        {
                            source.Unread();
                            if (symbol == '~')
                                yield return Token.Tilde();
                            else
                                yield return Token.Char(symbol);
                        }
                        break;

                    case '^': // ^=
                    case '$': // $=
                        // These two are only valid as the first half of a match operator.
                        if (source.Read() != '=')
                            throw new FormatException(string.Format("Invalid character at position {0}.", source.Position));

                        if (symbol == '^')
                            yield return Token.PrefixMatch();
                        else
                            yield return Token.SuffixMatch();
                        break;

                    //
                    // Single-character punctuation
                    //
                    case '.': yield return Token.Dot(); break;
                    case ':': yield return Token.Colon(); break;
                    case ',': yield return Token.Comma(); break;
                    case '=': yield return Token.Equals(); break;
                    case '[': yield return Token.LeftBracket(); break;
                    case ']': yield return Token.RightBracket(); break;
                    case ')': yield return Token.RightParenthesis(); break;
                    case '+': yield return Token.Plus(); break;
                    case '>': yield return Token.Greater(); break;
                    case '#': yield return Token.Hash(ParseHash(source)); break;

                    //
                    // Single- or double-quoted strings
                    //
                    case '\"':
                    case '\'': yield return ParseString(source, /* quote */ symbol); break;

                    default:
                        throw new FormatException(string.Format("Invalid character at position {0}.", source.Position));
                }
            }

            yield return Token.Eoi();
        }

        /// <summary>
        /// Collects the run of whitespace that starts at the reader's current
        /// character and leaves the reader on the last character of that run.
        /// </summary>
        /// <param name="reader">Reader whose current character is whitespace.</param>
        /// <returns>The whitespace text that was scanned.</returns>
        private static string ParseWhiteSpace(Reader reader)
        {
            Debug.Assert(reader != null);

            reader.Mark();

            char? next;
            do { next = reader.Read(); } while (IsS(next));

            // Capture the span first, then step back off the terminator so
            // the caller gets to read it again.
            var blank = reader.Marked();
            reader.Unread();
            return blank;
        }

        /// <summary>
        /// Reads the name that follows a <c>#</c> and leaves the reader on the
        /// last character of that name.
        /// </summary>
        /// <param name="reader">Reader whose current character is the <c>#</c>.</param>
        /// <returns>The hash name without the leading <c>#</c>.</returns>
        /// <exception cref="FormatException">
        /// No name character follows the <c>#</c>.
        /// </exception>
        private static string ParseHash(Reader reader)
        {
            Debug.Assert(reader != null);

            reader.MarkFromNext(); // skipping #

            char? next;
            do { next = reader.Read(); } while (IsNmChar(next));

            var name = reader.Marked();
            reader.Unread(); // hand the terminator back before reporting or returning

            if (name.Length > 0)
                return name;

            throw new FormatException(string.Format("Invalid hash at position {0}.", reader.Position));
        }

        /// <summary>
        /// Reads a quoted string whose opening quote is the reader's current
        /// character and leaves the reader on the closing quote.
        /// </summary>
        /// <param name="reader">Reader positioned on the opening quote.</param>
        /// <param name="quote">The quote character that terminates the string.</param>
        /// <returns>A string token carrying the unescaped text between the quotes.</returns>
        /// <exception cref="FormatException">
        /// The input ends before the closing quote, or a backslash is followed
        /// by anything other than the quote character or another backslash.
        /// </exception>
        private static Token ParseString(Reader reader, char quote)
        {
            Debug.Assert(reader != null);

            //
            // TODO Support full string syntax!
            //
            // string    {string1}|{string2}
            // string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
            // string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
            // nonascii  [^\0-\177]
            // escape    {unicode}|\\[^\n\r\f0-9a-f]
            // unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
            //

            var opening = reader.Position;
            reader.MarkFromNext(); // skipping quote

            // Allocated only once the first escape sequence is seen.
            StringBuilder pieces = null;

            for (;;)
            {
                var current = reader.Read();

                if (current == quote)
                    break;

                if (current == null)
                    throw new FormatException(string.Format("Unterminated string at position {0}.", opening));

                if (current != '\\')
                    continue;

                var escaped = reader.Read();

                //
                // NOTE: Only escaping of quote and backslash supported!
                //

                if (!(escaped == quote || escaped == '\\'))
                    throw new FormatException(string.Format("Invalid escape sequence at position {0} in a string at position {1}.", reader.Position, opening));

                if (pieces == null)
                    pieces = new StringBuilder();

                // Keep the text before the backslash, then restart the mark on
                // the escaped character so it begins the next literal run.
                pieces.Append(reader.MarkedExceptLast());
                reader.Mark();
            }

            var tail = reader.Marked();

            if (pieces == null)
                return Token.String(tail);

            return Token.String(pieces.Append(tail).ToString());
        }

        /// <summary>
        /// Tells whether a character is an ASCII decimal digit; <c>null</c>
        /// (no character) is never a digit.
        /// </summary>
        private static bool IsDigit(char? ch) // [0-9]
        {
            if (!ch.HasValue)
                return false;

            var c = ch.Value;
            return '0' <= c && c <= '9';
        }

        /// <summary>
        /// Tells whether a character is one of the whitespace characters of
        /// the selector grammar; <c>null</c> (no character) is not.
        /// </summary>
        private static bool IsS(char? ch) // [ \t\r\n\f]
        {
            switch (ch)
            {
                case ' ':
                case '\t':
                case '\r':
                case '\n':
                case '\f':
                    return true;

                default:
                    return false;
            }
        }

        /// <summary>
        /// Tells whether a character may begin a name: an underscore, an ASCII
        /// letter of either case, or any non-ASCII character (U+0080 and
        /// above, the grammar's <c>nonascii</c> class). <c>null</c> (no
        /// character) never qualifies.
        /// </summary>
        /// <remarks>
        /// The <c>{escape}</c> alternative of the grammar is not supported.
        /// </remarks>
        private static bool IsNmStart(char? ch) // [_a-z]|{nonascii}|{escape}
        {
            return ch == '_'
                || (ch >= 'a' && ch <= 'z')
                || (ch >= 'A' && ch <= 'Z')
                || ch >= '\u0080'; // nonascii: [^\0-\177]
        }

        /// <summary>
        /// Tells whether a character may appear inside a name: a dash, a
        /// decimal digit, or anything accepted as a name-start character.
        /// </summary>
        private static bool IsNmChar(char? ch) // [_a-z0-9-]|{nonascii}|{escape}
        {
            if (ch == '-')
                return true;

            return IsDigit(ch) || IsNmStart(ch);
        }

        /// <summary>
        /// Forward cursor over the input string with push-back and a movable
        /// mark for extracting the text scanned since the mark was set.
        /// </summary>
        private sealed class Reader
        {
            private readonly string _text;
            private int _cursor = -1; // index of the current character; -1 before the first read
            private int _mark = -1;   // index at which the marked span begins

            public Reader(string input)
            {
                _text = input;
            }

            /// <summary>One-based position of the current character.</summary>
            public int Position
            {
                get { return _cursor + 1; }
            }

            /// <summary>
            /// The current character, or <c>null</c> when the cursor is before
            /// the first character or past the last one.
            /// </summary>
            public char? Value
            {
                get
                {
                    if (_cursor < 0 || _cursor >= _text.Length)
                        return null;

                    return _text[_cursor];
                }
            }

            /// <summary>
            /// Advances to the next character and returns it; once the end is
            /// reached the cursor stays just past the last character and
            /// <c>null</c> is returned.
            /// </summary>
            public char? Read()
            {
                _cursor = Math.Min(_cursor + 1, _text.Length);
                return Value;
            }

            /// <summary>Steps back one character, never before the initial position.</summary>
            public void Unread()
            {
                if (_cursor >= 0)
                    _cursor--;
            }

            /// <summary>Starts the marked span at the current character.</summary>
            public void Mark()
            {
                _mark = _cursor;
            }

            /// <summary>Starts the marked span at the character after the current one.</summary>
            public void MarkFromNext()
            {
                _mark = _cursor + 1;
            }

            /// <summary>Text from the mark up to, but excluding, the current character.</summary>
            public string Marked()
            {
                return Slice(0);
            }

            /// <summary>Same as <see cref="Marked()"/> but also drops the character before the current one.</summary>
            public string MarkedExceptLast()
            {
                return Slice(-1);
            }

            /// <summary>Returns the marked text, then steps back one character.</summary>
            public string MarkedWithUnread()
            {
                var span = Slice(0);
                Unread();
                return span;
            }

            // Extracts the marked span, with its end shifted by `shift`
            // characters and clamped to the end of the input; an empty or
            // inverted span yields the empty string.
            private string Slice(int shift)
            {
                var end = _cursor + shift;
                if (end > _text.Length)
                    end = _text.Length;

                var length = end - _mark;
                if (length <= 0)
                    return string.Empty;

                return _text.Substring(_mark, length);
            }
        }
    }
}


#pragma warning restore