| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263 |
- using System;
- using System.Collections.Generic;
- using System.Text;
/// <summary>
/// Splits a SQL script into individual statements on top-level semicolons,
/// leaving semicolons inside string literals, quoted identifiers, comments and
/// CASE/BEGIN ... END blocks untouched.
/// </summary>
/// <remarks>
/// This is a lightweight scanner, not a SQL parser. Known limitations:
/// compound terminators such as <c>END IF</c> or <c>END LOOP</c> are counted as a
/// plain <c>END</c>; block comments are not treated as nestable; dollar-quoted
/// strings are not recognised by <see cref="SplitStatements"/>.
/// </remarks>
public static class SqlTokenizer
{
    /// <summary>
    /// Splits <paramref name="sql"/> into statements at every semicolon that is
    /// not inside a quoted string, quoted identifier, comment, or an open
    /// CASE/BEGIN block.
    /// </summary>
    /// <param name="sql">The script text to split.</param>
    /// <param name="keepTerminator">
    /// When <c>true</c>, each statement that ended with a semicolon keeps that
    /// semicolon; a trailing statement without one is returned as written.
    /// </param>
    /// <returns>
    /// The statements in source order, each trimmed of surrounding whitespace.
    /// Empty statements (for example the gap in <c>;;</c>) are never returned.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="sql"/> is null.</exception>
    public static List<string> SplitStatements(string sql, bool keepTerminator = false)
    {
        if (sql == null) throw new ArgumentNullException(nameof(sql));

        var results = new List<string>();
        var sb = new StringBuilder(sql.Length);
        int nested = 0; // number of currently open CASE/BEGIN blocks; never negative
        int i = 0;

        while (i < sql.Length)
        {
            char c = sql[i];

            // 1) Statement terminator (only splits when no block is open)
            if (c == ';')
            {
                if (nested > 0)
                {
                    // Still inside a CASE/BEGIN block: the semicolon belongs to the statement.
                    sb.Append(c);
                }
                else
                {
                    string raw = sb.ToString();
                    string stmt = raw.Trim();
                    if (stmt.Length > 0)
                    {
                        // Decide emptiness BEFORE adding the terminator so that ";;"
                        // does not yield a bare ";" statement when keepTerminator is set.
                        results.Add(keepTerminator ? raw.TrimStart() + c : stmt);
                    }
                    sb.Clear();
                }
                i++;
                continue;
            }

            // 2) Single-quoted string: '...'
            if (c == '\'')
            {
                ConsumeSingleQuoted(sql, ref i, sb);
                continue;
            }

            // 3) Double-quoted identifier/string: "..."
            if (c == '"')
            {
                ConsumeDoubleQuoted(sql, ref i, sb);
                continue;
            }

            // 4) Line comment: -- ... (to end of line)
            if (c == '-' && i + 1 < sql.Length && sql[i + 1] == '-')
            {
                ConsumeLineComment(sql, ref i, sb);
                continue;
            }

            // 5) Block comment: /* ... */
            if (c == '/' && i + 1 < sql.Length && sql[i + 1] == '*')
            {
                ConsumeBlockComment(sql, ref i, sb);
                continue;
            }

            // NOTE: dollar-quoted strings ($tag$ ... $tag$) are not handled here;
            // TryConsumeDollarQuoted exists below but is intentionally not wired in.

            // 6) Words: consume the whole identifier/keyword at once so that CASE,
            //    BEGIN and END only count when they are complete words (not the
            //    "END" in "BACKEND" or the "CASE" in "CASES").
            if (IsWordChar(c))
            {
                int start = i;
                while (i < sql.Length && IsWordChar(sql[i]))
                {
                    i++;
                }
                int length = i - start;

                if (IsKeyword(sql, start, length, "CASE"))
                {
                    nested++;
                }
                else if (IsKeyword(sql, start, length, "BEGIN"))
                {
                    // "BEGIN;" / "BEGIN TRANSACTION" start a transaction, not a block,
                    // and have no matching END.
                    if (!IsTransactionBegin(sql, i))
                    {
                        nested++;
                    }
                }
                else if (IsKeyword(sql, start, length, "END"))
                {
                    // Clamp at zero: an unmatched END must not disable splitting
                    // for the remainder of the script.
                    if (nested > 0)
                    {
                        nested--;
                    }
                }

                // Copy the word exactly as written (original casing preserved).
                sb.Append(sql, start, length);
                continue;
            }

            // Default: copy char
            sb.Append(c);
            i++;
        }

        // Remainder: text after the last top-level semicolon
        var last = sb.ToString().Trim();
        if (last.Length > 0)
        {
            results.Add(last);
        }
        return results;
    }

    // True for characters that can be part of an unquoted word (letters, digits,
    // underscore, and '@' so that variables like @END are not read as keywords).
    private static bool IsWordChar(char ch)
    {
        return char.IsLetterOrDigit(ch) || ch == '_' || ch == '@';
    }

    // True when the word s[start .. start+length) equals keyword, ignoring case.
    private static bool IsKeyword(string s, int start, int length, string keyword)
    {
        return length == keyword.Length
            && string.Compare(s, start, keyword, 0, length, StringComparison.OrdinalIgnoreCase) == 0;
    }

    // True when the text following a BEGIN keyword (starting at pos) shows that it
    // opens a transaction rather than a block: end of input, an immediate ';',
    // or one of TRANSACTION / TRAN / WORK.
    private static bool IsTransactionBegin(string s, int pos)
    {
        while (pos < s.Length && char.IsWhiteSpace(s[pos]))
        {
            pos++;
        }
        if (pos >= s.Length || s[pos] == ';')
        {
            return true;
        }

        int end = pos;
        while (end < s.Length && IsWordChar(s[end]))
        {
            end++;
        }
        int length = end - pos;

        return IsKeyword(s, pos, length, "TRANSACTION")
            || IsKeyword(s, pos, length, "TRAN")
            || IsKeyword(s, pos, length, "WORK");
    }

    // Copies a single-quoted literal starting at s[i] (the opening quote) into sb
    // and leaves i just past the closing quote. A doubled '' is an escaped quote.
    // An unterminated literal is copied through to the end of the input.
    private static void ConsumeSingleQuoted(string s, ref int i, StringBuilder sb)
    {
        // We are at opening '
        sb.Append(s[i]);
        i++;
        while (i < s.Length)
        {
            char c = s[i];
            sb.Append(c);
            i++;
            if (c == '\'')
            {
                // SQL escape for ' inside string is doubled ''
                if (i < s.Length && s[i] == '\'')
                {
                    sb.Append(s[i]);
                    i++;
                    continue;
                }
                break; // end of string
            }
        }
    }

    // Copies a double-quoted identifier/string starting at s[i] (the opening quote)
    // into sb and leaves i just past the closing quote. A doubled "" is an escaped
    // quote. An unterminated token is copied through to the end of the input.
    private static void ConsumeDoubleQuoted(string s, ref int i, StringBuilder sb)
    {
        // We are at opening "
        sb.Append(s[i]);
        i++;
        while (i < s.Length)
        {
            char c = s[i];
            sb.Append(c);
            i++;
            if (c == '"')
            {
                // Escaped " inside identifier/string is doubled ""
                if (i < s.Length && s[i] == '"')
                {
                    sb.Append(s[i]);
                    i++;
                    continue;
                }
                break; // end
            }
        }
    }

    // Copies a "--" line comment starting at s[i] into sb, up to and including the
    // terminating '\n' (or the end of the input). The caller guarantees that
    // s[i] and s[i + 1] are both '-'.
    private static void ConsumeLineComment(string s, ref int i, StringBuilder sb)
    {
        // We are at first '-'
        sb.Append(s[i]);
        sb.Append(s[i + 1]);
        i += 2;
        while (i < s.Length)
        {
            char c = s[i];
            sb.Append(c);
            i++;
            if (c == '\n') break; // end of line comment
        }
    }

    // Copies a "/* ... */" block comment starting at s[i] into sb and leaves i just
    // past the first closing "*/" (comments are not treated as nested). The caller
    // guarantees that s[i] is '/' and s[i + 1] is '*'. An unterminated comment is
    // copied through to the end of the input.
    private static void ConsumeBlockComment(string s, ref int i, StringBuilder sb)
    {
        // We are at '/'
        sb.Append(s[i]);
        sb.Append(s[i + 1]);
        i += 2;
        while (i < s.Length)
        {
            char c = s[i];
            sb.Append(c);
            i++;
            if (c == '*' && i < s.Length && s[i] == '/')
            {
                sb.Append(s[i]);
                i++;
                break;
            }
        }
    }

    // Attempts to copy a dollar-quoted string ($tag$ ... $tag$ or $$ ... $$) that
    // starts at s[i] into sb. Returns false, leaving i and sb untouched, when s[i]
    // does not start a valid opener; the caller then handles '$' normally.
    // Currently not called by SplitStatements.
    private static bool TryConsumeDollarQuoted(string s, ref int i, StringBuilder sb)
    {
        // Dollar quote opener: $tag$ where tag is [A-Za-z_][A-Za-z0-9_]* or empty (i.e. $$)
        int start = i;
        int j = i + 1;

        // Find next '$' to close the opener
        while (j < s.Length && s[j] != '$')
        {
            char ch = s[j];
            // The first tag character must be a letter or '_' (so "$1$" is not an
            // opener); later characters may also be digits.
            bool valid = (j == start + 1)
                ? (char.IsLetter(ch) || ch == '_')
                : (char.IsLetterOrDigit(ch) || ch == '_');
            if (!valid)
                return false;
            j++;
        }
        if (j >= s.Length || s[j] != '$')
            return false; // no closing '$' for opener

        // opener is s[start..j] inclusive, e.g. "$$" or "$abc$"
        string tag = s.Substring(start, j - start + 1); // includes both '$'

        // Consume opener
        sb.Append(tag);
        i = j + 1;

        // Now consume until we see the same tag again
        while (i < s.Length)
        {
            // fast check for tag match
            if (s[i] == '$' && i + tag.Length <= s.Length &&
                string.CompareOrdinal(s, i, tag, 0, tag.Length) == 0)
            {
                sb.Append(tag);
                i += tag.Length;
                return true;
            }
            sb.Append(s[i]);
            i++;
        }

        // Unterminated dollar quote: we consumed to end; still treat as consumed
        return true;
    }
}
|