// Copyright (c) 2009, 2020 Oracle and/or its affiliates.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License, version 2.0, as
// published by the Free Software Foundation.
//
// This program is also distributed with certain software (including
// but not limited to OpenSSL) that is licensed under separate terms,
// as designated in a particular file or component or in included license
// documentation. The authors of MySQL hereby grant you an
// additional permission to link the program and your derivative works
// with the separately licensed software that they have included with
// MySQL.
//
// Without limiting anything contained in the foregoing, this file,
// which is part of MySQL Connector/NET, is also subject to the
// Universal FOSS Exception, version 1.0, a copy of which can be found at
// http://oss.oracle.com/licenses/universal-foss-exception.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU General Public License, version 2.0, for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
using MySql.Data.MySqlClient;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace MySql.Data.Common
{
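  /// <summary>
  /// Rewrites a SQL statement into a normalized form: string and numeric
  /// literals are replaced with '?', multi-row VALUES lists and IN lists are
  /// collapsed to a "/* , ... */" comment, and runs of whitespace become
  /// single spaces, so that structurally identical statements produce the
  /// same text.
  /// </summary>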
  internal class QueryNormalizer
  {
    private static readonly List<string> Keywords = new List<string>();
    private readonly List<Token> _tokens = new List<Token>();
    private int _pos;
    private string _fullSql;
    private string _queryType;
    static QueryNormalizer()
    {
      Keywords = SchemaProvider.GetReservedWords().AsDataTable().
        Select().
        Select(x => x[0].ToString()).ToList();
    }
    public string QueryType => this._queryType;
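    /// <summary>
    /// Tokenizes <paramref name="sql"/> and rebuilds it in normalized form.
    /// Illustrative example: "SELECT 1, 'a'" comes back as "SELECT ?, ?".
    /// </summary>
    /// <param name="sql">The SQL statement to normalize.</param>
    /// <returns>The normalized statement text.</returns>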
    public string Normalize(string sql)
    {
      this._tokens.Clear();
      StringBuilder newSql = new StringBuilder();
      this._fullSql = sql;
      this.TokenizeSql(sql);
      this.DetermineStatementType(this._tokens);
      ProcessMathSymbols(this._tokens);
      this.CollapseValueLists(this._tokens);
      this.CollapseInLists(this._tokens);
      CollapseWhitespace(this._tokens);
      foreach (Token t in this._tokens.Where(t => t.Output))
      {
        newSql.Append(t.Text);
      }
      return newSql.ToString();
    }
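    /// <summary>
    /// Records the first keyword in the token stream (SELECT, INSERT, ...)
    /// as the statement type exposed through <see cref="QueryType"/>.
    /// </summary>
    /// <param name="tok">The token stream to inspect.</param>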
    private void DetermineStatementType(List<Token> tok)
    {
      foreach (Token t in tok.Where(t => t.Type == TokenType.Keyword))
      {
        this._queryType = t.Text.ToUpperInvariant();
        break;
      }
    }
    /// <summary>
    /// Marks '-' or '+' symbols that act as unary operators so they are
    /// excluded from the output.
    /// </summary>
    /// <param name="tok">The token stream to process.</param>
    private static void ProcessMathSymbols(List<Token> tok)
    {
      Token lastToken = null;
      foreach (Token t in tok)
      {
        if (t.Type == TokenType.Symbol &&
            (t.Text == "-" || t.Text == "+"))
        {
          if (lastToken != null &&
              lastToken.Type != TokenType.Number &&
              lastToken.Type != TokenType.Identifier &&
              (lastToken.Type != TokenType.Symbol || lastToken.Text != ")"))
          {
            t.Output = false;
          }
        }
        if (t.IsRealToken)
        {
          lastToken = t;
        }
      }
    }
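    /// <summary>
    /// Suppresses whitespace tokens that directly follow other whitespace so
    /// that only a single space is emitted between tokens.
    /// </summary>
    /// <param name="tok">The token stream to process.</param>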
    private static void CollapseWhitespace(List<Token> tok)
    {
      Token lastToken = null;
      foreach (Token t in tok)
      {
        if (t.Output &&
            t.Type == TokenType.Whitespace &&
            lastToken != null &&
            lastToken.Type == TokenType.Whitespace)
        {
          t.Output = false;
        }
        if (t.Output)
        {
          lastToken = t;
        }
      }
    }
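    /// <summary>
    /// Finds VALUE/VALUES keywords in the token stream and collapses any
    /// multi-row value list that follows each of them.
    /// </summary>
    /// <param name="tok">The token stream to process.</param>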
    private void CollapseValueLists(List<Token> tok)
    {
      int pos = -1;
      while (++pos < tok.Count)
      {
        Token t = tok[pos];
        if (t.Type != TokenType.Keyword)
        {
          continue;
        }
        if (!t.Text.StartsWith("VALUE", StringComparison.OrdinalIgnoreCase))
        {
          continue;
        }
        this.CollapseValueList(tok, ref pos);
      }
    }
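    /// <summary>
    /// Collapses a multi-row value list down to its first parenthesized group
    /// followed by a "/* , ... */" comment; illustratively,
    /// "VALUES (?,?),(?,?)" becomes "VALUES (?,?) /* , ... */".
    /// A single-row list is left unchanged.
    /// </summary>
    /// <param name="tok">The token stream to process.</param>
    /// <param name="pos">Index of the VALUES keyword; advanced past the list.</param>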
    private void CollapseValueList(List<Token> tok, ref int pos)
    {
      List<int> parenIndices = new List<int>();
      // this while loop will find all closing parens in this value list
      while (true)
      {
        // find the closing ')'
        while (++pos < tok.Count)
        {
          if (tok[pos].Type == TokenType.Symbol && tok[pos].Text == ")")
          {
            break;
          }
          if (pos == tok.Count - 1)
          {
            break;
          }
        }
        parenIndices.Add(pos);
        // now find the next "real" token
        while (++pos < tok.Count)
        {
          if (tok[pos].IsRealToken)
          {
            break;
          }
        }
        if (pos == tok.Count)
        {
          break;
        }
        if (tok[pos].Text == ",")
        {
          continue;
        }
        pos--;
        break;
      }
      // if we only have 1 value then we don't collapse
      if (parenIndices.Count < 2)
      {
        return;
      }
      int index = parenIndices[0];
      tok[++index] = new Token(TokenType.Whitespace, " ");
      tok[++index] = new Token(TokenType.Comment, "/* , ... */");
      index++;
      // now mark all the other tokens as no output
      while (index <= parenIndices[parenIndices.Count - 1])
      {
        tok[index++].Output = false;
      }
    }
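    /// <summary>
    /// Finds IN keywords in the token stream and collapses the literal lists
    /// that follow them.
    /// </summary>
    /// <param name="tok">The token stream to process.</param>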
    private void CollapseInLists(List<Token> tok)
    {
      int pos = -1;
      while (++pos < tok.Count)
      {
        Token t = tok[pos];
        if (t.Type != TokenType.Keyword)
        {
          continue;
        }
        if (t.Text != "IN")
        {
          continue;
        }
        CollapseInList(tok, ref pos);
      }
    }
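    /// <summary>
    /// Advances <paramref name="pos"/> to the next token whose IsRealToken
    /// flag is set (not whitespace, not a comment, still marked for output)
    /// and returns it; returns null when the end of the list is reached.
    /// </summary>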
    private static Token GetNextRealToken(List<Token> tok, ref int pos)
    {
      while (++pos < tok.Count)
      {
        if (tok[pos].IsRealToken)
        {
          return tok[pos];
        }
      }
      return null;
    }
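    /// <summary>
    /// Collapses an IN list of literals to "(? /* , ... */ )"; illustratively,
    /// "IN (1, 2, 3)" becomes "IN (? /* , ... */ )". Subqueries (a keyword
    /// right after the opening parenthesis), nested parentheses and
    /// version-specific command comments are left untouched.
    /// </summary>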
    private static void CollapseInList(List<Token> tok, ref int pos)
    {
      Token t = GetNextRealToken(tok, ref pos);
      // Debug.Assert(t.Text == "(");
      if (t == null)
      {
        return;
      }
      // if the first token is a keyword then we likely have a
      // SELECT .. IN (SELECT ...)
      t = GetNextRealToken(tok, ref pos);
      if (t == null || t.Type == TokenType.Keyword)
      {
        return;
      }
      int start = pos;
      // first find all the tokens that make up the in list
      while (++pos < tok.Count)
      {
        t = tok[pos];
        if (t.Type == TokenType.CommandComment)
        {
          return;
        }
        if (!t.IsRealToken)
        {
          continue;
        }
        if (t.Text == "(")
        {
          return;
        }
        if (t.Text == ")")
        {
          break;
        }
      }
      int stop = pos;
      for (int i = stop; i > start; i--)
      {
        tok.RemoveAt(i);
      }
      tok.Insert(++start, new Token(TokenType.Whitespace, " "));
      tok.Insert(++start, new Token(TokenType.Comment, "/* , ... */"));
      tok.Insert(++start, new Token(TokenType.Whitespace, " "));
      tok.Insert(++start, new Token(TokenType.Symbol, ")"));
    }
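    /// <summary>
    /// Walks the SQL text and builds the token stream: comments are consumed
    /// (only "/*! ... */" command comments are kept), each run of whitespace
    /// becomes a single space, and quoted, unquoted and symbol tokens are
    /// emitted with string and numeric literals replaced by '?'.
    /// </summary>
    /// <param name="sql">The SQL statement to tokenize.</param>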
    private void TokenizeSql(string sql)
    {
      this._pos = 0;
      while (this._pos < sql.Length)
      {
        char c = sql[this._pos];
        if (this.LetterStartsComment(c) && this.ConsumeComment())
        {
          continue;
        }
        if (Char.IsWhiteSpace(c))
        {
          this.ConsumeWhitespace();
        }
        else if (c == '\'' || c == '\"' || c == '`')
        {
          this.ConsumeQuotedToken(c);
        }
        else if (!this.IsSpecialCharacter(c))
        {
          this.ConsumeUnquotedToken();
        }
        else
        {
          this.ConsumeSymbol();
        }
      }
    }
    private bool LetterStartsComment(char c)
    {
      return c == '#' || c == '/' || c == '-';
    }
    private bool IsSpecialCharacter(char c)
    {
      return !Char.IsLetterOrDigit(c) && c != '$' && c != '_' && c != '.';
    }
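    /// <summary>
    /// Consumes a '#', '-- ' or '/* ... */' comment starting at the current
    /// position. Only version-specific command comments ("/*! ... */") are
    /// kept as tokens; ordinary comments are discarded.
    /// </summary>
    /// <returns>True if a comment was consumed; false if the current
    /// character does not actually start a comment.</returns>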
    private bool ConsumeComment()
    {
      char c = this._fullSql[this._pos];
      // make sure the comment starts correctly
      if (c == '/' && ((this._pos + 1) >= this._fullSql.Length || this._fullSql[this._pos + 1] != '*'))
      {
        return false;
      }
      if (c == '-' && ((this._pos + 2) >= this._fullSql.Length || this._fullSql[this._pos + 1] != '-' || this._fullSql[this._pos + 2] != ' '))
      {
        return false;
      }
      string endingPattern = "\n";
      if (c == '/')
      {
        endingPattern = "*/";
      }
      int startingIndex = this._pos;
      int index = this._fullSql.IndexOf(endingPattern, this._pos);
      if (index == -1)
      {
        index = this._fullSql.Length - 1;
      }
      else
      {
        index += endingPattern.Length;
      }
      string comment = this._fullSql.Substring(this._pos, index - this._pos);
      if (comment.StartsWith("/*!", StringComparison.Ordinal))
      {
        this._tokens.Add(new Token(TokenType.CommandComment, comment));
      }
      this._pos = index;
      return true;
    }
    private void ConsumeSymbol()
    {
      char c = this._fullSql[this._pos++];
      this._tokens.Add(new Token(TokenType.Symbol, c.ToString()));
    }
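    /// <summary>
    /// Consumes a quoted token, honoring backslash escapes. Single-quoted
    /// strings are emitted as the '?' placeholder; back-tick or double-quoted
    /// identifiers are kept verbatim.
    /// </summary>
    /// <param name="c">The quote character that opened the token.</param>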
    private void ConsumeQuotedToken(char c)
    {
      bool escaped = false;
      int start = this._pos;
      this._pos++;
      while (this._pos < this._fullSql.Length)
      {
        char x = this._fullSql[this._pos];
        if (x == c && !escaped)
        {
          break;
        }
        if (escaped)
        {
          escaped = false;
        }
        else if (x == '\\')
        {
          escaped = true;
        }
        this._pos++;
      }
      this._pos++;
      this._tokens.Add(c == '\''
        ? new Token(TokenType.String, "?")
        : new Token(TokenType.Identifier, this._fullSql.Substring(start, this._pos - start)));
    }
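    /// <summary>
    /// Consumes an unquoted word. Numeric literals are emitted as the '?'
    /// placeholder, reserved words become upper-cased keyword tokens, and
    /// anything else becomes an identifier.
    /// </summary>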
    private void ConsumeUnquotedToken()
    {
      int startPos = this._pos;
      while (this._pos < this._fullSql.Length && !this.IsSpecialCharacter(this._fullSql[this._pos]))
      {
        this._pos++;
      }
      string word = this._fullSql.Substring(startPos, this._pos - startPos);
      double v;
      if (double.TryParse(
            word,
            System.Globalization.NumberStyles.Any,
            System.Globalization.CultureInfo.InvariantCulture,
            out v))
      {
        this._tokens.Add(new Token(TokenType.Number, "?"));
      }
      else
      {
        Token t = new Token(TokenType.Identifier, word);
        if (IsKeyword(word))
        {
          t.Type = TokenType.Keyword;
          t.Text = t.Text.ToUpperInvariant();
        }
        this._tokens.Add(t);
      }
    }
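    /// <summary>
    /// Consumes a run of whitespace and emits a single space token for it.
    /// </summary>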
    private void ConsumeWhitespace()
    {
      this._tokens.Add(new Token(TokenType.Whitespace, " "));
      while (this._pos < this._fullSql.Length && Char.IsWhiteSpace(this._fullSql[this._pos]))
      {
        this._pos++;
      }
    }
    private static bool IsKeyword(string word)
    {
      return Keywords.Contains(word.ToUpperInvariant());
    }
  }
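  /// <summary>
  /// A single lexical element of the statement, together with an Output flag
  /// indicating whether it should appear in the normalized text.
  /// </summary>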
  internal class Token
  {
    public TokenType Type;
    public string Text;
    public bool Output;
    public Token(TokenType type, string text)
    {
      this.Type = type;
      this.Text = text;
      this.Output = true;
    }
    public bool IsRealToken => this.Type != TokenType.Comment &&
                               this.Type != TokenType.CommandComment &&
                               this.Type != TokenType.Whitespace &&
                               this.Output;
  }
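  /// <summary>
  /// The kinds of tokens produced by <see cref="QueryNormalizer"/>.
  /// </summary>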
  internal enum TokenType
  {
    Keyword,
    String,
    Number,
    Symbol,
    Identifier,
    Comment,
    CommandComment,
    Whitespace
  }
}