2009-10-19 15 views
0

我的解析器通过首先从中缀转换为后缀来评估PEMDAS表达式,然后使用标准后缀评估规则。我解析表达式并将令牌存储在列表中。这个预编译对我来说可以,因为我打算缓存预编译的函数。帮助优化我的RPN评估函数

我想优化评估函数(请参阅代码中的Evaluate04)。在我的硬件上,我可以在不到600毫秒内获得1,000,000次评估。说实话,我相信这已经足够快了。远快于数据库访问调用来获取表达式。

我想看看你们是否可以让它变得更快。微优化,完全重构类。如何完成并不重要。

这是一样快,我已经能够得到它,你能改善它吗?

using System; 
using System.Collections.Generic; 
using System.Linq; 
using System.Windows.Forms; 

namespace ParseTest2 
{ 
    public partial class Form1 : Form 
    { 
     public Form1() 
     { 
      InitializeComponent(); 
     } 

     enum Operator {Add, Subtract, Multiply, Divide} 

     enum TokenType { Variable, Value, Add, Subtract, Multiply, Divide }; 

     private class TokenFactory 
     { 
      public static Token Add 
      { 
       get { return new Token(TokenType.Add); } 
      } 
      public static Token Subtract 
      { 
       get { return new Token(TokenType.Subtract); } 
      } 
      public static Token Multiply 
      { 
       get { return new Token(TokenType.Multiply); } 
      } 
      public static Token Divide 
      { 
       get { return new Token(TokenType.Divide); } 
      } 
      public static Token Val(float value) 
      { 
       return new Token(value); 
      } 
      public static Token Var(string variable) 
      { 
       return new Token(variable); 
      } 
     } 

     private class Token 
     { 
      public readonly TokenType TokenType; 
      public float Value; 
      public readonly string Variable; 

      public Token(TokenType tokenType) 
      { 
       TokenType = tokenType; 
       //Value = 0; 
       //Variable = null;     
      } 
      public Token(float value) 
      { 
       TokenType = TokenType.Value; 
       Value = value; 
       //Variable = null; 
      } 
      public Token(string variable) 
      { 
       //TokenType = TokenType.Variable; 
       Variable = variable; 
       //Value = 0; 
      } 
      public override string ToString() 
      { 
       return 
        Expression.IsOperand(this) ? 
        string.Format("{0} {1} {2}", TokenType, Variable ?? "", Value) : 
        string.Format("{0}", TokenType); 
      } 
     } 

     class Expression 
     { 
      List<Token> _tokens; 

      public Action<Token> SetVariableValue; 

      public Expression(string expression) 
      { 
       //time to convert to postfix from infix 
       var infix = expression.Split(' '); 
       string postfix = string.Empty; 

       Action<string> add = s => {postfix += " " + s;}; 
       var ops = new Stack<string>(); 

       Func<string, int> getPrecedence = value => 
       { 
        switch(value) 
        { 
         case "*": 
         case "/": 
          return 1; 
         case "+": 
         case "-": 
          return 0; 

        } 
        return -1; 
       }; 

       Func<string, bool> isLowerOrEqualPrecedence = val => 
       { 
        if (ops.Count == 0) return false; 
        var op = ops.Peek(); 
        int cur = getPrecedence(val); 
        int top = getPrecedence(op); 
        return cur <= top; 
       }; 

       foreach (string val in infix) 
       { 
        if ("-+*/".Contains(val)) 
        { 
         if (ops.Count == 0) 
          ops.Push(val); 
         else 
         { 
          if (!isLowerOrEqualPrecedence(val)) 
          { 
           ops.Push(val); 
          } 
          else 
          { 
           while (isLowerOrEqualPrecedence(val)) 
           { 
            add(ops.Pop()); 
           } 
           ops.Push(val); 
          } 
         } 
        } 
        else if (val == "(") 
         ops.Push(val); 
        else if (val == ")") 
        { 
         while (true) 
         { 
          var op = ops.Pop(); 
          if (op == "(") break; 
          add(op); 
         } 
        } 
        else 
        { 
         add(val); 
        }      
       } 

       while (ops.Count > 0) 
       { 
        add(ops.Pop()); 
       } 


       _tokens = new List<Token>(); 
       foreach (string x in postfix.Trim().Split(' ')) 
       { 
        switch(x) 
        { 
         case "*": 
          _tokens.Add(TokenFactory.Multiply); 
          break; 
         case "/": 
          _tokens.Add(TokenFactory.Divide); 
          break; 
         case "+": 
          _tokens.Add(TokenFactory.Add); 
          break;       
         case "-": 
          _tokens.Add(TokenFactory.Subtract); 
          break; 
         default: 
          _tokens.Add(TokenFactory.Var(x)); 
          break; 
        } 
       }        
      } 

      public static bool IsOperand(Token tk) 
      { 
       return tk.TokenType == TokenType.Variable || tk.TokenType == TokenType.Value; 
      } 



      public float Evaluate04() 
      { 
       Token[] tokens = _tokens.ToArray(); 

       int i; 

       for (i = tokens.Length - 1; i >= 0; --i) 
        if (tokens[i].TokenType == TokenType.Variable) 
         SetVariableValue(tokens[i]); 

       int tokenCount = tokens.Length; 

       while (true) 
       { 
        int i1 = 0, i2 = 0, i3 = 0; 
        //i1 = i2 = i3 = -1; 
        int j; 
        Token t1, t2, t3; 

        //looking for operator preceded by two operands 
        for (i = 0; i < tokens.Length - 2; ++i) 
        //i = 0; 
        //while(true) 
        { 
         t1 = null; 
         t2 = null; 
         t3 = null; 

         j = i; 
         do 
         { 
          t1 = tokens[j]; 
          i1 = j++; 
         } while (t1 == null); 


         do 
         { 
          t2 = tokens[j]; 
          i2 = j++; 
         } while (t2 == null); 

         do 
         { 
          t3 = tokens[j]; 
          i3 = j++; 
         } while (t3 == null); 

         //it's a binary operation if t3 is an operator and t2, t1 are operands 
         //if (!IsOperand(t3) && IsOperand(t2) && IsOperand(t1)) 
         if (
          !(t3.TokenType == TokenType.Variable || t3.TokenType == TokenType.Value) && 
          (t2.TokenType == TokenType.Variable || t2.TokenType == TokenType.Value) && 
          (t1.TokenType == TokenType.Variable || t1.TokenType == TokenType.Value)) 
         { 
          float val; 

          if (t3.TokenType == TokenType.Add) 
           val = t1.Value + t2.Value; 
          else if (t3.TokenType == TokenType.Divide) 
           val = t1.Value/t2.Value; 
          else if (t3.TokenType == TokenType.Subtract) 
           val = t1.Value - t2.Value; 
          else if (t3.TokenType == TokenType.Multiply) 
           val = t1.Value * t2.Value; 
          else 
           val = int.MinValue; 

          //we're done, return 
          if (tokenCount == 3) 
           return val; 

          tokenCount -= 2; 

          //ok, we are done processing these, null them 
          tokens[i1] = new Token(val); 
          tokens[i2] = null; 
          tokens[i3] = null; 

          //break, for loop and repeat until done 
          break; 
         } 
        } 
       } 
      } 
     } 
     private void Form1_Load(object sender, EventArgs e) 
     { 

      Action<Token> setVariableValue = token => 
      { 
       if (token.Variable == "A") 
        token.Value = 4; 
       else if (token.Variable == "B") 
        token.Value = 5; 
       else if (token.Variable == "C") 
        token.Value = 7; 
       else if (token.Variable == "D") 
        token.Value = 2; 
      }; 

      //spaces are required because I'm splitting on them. 
      //I know it's lame, it's a detail I'll address later... 
      var x = new Expression("(A + B)/(C + D)"); 
      x.SetVariableValue = setVariableValue; 
      Func<float> eval = x.Evaluate04; 
      eval(); 
      int count = 1000000; 
      float res = 0; 

      var sw = new System.Diagnostics.Stopwatch(); 

      //sw.Start(); 
      //Parallel.ForEach(Enumerable.Range(1, count), i => 
      //{ 
      // res = eval(); 
      //}); 
      //sw.Stop(); 
      //Console.WriteLine("{0} takes: {1}", "parallel", sw.ElapsedMilliseconds); 

      sw.Reset(); 
      sw.Start(); 

      //for(int i=0; i<count; ++i) 
      foreach (int i in Enumerable.Range(1, count)) 
      { 
       res = eval(); 
      } 
      sw.Stop(); 
      Console.WriteLine("{0} takes: {1}", "single thread", sw.ElapsedMilliseconds); 
      Console.WriteLine("{0}", res); 

     } 

     private void Form1_KeyDown(object sender, KeyEventArgs e) 
     { 
      if (e.KeyCode == Keys.Escape) 
       Close(); 

     }   
    } 
} 
+0

如果您的函数被重用的次数比创建次数多,那么LINQ Expressions API将占主导地位。 – 2009-10-19 22:12:06

回答

1

从分析树到RPN的处理不应该是一个性能问题,如果你只做一次并且多次进行评估。

什么是这个令牌爵士和索引令牌数组?

为什么不只是有一个栈,这样的:

for (i = 0; i < n; i++){ 
    switch(op[i]){ 
    case VARIABLE: 
     stk[j++] = <value_of_variable>; 
     break; 
    case PLUS: 
     temp = stk[--j]; 
     stk[j-1] += temp; 
     break; 
    // and so on... 
    } 
} 

在内部循环,你不应该做任何内存分配。 你应该做的最昂贵的事情是查找变量的值。

The easiest way to find out what is taking the most time is this.

第二最简单的方法是通过它只是单步在拆卸水平。如果你发现它会进入像new这样的系统例程,那就快点停下来。迭代器也一样。

+0

感谢您的反馈,我尝试了postfix算法,并且堆栈没有像我发布的数组实现那么好。我知道还有更多的LOC比后缀算法。我认为我认为我应该看看反汇编。我同意,新的objs太昂贵了,迭代器也会减慢速度。 – Steve 2009-10-20 14:20:19

+0

@Steve:祝你好运。我最喜欢的方法是把它放在一个持久的循环中,然后随机手动暂停,也许几次。这即时发现任何不需要的功能调用。然后,当我到热点时,单步进行。 – 2009-10-20 15:22:55