2016-04-15 35 views
0
Here is my SentiWordNet algorithm:



public class SWN3 { 

// Location of the SentiWordNet data file that the constructor reads.
private String pathToSWN = "C:/Users/RAHUL/Desktop/SWN/SentiWordNet_3.0.0.txt"; 
     // Maps keys of the form "word#pos" (pos being n, a, r or v in the lookups below)
     // to the aggregated score the constructor computes for that key.
     private HashMap<String, Double>_dict; 

     /**
      * Loads the SentiWordNet file at {@code pathToSWN} and fills {@code _dict}
      * with one aggregated score per {@code word#pos} key.
      *
      * <p>Each data line is tab-separated: column 0 is the part of speech,
      * columns 2 and 3 are the two scores whose difference (column 2 minus
      * column 3) is the sense score, and column 4 is a space-separated list of
      * {@code word#senseNumber} terms. Lines starting with {@code #} and lines
      * with fewer than five columns are skipped.
      *
      * <p>The final score of a key is the weighted average of its sense scores,
      * where sense number {@code k} has weight {@code 1/k}; sense numbers that
      * never occur contribute a score of 0.
      *
      * <p>Any exception while reading or parsing is printed and swallowed, which
      * leaves {@code _dict} empty (best-effort behavior kept from the original).
      */
     public SWN3(){

         _dict = new HashMap<String, Double>();
         // word#pos -> sense scores; position i holds the score of sense number i+1.
         HashMap<String, Vector<Double>> senseScores = new HashMap<String, Vector<Double>>();

         // try-with-resources closes the reader even when parsing fails.
         try (BufferedReader csv = new BufferedReader(new FileReader(pathToSWN))) {
             String line;
             while ((line = csv.readLine()) != null) {
                 String trimmed = line.trim();
                 // The SentiWordNet file carries '#' comment lines (header and footer).
                 if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                     continue;
                 }
                 String[] data = line.split("\t");
                 if (data.length < 5) {
                     continue;
                 }
                 double score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
                 for (String term : data[4].split(" ")) {
                     String[] wordAndSense = term.split("#");
                     if (wordAndSense.length < 2) {
                         continue;
                     }
                     int index = Integer.parseInt(wordAndSense[1]) - 1;
                     if (index < 0) {
                         continue;
                     }
                     String key = wordAndSense[0] + "#" + data[0];
                     Vector<Double> scores = senseScores.get(key);
                     if (scores == null) {
                         scores = new Vector<Double>();
                         senseScores.put(key, scores);
                     }
                     // Pad up to and including the slot, then overwrite it. Inserting
                     // with add(index, ...) would shift existing entries whenever a
                     // lower sense number shows up after a higher one.
                     while (scores.size() <= index) {
                         scores.add(0.0);
                     }
                     scores.set(index, score);
                 }
             }

             for (String word : senseScores.keySet()) {
                 Vector<Double> scores = senseScores.get(word);
                 double weighted = 0.0;
                 double weightSum = 0.0;
                 for (int i = 0; i < scores.size(); i++) {
                     double weight = (double) 1 / (double) (i + 1);
                     weighted += weight * scores.get(i);
                     weightSum += weight;
                 }
                 // Every stored vector has at least one entry, so weightSum > 0.
                 _dict.put(word, weighted / weightSum);
             }
         }
         catch(Exception e){e.printStackTrace();}
     }

/**
 * Returns the sum of the dictionary scores stored for {@code word} under the
 * four part-of-speech suffixes {@code #n}, {@code #a}, {@code #r} and
 * {@code #v}. Suffixes with no entry contribute nothing, so a word that is
 * absent from the dictionary yields 0.
 *
 * @param word the word to look up, without any suffix
 * @return the summed score; never {@code null}
 */
public Double extract(String word)
{
    final String[] suffixes = {"#n", "#a", "#r", "#v"};
    double sum = 0;
    for (String suffix : suffixes) {
        Double entry = _dict.get(word + suffix);
        if (entry != null) {
            sum = entry + sum;
        }
    }
    return Double.valueOf(sum);
}



/**
 * Sums the dictionary score of every whitespace-separated word in
 * {@code stri} and maps the total to a sentiment label.
 *
 * @param stri the sentence to score
 * @return the label chosen by the threshold chain below; the variable starts
 *         out as the empty string
 */
public static String SentiWord(String stri) { 
    // NOTE(review): a new SWN3 is constructed on every call, so the
    // constructor's file load runs once per sentence.
    SWN3 test = new SWN3(); 
    String sentence=stri; 
    // Tokenize on runs of whitespace.
    String[] words = sentence.split("\\s+"); 
    double totalScore = 0; 
    for(String word : words) { 
     // Drop every character that is not an ASCII letter or whitespace.
     word = word.replaceAll("([^a-zA-Z\\s])", ""); 
     // NOTE(review): the one-argument extract shown in this class always
     // returns a non-null total, so this check does not appear to ever skip.
     if (test.extract(word) == null) 
      continue; 
     totalScore += test.extract(word); 
    } 

    // Translate the summed score into a label.
    String sent = "";    
    if(totalScore>=0.75) 
     sent = "strong_positive"; 
    else 
    if(totalScore > 0.25 && totalScore<0.75) 
     sent = "positive"; 
    // The remaining branches were elided by the author of the post.
    .... 
    .... 

    return sent; 
} 

} 

这里是我的词性(POS)标注器方法(问题:如何将词性标注器与 SentiWordNet 算法集成):

/**
 * Small demo of the part-of-speech tagger: tags one hard-coded sentence and
 * prints both the input and the tagged output to the console.
 */
public class TagText {

    /**
     * Loads the tagger model, tags the sample sentence and prints the result.
     *
     * @param args unused
     * @throws IOException declared for the tagger model loading
     * @throws ClassNotFoundException declared for the tagger model loading
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        // Build the tagger from the model file.
        final MaxentTagger posTagger =
                new MaxentTagger("taggers/english-left3words-distsim.tagger");

        // Sentence to tag, and its tagged form.
        final String inputText = "This is a sample text";
        final String taggedText = posTagger.tagString(inputText);

        // Show the sentence before and after tagging.
        System.out.println("Input: " + inputText);
        System.out.println("Output: "+ taggedText);
    }
}

我需要将词性(POS)标注器与 SentiWordNet 集成,用来构建一个情感分析系统。目前这段 SentiWordNet 代码在没有词性标注的情况下可以正常运行,但给出的结果并不好。我不知道该如何把两者结合起来,请帮忙。

回答

0

你可以像下面这样修改 SWN3 中的 extract 方法:

/**
 * Looks up the score of {@code word} for the part of speech indicated by a
 * tagger tag.
 *
 * <p>The tag is matched by substring: a tag containing {@code NN} selects the
 * {@code #n} entry, {@code VB} selects {@code #v}, {@code JJ} selects
 * {@code #a} and {@code RB} selects {@code #r}. The longer tags (NNS, VBD,
 * JJR, RBS, ...) all contain their two-letter prefix, so checking the prefix
 * alone covers them.
 *
 * @param word the word to look up, without any suffix
 * @param tail the tag that the tagger attached to the word
 * @return the stored score, or {@code null} when the word or tag is
 *         {@code null}, the tag matches none of the four groups, or the
 *         dictionary has no entry for the word with that part of speech
 */
public Double extract(String word, String tail) {
    // Without a word or tag there is nothing to look up; a null tag used to
    // throw a NullPointerException on the first contains() call.
    if (word == null || tail == null) {
        return null;
    }
    if (tail.contains("NN")) {
        return _dict.get(word + "#n");
    }
    if (tail.contains("VB")) {
        return _dict.get(word + "#v");
    }
    if (tail.contains("JJ")) {
        return _dict.get(word + "#a");
    }
    if (tail.contains("RB")) {
        return _dict.get(word + "#r");
    }
    return null;
}

它把词性标签(tags)映射到 SentiWordNet 所定义的词类。我建议把你的 main 方法改成这样:

public static void main(String[] args) { 
    MaxentTagger tagger = new MaxentTagger("files/english-left3words-distsim.tagger"); 

    //String sample = "This is a sample text"; 
    String sample = "It works much better with this great example!"; 
    sample = sample.replaceAll("([^a-zA-Z\\s])", ""); 
    String[] words = sample.split("\\s+"); 

    String taggedSample = tagger.tagString(sample); 
    String[] taggedWords = taggedSample.split("\\s+"); 
    System.out.println(tagger.tagString(sample)); 

    double totalScore = 0; 
    SWN3 test = new SWN3(); 
    System.out.println("-----------"); 
    for (int i=0; i<taggedWords.length;i++) { 
     String tail = taggedWords[i].substring(words[i].length() + 1); 
     Double score = null; 
     if(tail!=null{ 
      score = test.extract(words[i], tail); 
      System.out.println(taggedWords[i] + "\t" + words[i] + "\t" + tail + "\t" + score); 
     } 
     if (score == null) 
      continue; 
     totalScore += score; 
    } 
    System.out.println("-----------"); 
    System.out.println(totalScore); 
} 

我在 sample 中换了另一个句子,效果更好。请注意,对整个句子进行标注与对每个单词单独进行标注,可能会得到不同的结果。

我希望它有帮助。

+0

它回答你的问题吗? – joel314

+1

是的。这个方案可行,结果确实比以前好。非常感谢您抽出时间 :) –

+0

extract 方法有时会在第 1 行抛出空指针异常(NullPointerException),即 ---> if (tail.contains("NN") || tail.contains("NNS")。请帮忙 :( –