2009-09-22 39 views
1

有一个名为 ocamorph 的形态分析器(开源,用 OCaml 编写),下载和编译说明见此处(download and make instructions here)。问题:JNI 分段错误 bug

它的 Java 绑定有问题,我必须修复它。经过几个小时的努力,现在看来我需要好几天才能修好,因为我不熟悉 C、JNI、OCaml 以及这个软件本身。

从下面可以看到,对于一个小文件(subtitles_136.hu.tok)它可以正常工作,但对于一个较大的文件(Tolkien_1.hu.tok)则会出现“分段错误”(Segmentation fault):

[email protected]:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ java -Djava.library.path=./output/ -cp output mokk.nlp.ocamorph.FileStemmer $HULEXICON src/java/mokk/nlp/ocamorph/cache2.txt > src/java/mokk/nlp/ocamorph/subtitles_136.hu.stem < src/java/mokk/nlp/ocamorph/subtitles_136.hu.tok 
[email protected]:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ java -Djava.library.path=./output/ -cp output mokk.nlp.ocamorph.FileStemmer $HULEXICON src/java/mokk/nlp/ocamorph/cache.txt > src/java/mokk/nlp/ocamorph/Tolkien_1.en.stem < src/java/mokk/nlp/ocamorph/Tolkien_1.en.tok 
Segmentation fault 
[email protected]:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ ls -l src/java/mokk/nlp/ocamorph/ 
total 2116 
-rw-rw-r-- 1 bpgergo breka 8505 2009-09-22 13:53 cache2.txt 
-rw-rw-r-- 1 bpgergo breka  65 2009-07-07 18:48 Compounds.java 
drwxrwxr-x 2 bpgergo breka 4096 2009-09-22 13:54 CVS 
-rw-rw-r-- 1 bpgergo breka 5888 2009-09-18 17:19 FileStemmer.java 
-rw-rw-r-- 1 bpgergo breka  77 2009-07-07 18:48 Guess.java 
-rw-rw-r-- 1 bpgergo breka  953 2009-08-31 18:58 IOcamorphStemmer.java 
-rw-rw-r-- 1 bpgergo breka 5419 2009-08-31 18:58 OcamorphCachedStemmer.java 
-rw-rw-r-- 1 bpgergo breka 2836 2009-08-03 16:00 OcamorphStemmer.java 
-rw-rw-r-- 1 bpgergo breka 4612 2009-09-22 12:51 OcamorphWrapper.java 
-rw-rw-r-- 1 bpgergo breka 6731 2009-09-22 13:53 subtitles_136.hu.stem 
-rw-rw-r-- 1 bpgergo breka 7356 2009-09-20 21:12 subtitles_136.hu.tok 
-rw-rw-r-- 1 bpgergo breka 2907 2009-09-18 17:22 Tester.java 
-rw-rw-r-- 1 bpgergo breka  0 2009-09-22 13:53 Tolkien_1.en.stem 
-rw-rw-r-- 1 bpgergo breka 1033059 2009-09-17 16:09 Tolkien_1.en.tok 
-rw-rw-r-- 1 bpgergo breka  0 2009-09-22 13:14 Tolkien_1.hu.stem 
-rw-rw-r-- 1 bpgergo breka 1041968 2009-09-17 16:09 Tolkien_1.hu.tok 
[email protected]:~/hunglish_tools/ocamorph/ocamorph/src/bindings/java $ 

这是 Java 绑定的 C 部分(/ocamorph/src/bindings/java/src/c/hunmorph_jnistub.c)。出问题的可能就是这一部分,感谢任何有助于找到错误的提示或帮助:

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include "mokk_nlp_ocamorph_OcamorphWrapper.h" 

#include "ocamorph.h" 
#define MAX_ANALYSIS 100 
#define ANALYSIS_MAXLEN 100 

/* Cached method ID of OcamorphWrapper.callback(byte[]); assigned in initIDs. */
jmethodID MID_InstanceMethodCall_callback;

/* Result slots passed to analyze(); every entry is malloc'ed in init. */
char *analyses[MAX_ANALYSIS];

/* Buffer sized for a single analysis string (not referenced in the code shown here). */
char analysis[ANALYSIS_MAXLEN];

/* Input buffer (not referenced in the code shown here). */
char buffer[500];


/*
 * Native side of OcamorphWrapper.initIDs().
 *
 * Resolves the instance method callback(byte[]) on the given class and stores
 * its method ID in the global MID_InstanceMethodCall_callback, which the
 * native analyze function later uses to hand results back to Java.
 */
JNIEXPORT void JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_initIDs
    (JNIEnv *env, jclass cls)
{
    jmethodID callback_id = (*env)->GetMethodID(env, cls, "callback", "([B)V");

    MID_InstanceMethodCall_callback = callback_id;
}
/*
 * Native side of OcamorphWrapper.init(String).
 *
 * Makes sure every slot of the global `analyses` array points to a buffer of
 * ANALYSIS_MAXLEN bytes, starts ocamorph, and loads an engine from the binary
 * resource named by bin_arg.
 *
 * Returns the engine handle cast to jlong. On failure a Java exception is left
 * pending (NullPointerException for a null path, OutOfMemoryError when a
 * buffer cannot be allocated) and 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_init
    (JNIEnv * env, jobject obj, jstring bin_arg) {

    if (bin_arg == NULL) {
        /* GetStringUTFChars must not be handed a null reference. */
        jclass npe = (*env)->FindClass(env, "java/lang/NullPointerException");
        if (npe != NULL) {
            (*env)->ThrowNew(env, npe, "bin must not be null");
        }
        return 0;
    }

    /*
     * `analyses` has static storage, so unused slots start out as NULL.
     * Allocating only the empty slots keeps a second init() call (one per
     * OcamorphWrapper instance) from leaking the buffers of the first one.
     */
    int i;
    for (i = 0; i < MAX_ANALYSIS; i++) {
        if (analyses[i] == NULL) {
            analyses[i] = (char *) malloc(ANALYSIS_MAXLEN * sizeof(char));
        }
        if (analyses[i] == NULL) {
            jclass oom = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
            if (oom != NULL) {
                (*env)->ThrowNew(env, oom, "cannot allocate ocamorph analysis buffer");
            }
            return 0;
        }
    }

    /* The third argument is an optional jboolean *isCopy out-parameter; we do not need it. */
    const char *bin_file = (*env)->GetStringUTFChars(env, bin_arg, NULL);
    if (bin_file == NULL) {
        /* OutOfMemoryError is already pending. */
        return 0;
    }

    /* NOTE(review): ocamorph_startup() runs on every init() call -- confirm it is safe to call more than once. */
    ocamorph_startup();
    ocamorph_engine engine = init_from_bin(bin_file, 0 /* no_caps argument is not used */);

    /* Release the UTF-8 copy obtained above. */
    (*env)->ReleaseStringUTFChars(env, bin_arg, bin_file);

    return (jlong) engine;

}

/*
 * Native side of OcamorphWrapper.make_analyzer(...).
 *
 * Converts the jlong handle back to an ocamorph_engine, forwards it together
 * with the four integer options to make_analyzer(), and hands the resulting
 * handle back to Java as a jlong.
 */
JNIEXPORT jlong JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_make_1analyzer
    (JNIEnv *env, jobject obj, jlong engine , jint blocking, jint compunds, jint stop_at_first, jint guess)
{
    ocamorph_engine source = (ocamorph_engine) engine;
    ocamorph_engine handle =
        make_analyzer(source, blocking, compunds, stop_at_first, guess);

    return (jlong) handle;
}

JNIEXPORT void JNICALL Java_mokk_nlp_ocamorph_OcamorphWrapper_analyze 
    (JNIEnv * env, jobject obj, jlong analyzer, jbyteArray word) { 

    ocamorph_engine analyzerc = (ocamorph_engine) analyzer; 

    /* Convert to UTF8 */ 
    // const char *wordc = (*env)->GetStringUTFChars(env, word, JNI_FALSE); 

    //char *wordc = (char *) (*env)->GetByteArrayElements(env, word, 0); 

    const int maxInputLength = 1000; 
    char wordc[maxInputLength]; 
    jsize len = (*env)->GetArrayLength(env,word); 
    if (len>=maxInputLength) { len = maxInputLength-1; } 

    if (len!=0) 
    { 
    (*env)->GetByteArrayRegion(env,word,0,len,(jbyte*)wordc); 
    } 
    wordc[len] = '\0'; 

    int n = analyze(analyzerc,wordc,analyses,MAX_ANALYSIS, ANALYSIS_MAXLEN); 

    int i; 
    for (i=0; i < n; ++i) { 
     // jstring ana = (*env)->NewStringUTF(env, analyses[i]); 
     char* ana = analyses[i]; 
    jbyteArray jb=(*env)->NewByteArray(env, strlen(ana)); 
    (*env)->SetByteArrayRegion(env, jb, 0, strlen(ana), (jbyte *)ana); 
    (*env)->CallVoidMethod(env, obj, MID_InstanceMethodCall_callback, jb); 

    } 
// (*env)->ReleaseStringUTFChars(env, word, wordc); 
} 

这里是 Java 部分(/ocamorph/src/bindings/java/src/java/mokk/nlp/ocamorph/OcamorphWrapper.java):

package mokk.nlp.ocamorph; 

import java.io.UnsupportedEncodingException; 
import java.util.LinkedList; 
import java.util.List; 

/**
 * JNI interface for Ocamorph. The constructor loads an ocamorph engine from a
 * specified binary resource and creates an analyzer on top of it.
 *
 * <p>Results of an analysis are delivered by the native code through
 * {@link #callback(byte[])} and collected in a per-instance list, so one
 * instance must not be used for several analyses at the same time.
 *
 * @author bpgergo
 */
public class OcamorphWrapper {

    private final long analyzerId;
    private final long engineId;

    private native static void initIDs();

    private native long init(String bin);

    // Parameter order mirrors the C signature:
    // const ocamorph_engine engine, const int blocking, const int compounds,
    // const int stop_at_first, const int guess
    private native long make_analyzer(long engine, int blocking, int compounds,
            int stop_at_first, int guess);

    private native void analyze(long analyzer, byte[] word);

    static {
        //TODO FIXME how to define the library dynamically?
        System.loadLibrary("ocamorph");
        initIDs();
    }

    /**
     * The encoding required by the ocamorph lib.
     */
    private static final String encoding = "ISO-8859-2";

    /**
     * Analyze result (the callback will add the result strings).
     */
    private List<String> analyzeResult = null;

    /**
     * Loads a new Ocamorph engine, using the given binary resource and the arguments.
     *
     * @param bin path of the binary resource passed to the native {@code init}
     * @param blocking passed to the native analyzer as 1 (true) or 0 (false)
     * @param stopAtFirst passed to the native analyzer as 1 (true) or 0 (false)
     * @param compounds compound handling mode
     * @param guess guessing mode
     */
    public OcamorphWrapper(String bin, boolean blocking, boolean stopAtFirst,
            Compounds compounds, Guess guess) {
        super();
        engineId = init(bin);
        int comp = compounds2Code(compounds);
        int gu = guessToCode(guess);
        // The native signature is (engine, blocking, compounds, stop_at_first, guess).
        // An earlier version passed stopAtFirst and compounds in swapped positions,
        // which made stop_at_first appear to control compound analysis.
        analyzerId = make_analyzer(engineId, boolean2Code(blocking), comp,
                boolean2Code(stopAtFirst), gu);
    }

    /**
     * This is the interface method for ocamorph analysis for the java side.
     *
     * @param word the word to analyze; it is encoded with {@link #getEncoding()}
     *        before being handed to the native code
     * @return the analyses reported by the native code, in callback order; empty
     *         if the word could not be encoded
     * @throws NullPointerException if {@code word} is null
     */
    public List<String> analyze(String word) {
        analyzeResult = new LinkedList<String>();
        byte[] ba = null;
        try {
            ba = word.getBytes(encoding);
        } catch (UnsupportedEncodingException e1) {
            System.err
                    .println("Ocamorph analyze UnsupportedEncodingException: ");
            e1.printStackTrace();
        }
        if (ba != null) {
            analyze(analyzerId, ba);
        }
        return analyzeResult;
    }

    /**
     * The C interface will call this method to return analysis results.
     *
     * @param ana one analysis as raw bytes in the ocamorph encoding
     */
    private void callback(byte[] ana) {
        try {
            // bpgergo 20090618: decoding with the platform default charset was a bug
            analyzeResult.add(new String(ana, encoding));
        } catch (UnsupportedEncodingException e) {
            // Report the problem and skip this analysis instead of storing a null
            // entry; throwing here would leave an exception pending in native code.
            System.err.println("callback new String(ana, encoding) UnsupportedEncodingException:");
            e.printStackTrace();
        }
    }

    /* static argument conversion methods */

    /** Maps {@code true} to 1 and {@code false} to 0. */
    private static int boolean2Code(boolean bool) {
        return bool ? 1 : 0;
    }

    /** Maps {@code No} to 0 and {@code Allow} to 1; any other constant maps to 0. */
    private static int compounds2Code(Compounds compounds) {
        int comp = 0;
        switch (compounds) {
        case No:
            comp = 0;
            break;
        case Allow:
            comp = 1;
            break;
        }
        return comp;
    }

    /**
     * Maps {@code NoGuess} to 0, {@code Fallback} to 1 and {@code Global} to 2;
     * any other constant maps to 0.
     */
    private static int guessToCode(Guess guess) {
        int gu = 0;
        switch (guess) {
        case NoGuess:
            gu = 0;
            break;
        case Fallback:
            gu = 1;
            break;
        case Global:
            gu = 2;
            break;
        }
        return gu;
    }

    /* getter/setter methods */

    /** @return the name of the encoding used when exchanging bytes with ocamorph */
    public String getEncoding() {
        return encoding;
    }

    /** @return the native analyzer handle returned by {@code make_analyzer} */
    public long getAnalyzerId() {
        return analyzerId;
    }

    /** @return always {@code false}; debug output is currently disabled */
    public boolean isDebug() {
        return false;
    }

    /**
     * Currently has no effect; debug output is disabled.
     *
     * @param debug ignored
     */
    public void setDebug(boolean debug) {
        // intentionally empty
    }

}

回答

0

尝试带调试信息编译 C 代码,并查一下如何在你的(看起来是类 Unix 的)操作系统上启用核心转储(core dump)。这应该能给你一个出发点。

1

程序崩溃时,是否生成了类似 hs_err_pid###.log 的文件?这类文件有时有助于解决此类问题。

我的猜测是,它与设置 MID_InstanceMethodCall_callback 方法 ID 的古怪方式有关。该 ID 存储为一个全局值,并且只有在调用 initIDs 静态方法时才会被设置,但在您的示例代码中看不到对该方法的调用。如果它没有被设置,那么 analyze 在尝试调用回调方法时就会崩溃。一种确保取得回调方法 ID 的做法如下:

/*
 * Fragment meant for the body of the native analyze function (which returns
 * void): resolve callback(byte[]) on the receiver's class at call time instead
 * of relying on a method ID cached in a global.
 */
jclass cls = (*env)->GetObjectClass(env, obj); 
if(cls == NULL){ 
    return; 
} 
jmethodID mid = (*env)->GetMethodID(env, cls, "callback", "([B)V"); 
if(mid == NULL){ 
    /* NoSuchMethodError is pending; let it propagate to Java. */
    return; 
} 
int i; 
for (i=0; i < n; ++i) { 
    char* ana = analyses[i]; 
    jsize ana_len = (jsize) strlen(ana); 
    jbyteArray jb=(*env)->NewByteArray(env, ana_len); 
    if(jb == NULL){ 
        /* OutOfMemoryError is pending. */
        return; 
    } 
    (*env)->SetByteArrayRegion(env, jb, 0, ana_len, (jbyte *)ana); 
    (*env)->CallVoidMethod(env, obj, mid, jb); 
    /* Release the local reference right away rather than holding one per analysis. */
    (*env)->DeleteLocalRef(env, jb); 
    /* Stop making JNI calls once the callback has thrown. */
    if((*env)->ExceptionCheck(env)){ 
        return; 
    } 
}