2012-09-30 66 views
3

我正在写一个解析器flex/bison(我可以用Python语言编写解析器,但我总是喜欢经典。)柔性/野牛解析器与分段故障编译

当我与这个编译代码:

gcc -lfl -ly chance.tab.c lex.yy.c -o chance 

当我与一个文件运行程序,我得到的是这样的:

Segmentation fault (core dumped) 

对于任何人的参考,这里有文件:

chance.y

%{ 
    #include <stdio.h> 
%} 

%union { 
    char* str; 
} 

%token ASSERT BREAK CATCH CLASS CONTINUE DEL EACH ELSE ELSEIF FINALLY FROM 
%token FUNC IF LOAD PASS PRINT REPEAT RETURN RUN THROW TRY WHILE UNTIL 
%token YIELD AND OR NOT KTRUE KFALSE NONE 

%token MINUS EXCLAM PERCENT LAND LPAREN RPAREN STAR COMMA DOT SLASH COLON 
%token SEMICOLON QUESTION AT LBRACKET BACKSLASH RBRACKET CIRCUMFLEX LBRACE 
%token BAR RBRACE TILDE PLUS LTHAN EQUAL GTHAN INTDIV 

%token ADDASS SUBASS MULASS DIVASS INTDASS MODASS ANDASS ORASS LTEQ EQUALS 
%token GTEQ INCREMENT DECREMENT DBLSTAR 

%token<str> NAME STRING INTEGER FLOAT 
%token INDENT DEDENT NEWLINE 

%type<str> exprs names args kwdspec dfltarg arg arglist exprlist name namelist 
%type<str> funcargs parenexpr lstexpr eachspec optargs inheritance addop 
%type<str> expr ifs elifs elif elses trys catchs catchx finally suite stmts 
%type<str> stmt program 

%start program 

%% 

exprs: expr       { $$ = $1; } 
| exprs COMMA expr    { sprintf($$, "%s %s", $1, $3); } 
; 

names: name       { $$ = $1; } 
| names COMMA name    { sprintf($$, "%s %s", $1, $3); } 
; 

args: arg       { $$ = $1; } 
| args COMMA arg     { sprintf($$, "%s %s", $1, $3); } 
; 

kwdspec:       { $$ = "regular"; } 
| STAR       { $$ = "list"; } 
| DBLSTAR       { $$ = "keyword"; } 
; 

dfltarg:       { $$ = "null"; } 
| EQUAL expr      { $$ = $2; } 
; 

arg: kwdspec name dfltarg 
     { sprintf($$, "(argument %s %s %s)", $1, $2, $3); } ; 

arglist: args      { sprintf($$, "[%s]", $1); } ; 
exprlist: exprs      { sprintf($$, "[%s]", $1); } ; 
name: NAME       { sprintf($$, "(name %s)", $1); } ; 
namelist: names      { sprintf($$, "[%s]", $1); } ; 
funcargs: LPAREN arglist RPAREN  { $$ = $2 } ; 
parenexpr: LPAREN exprlist RPAREN { sprintf($$, "(tuple %s)", $2); } ; 
lstexpr: LBRACKET exprlist RBRACKET { sprintf($$, "(list %s)", $2); } ; 

eachspec: BAR namelist BAR   { sprintf($$, "(each-spec %s)", $2); } ; 

optargs:       { $$ = ""; } 
| funcargs      { $$ = $1; } 
; 

inheritance:      { $$ = ""; } 
| parenexpr      { $$ = $1; } 
; 

addop: 
    ADDASS       { $$ = "add"; } 
| SUBASS       { $$ = "sub"; } 
| MULASS       { $$ = "mul"; } 
| DIVASS       { $$ = "div"; } 
| INTDASS       { $$ = "int-div"; } 
| MODASS       { $$ = "mod"; } 
| ANDASS       { $$ = "and"; } 
| ORASS       { $$ = "or"; } 
; 

expr:  /* NotYetImplemented! */ 
    NUMBER       { sprintf($$, "(number %s)", $1); } 
| TRUE       { $$ = "(true)"; } 
| FALSE       { $$ = "(false)"; } 
| NONE       { $$ = "(none)"; } 
| STRING       { sprintf($$, "(string %s)", $1); } 
| lstexpr       { $$ = $1; } 
; 

ifs: IF expr suite     { sprintf($$, "(if %s %s)", $2, $3); } ; 

elifs:        { $$ = ""; } 
| elifs elif      { sprintf($$, "%s %s", $1, $2); } 
; 

elif: ELSEIF expr suite    { sprintf($$, "(else-if %s %s)", $2, $3); } ; 

elses:        { $$ = ""; } 
| ELSE suite      { sprintf($$, "(else %s)", $2); } 
; 

trys: TRY suite      { sprintf($$, "(try %s)", $2); } ; 

catchs:        { $$ = ""; } 
| catchs catchx      { sprintf($$, "%s %s", $1, $2); } 
; 

catchx: CATCH expr suite   { sprintf($$, "(catch %s %s)", $2, $3); } ; 

finally: FINALLY suite    { sprintf($$, "(finally %s)", $2); } ; 

suite: COLON stmts SEMICOLON  { sprintf($$, "(block [%s])", $2); } ; 

stmts:        { $$ = ""; } 
| stmts NEWLINE stmt    { sprintf($$, "%s %s", $1, $3); } 
; 

stmt: 
    ASSERT expr      { printf("(assert %s)", $2); } 
| BREAK       { printf("(break)"); } 
| CATCH expr suite    { printf("(catch %s %s)", $2, $3); } 
| CLASS name inheritance suite { printf("(class %s %s %s)", $2, $3, $4); } 
| CONTINUE      { printf("(continue)"); } 
| DEL expr      { printf("(del %s)", $2); } 
| expr DOT EACH eachspec suite { printf("(each %s %s %s)", $1, $4, $5); } 
| FROM name LOAD namelist   { printf("(from %s %s)", $2, $4); } 
| FUNC name optargs suite   { printf("(func %s %s %s)", $2, $3, $4); } 
| ifs elifs elses     { printf("(if-block %s %s %s)", $1, $2, $3); } 
| LOAD namelist     { printf("(load %s)", $2); } 
| PASS       { printf("(pass)"); } 
| PRINT expr      { printf("(print %s)", $2); } 
| REPEAT expr suite    { printf("(repeat %s %s)", $2, $3); } 
| RUN expr      { printf("(run %s)", $2); } 
| THROW expr      { printf("(throw %s)", $2); } 
| trys catchs elses finally  { printf("(try-block %s %s %s %s)", $1, $2, $3, $4); } 
| WHILE expr suite    { printf("(while %s %s)", $2, $3); } 
| UNTIL expr suite    { printf("(until %s %s)", $2, $3); } 
| YIELD expr      { printf("(yield %s)", $2); } 
| RETURN expr      { printf("(return %s)", $2); } 
| expr addop expr     { printf("(%s-assign %s %s)", $2, $1, $3); } 
| expr INCREMENT     { printf("(increment %s)", $1); } 
| expr DECREMENT     { printf("(decrement %s)", $1); } 
| expr       { printf("(expr-stmt %s)", $1); } 
; 

program: stmts      { printf("(program [%s])", $1); } ; 

chance.l

%{ 
    #include <assert.h> 
    #include <stdio.h> 

    #include "parser.tab.h" 
%} 

%option yylineno 
%option noyywrap 

%% 

"assert" { return ASSERT; } 
"break"  { return BREAK; } 
"catch"  { return CATCH; } 
"class"  { return CLASS; } 
"continue" { return CONTINUE; } 
"del"  { return DEL; } 
"each"  { return EACH; } 
"else"  { return ELSE; } 
"elseif" { return ELSEIF; } 
"finally" { return FINALLY; } 
"from"  { return FROM; } 
"func"  { return FUNC; } 
"if"  { return IF; } 
"load"  { return LOAD; } 
"pass"  { return PASS; } 
"print"  { return PRINT; } 
"repeat" { return REPEAT; } 
"return" { return RETURN; } 
"run"  { return RUN; } 
"throw"  { return THROW; } 
"try"  { return TRY; } 
"while"  { return WHILE; } 
"until"  { return UNTIL; } 
"yield"  { return YIELD; } 
"and"  { return AND; } 
"or"  { return OR; } 
"not"  { return NOT; } 
"true"  { return KTRUE; } 
"false"  { return KFALSE; } 
"none"  { return NONE; } 

-   { return MINUS; } 
!   { return EXCLAM; } 
%   { return PERCENT; } 
&   { return LAND; } 
\(   { return LPAREN; } 
\)   { return RPAREN; } 
\*   { return STAR; } 
,   { return COMMA; } 
\.   { return DOT; } 
\/   { return SLASH; } 
:   { return COLON; } 
;   { return SEMICOLON; } 
\?   { return QUESTION; } 
@   { return AT; } 
\[   { return LBRACKET; } 
\]   { return RBRACKET; } 
\^   { return CIRCUMFLEX; } 
\{   { return LBRACE; } 
\}   { return RBRACE; } 
\|   { return BAR; } 
~   { return TILDE; } 
\+   { return PLUS; } 
\<   { return LTHAN; } 
=   { return EQUAL; } 
\>   { return GTHAN; } 
\/\/  { return INTDIV; } 
\+=   { return ADDASS; } 
-=   { return SUBASS; } 
\*=   { return MULASS; } 
\/=   { return DIVASS; } 
\/\/=  { return INTDASS; } 
%=   { return MODASS; } 
&=   { return ANDASS; } 
\|=   { return ORASS; } 
\<=   { return LTEQ; } 
==   { return EQUALS; } 
\>=   { return GTEQ; } 
\+\+  { return INCREMENT; } 
--   { return DECREMENT; } 
\*\*  { return DBLSTAR; } 

[[:digit:]]+([eE][+-]?[[:digit:]]+)?    { yylval.str = strdup(yytext); return INTEGER; } 
[[:digit:]]+\.[[:digit:]]+([eE][+-]?[[:digit:]]+)? { yylval.str = strdup(yytext); return FLOAT; } 
[a-zA-Z_][a-zA-Z0-9_]*        { yylval.str = strdup(yytext); return NAME; } 

\"([^\"])*\" { yylval.str = strdup(yytext); return STRING; } 
\'([^\'])*\' { yylval.str = strdup(yytext); return STRING; } 
`([^`])*`  { yylval.str = strdup(yytext); return STRING; } 

"<INDENT>" { return INDENT; } 
"<DEDENT>" { return DEDENT; } 
"<NEWLINE>" { return NEWLINE; } 

#.*   { } 

[ \\\t]  {} 
\n   { (yylineno) += 0.5; } 
.   { yyerror(); } 

%% 

int yyerror(void) 
{ 
    printf("Invalid syntax on line %d: '%s'\n", yylineno, yytext); 
} 

int main() 
{ 
    yyparse(); 
    printf("\n"); 
    return 0; 
} 

如果上述程序适用于任何人,这里是我的小编程语言的一些示例代码:

test.ch

from timer load x 

func x(f=0, **k): 
    5.each|x|: 
     continue;; 

class OhHey: 
    func __init__: 
     print 5;; 

while true: 
    print x; 

[1, 2, 3] 
(1, 2, 3) 

在此先感谢。 ~~机会

编辑:进入新的和改进的代码(其中,不幸的是,仍然会产生一个段错误。)

回答

2

你的词法不落yylval,所以当你解析器读取的标记的值,它会随机垃圾。例如,在您的规则:

expr: NUMBER { sprintf($$, "(number %s)", $1); } 

$1是指从NUMBER令牌值,那么将是随机的垃圾。另外,$$是规则的输出,因此您在此处传递给sprintf的值也将是随机垃圾(因为您没有将其设置为第一项)。

编辑

一个“易”解决方案是自由的使用strdup/asprintf为字符串分配内存。例如,在你的.L文件,你会碰到这样的:

[+-]?[0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)? { yylval = strdup(yytext); return NUMBER; } 

然后你expr的规则是:

expr: NUMBER { asprintf(&$$, "(number %s)", $1); free($1); } 

的问题是,当然,前提是搞清楚所有的释放应避免内存泄漏可以是艰难的。

+0

我已阅读关于'flex'在线,看到这个小片段:'yylval = strdup(yytext)'。那是我应该用的吗? –

+0

@ChanceDeGuzman:是的,这可以在词法分析器中设置yylval。你仍然需要在解析器中设置$$。 –

+0

所以我必须使用'strdup' **和**'asprintf'?我不太熟悉'asprintf'或'free'。 –