如果输入格式非常一致(如示例所示),那么只用 re 模块大概就足够了。
对于更复杂的情况,你可能需要考虑功能更强大的解析器,例如 pyparsing。
编辑:下面是一个用正则表达式实现的非常简单的有限状态机解析器;它可以处理空行、非嵌套的 select; 和 end; 语句,以及首个和后续的 when 子句。我没有处理 label 语句,因为我不确定它们的作用——是把 V 变量重新命名为 X 吗?
import re
class SasTranslator:
    """Line-by-line translator from SAS ``select; when(...) ...; end;`` blocks
    to Python ``if``/``elif`` statements.

    The translator is a small finite-state machine.  Call the instance with
    one input line at a time; it returns

    * ``""`` for a blank line,
    * the translated Python text for a ``when(...)`` clause,
    * ``None`` for ``select;`` / ``end;`` (which produce no output) and for
      any line that matches none of the known patterns.

    Nested ``select;`` blocks are not supported and raise ``ValueError``.
    """

    def __init__(self):
        # modes:
        #   0   not in START..END
        #   1   in START..END, no CASE seen yet
        #   2   in START..END, CASE already found
        self.mode = 0
        # Input line number; incremented before each line is dispatched, so
        # the first line handled is reported as line 0 in error messages.
        self.offset = -1

    def handle_blank(self, match):
        """Translate a blank line into an empty output line."""
        return ""

    def handle_start(self, match):
        """Enter a ``select;`` block.

        Raises:
            ValueError: if a ``select;`` block is already open.
        """
        if self.mode != 0:
            raise ValueError("Found 'select;' in select block, line {}".format(self.offset))
        self.mode = 1
        return None

    def handle_end(self, match):
        """Leave the current ``select;`` block.

        Raises:
            ValueError: if no block is open, or if the block contained no
                ``when`` clause.
        """
        if self.mode == 0:
            raise ValueError("Found 'end;' with no opening 'select;', line {}".format(self.offset))
        if self.mode == 1:
            raise ValueError("Found empty 'select;' .. 'end;', line {}".format(self.offset))
        self.mode = 0
        return None

    def handle_case(self, match):
        """Translate one ``when(var op value) target = value;`` clause.

        The first clause of a block becomes ``if``, later ones ``elif``.

        Raises:
            ValueError: if the clause appears outside a ``select;`` block.
        """
        if self.mode == 0:
            raise ValueError("Found 'when' clause outside 'select;' .. 'end;', line {}".format(self.offset))
        if self.mode == 1:
            test = "if"
            self.mode = 2
        else:
            test = "elif"

        test_var, op, test_val, assign_var, assign_val = match.groups()
        return (
            "{test} {test_var} {op} {test_val}:\n"
            " {assign_var} = {assign_val}".format(
                test=test,
                test_var=test_var,
                op=op,
                test_val=test_val,
                assign_var=assign_var,
                assign_val=assign_val,
            )
        )

    #
    # Build a dispatch table for the handlers
    #
    # Raw strings keep the regex escapes (\s, \w, \d, \() from being parsed
    # as (invalid) string escapes.  START and END accept leading whitespace,
    # like CASE, so indented SAS source is recognised as well.
    BLANK = re.compile(r"\s*$")
    START = re.compile(r"\s*select;\s*$")
    END = re.compile(r"\s*end;\s*$")
    CASE = re.compile(
        r"\s*when\((\w+)\s*([<>=]+)\s*([\d.-]+)\s*\)\s*(\w+)\s*=\s*([\d.-]+)\s*;\s*$"
    )

    # Patterns are tried in order; the first one that matches wins.
    dispatch_table = [
        (BLANK, handle_blank),
        (START, handle_start),
        (END, handle_end),
        (CASE, handle_case),
    ]

    def __call__(self, line):
        """
        Translate a single line of input

        Returns the handler's result for the first matching pattern, or
        ``None`` when no pattern matches the line.
        """
        self.offset += 1
        for test, handler in SasTranslator.dispatch_table:
            match = test.match(line)
            if match is not None:
                # Table entries are plain functions, so pass self explicitly.
                return handler(self, match)
        # nothing matched!
        return None
def main():
    """Translate ``my_file.sas`` line by line and print the result.

    Lines the translator returns ``None`` for are echoed with a
    ``***unknown***`` prefix so they can be reviewed by hand.
    """
    with open("my_file.sas") as sas_file:
        translate = SasTranslator()
        for raw_line in sas_file:
            converted = translate(raw_line)
            if converted is None:
                print("***unknown*** {}".format(raw_line.rstrip()))
                continue
            print(converted)


if __name__ == "__main__":
    main()
用您的示例输入运行它,会产生如下输出:
if X_1 <= 6.7278:
V_1 = -0.0594
elif X_1 <= 19.5338:
V_1 = 0.0604
elif X_1 <= 45.1458:
V_1 = 0.1755
elif X_1 <= 83.5638:
V_1 = 0.2867
elif X_1 <= 203.0878:
V_1 = 0.395
elif X_1 > 203.0878:
V_1 = 0.5011
***unknown*** label V_1 ="X_1 ";
if X_2 <= 0.0836:
V_2 = 0.0562
elif X_2 <= 0.1826:
V_2 = 0.07
elif X_2 <= 0.2486:
V_2 = 0.0836
elif X_2 <= 0.3146:
V_2 = 0.0969
elif X_2 <= 0.3806:
V_2 = 0.1095
elif X_2 <= 0.4466:
V_2 = 0.1212
elif X_2 <= 0.5126:
V_2 = 0.132
elif X_2 <= 0.5786:
V_2 = 0.1419
elif X_2 <= 0.6446:
V_2 = 0.1511
elif X_2 <= 0.7106:
V_2 = 0.1596
elif X_2 <= 0.8526:
V_2 = 0.1679
elif X_2 > 0.8526:
V_2 = 0.176
***unknown*** label V_2 ="X_2 ";
取决于您使用它的频率,或许值得用 bisect 编写一个二分查找函数,并把 select; .. end; 块翻译成那种形式(不过您需要非常小心,确保比较运算符的行为与您预期的一致!)——类似于:
V_1 = index_into(
X_1,
[ 6.7278, 19.5338, 45.1458, 83.5638, 203.0878 ],
[-0.0594, 0.0604, 0.1755, 0.2867, 0.395, 0.5011]
)
这样可能会显著加快运行速度(尤其是随着分支数量的增加),而且更容易理解和维护。
谢谢,将研究,同时,你能帮我写一些代码让我开始。谢谢。 – TongZZZ
这真的很有帮助,比我想象的要好得多,我可以轻松地添加更多组件,非常感谢!我也会去看看 bisect。 – TongZZZ