位置：首页-资讯-后端开发

用Python实现词法分析器（Lexical Analyzer）

2023-06-02 01:01

短信预约 -IT技能 免费直播动态提醒

　　from __future__ import print_function

　　import sys

# Token codes. The order of these names must stay in step with all_syms
# below: a token code is used directly as an index into all_syms when the
# driver prints a token's name.
# The unpacking is parenthesised (instead of using backslash continuations)
# so stray blank lines or trailing spaces between the rows cannot break it.
(tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss, tk_Leq, tk_Gtr,
 tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While, tk_Print,
 tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident,
 tk_Integer, tk_String) = range(31)

# Printable name of every token code, indexed by the code itself.
all_syms = ["End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add", "Op_subtract",
            "Op_negate", "Op_not", "Op_less", "Op_lessequal", "Op_greater", "Op_greaterequal",
            "Op_equal", "Op_notequal", "Op_assign", "Op_and", "Op_or", "Keyword_if",
            "Keyword_else", "Keyword_while", "Keyword_print", "Keyword_putc", "LeftParen",
            "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma", "Identifier",
            "Integer", "String"]

# single character only symbols
symbols = {'{': tk_Lbrace, '}': tk_Rbrace, '(': tk_Lparen, ')': tk_Rparen, '+': tk_Add, '-': tk_Sub,
           '*': tk_Mul, '%': tk_Mod, ';': tk_Semi, ',': tk_Comma}

# reserved words and the token each one maps to
key_words = {'if': tk_If, 'else': tk_Else, 'print': tk_Print, 'putc': tk_Putc, 'while': tk_While}

# Scanner state shared by all lexer functions.
the_ch = " "        # dummy first char - must be a space so the first gettok() skips it and reads real input
the_col = 0         # column of the_ch on the current line
the_line = 1        # current line number (1-based)
input_file = None   # stream being scanned; assigned by the main driver

#*** show error and exit
def error(line, col, msg):
    """Print a diagnostic and terminate the program with exit status 1.

    line, col -- source position the message refers to
    msg       -- human-readable description of the problem

    Raises SystemExit; it never returns to the caller.
    """
    print(line, col, msg)
    # sys.exit is used rather than the bare exit() helper, which is injected
    # by the site module and is not available in every run mode.
    sys.exit(1)

#*** get the next character from the input
def next_ch():
    """Read one character from input_file into the_ch and return it.

    Updates the global position counters: the_col is incremented for every
    character read; on a newline the_line is incremented and the_col is
    reset to 0. At end of input the stream yields an empty string, which is
    what the rest of the lexer tests for with len(the_ch) == 0.
    """
    global the_ch, the_col, the_line

    the_ch = input_file.read(1)
    the_col += 1
    if the_ch == '\n':
        the_line += 1
        the_col = 0
    return the_ch

#*** 'x' - character constants
def char_lit(err_line, err_col):
    """Scan a character constant and return it as an integer token.

    Called with the_ch on the opening quote. Supports a single plain
    character or one of the escapes \\n and \\\\.

    err_line, err_col -- position of the opening quote, used for both the
                         returned token and any error report

    Returns (tk_Integer, err_line, err_col, n) where n is the character code.
    Calls error() (which exits) for an empty constant, an unknown escape
    sequence, or a constant that is not closed after one character.
    """
    n = ord(next_ch())              # skip opening quote
    if the_ch == '\'':
        error(err_line, err_col, "empty character constant")
    elif the_ch == '\\':
        next_ch()
        if the_ch == 'n':
            n = 10
        elif the_ch == '\\':
            n = ord('\\')
        else:
            error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))
    # the character (or escape) must be followed directly by the closing quote
    if next_ch() != '\'':
        error(err_line, err_col, "multi-character constant")
    next_ch()                       # step past the closing quote
    return tk_Integer, err_line, err_col, n

#*** process divide or comments
def div_or_cmt(err_line, err_col):
    """Distinguish a divide operator from the start of a /* ... */ comment.

    Called with the_ch on '/'.

    err_line, err_col -- position of the '/', used for the divide token and
                         for the "EOF in comment" report

    Returns (tk_Div, err_line, err_col) when the '/' is not followed by '*'.
    Otherwise the comment is skipped and the token that follows it is
    returned via gettok(). Calls error() (which exits) if the input ends
    inside the comment.
    """
    if next_ch() != '*':
        return tk_Div, err_line, err_col

    # comment found
    next_ch()
    while True:
        if the_ch == '*':
            # Only advance once here: if the next char is not '/', it is
            # re-examined by the loop, so a run such as "**/" still closes.
            if next_ch() == '/':
                next_ch()
                return gettok()
        elif len(the_ch) == 0:
            error(err_line, err_col, "EOF in comment")
        else:
            next_ch()

#*** "string"
def string_lit(start, err_line, err_col):
    """Scan a string literal and return it as a string token.

    Called with the_ch on the opening delimiter. Characters are collected
    verbatim (no escape processing is done here) until the delimiter is seen
    again.

    start             -- the delimiter character that ends the literal
    err_line, err_col -- position of the opening delimiter

    Returns (tk_String, err_line, err_col, text) with text excluding the
    delimiters. Calls error() (which exits) if end of input or a newline is
    reached before the closing delimiter.
    """
    chars = []

    while next_ch() != start:
        if len(the_ch) == 0:
            error(err_line, err_col, "EOF while scanning string literal")
        if the_ch == '\n':
            error(err_line, err_col, "EOL while scanning string literal")
        chars.append(the_ch)

    next_ch()                       # step past the closing delimiter
    return tk_String, err_line, err_col, "".join(chars)

#*** handle identifiers and integers
def ident_or_int(err_line, err_col):
    """Scan a run of alphanumerics/underscores as an integer, keyword or identifier.

    err_line, err_col -- position of the first character of the run

    Returns one of:
      (tk_Integer, err_line, err_col, n)   -- run starts with a digit and is all digits
      (<keyword token>, err_line, err_col) -- run is a key in key_words
      (tk_Ident, err_line, err_col, text)  -- any other run
    Calls error() (which exits) when the current character cannot start such
    a run, or when a run starts with a digit but contains non-digits.
    """
    is_number = True
    text = ""

    while the_ch.isalnum() or the_ch == '_':
        text += the_ch
        if not the_ch.isdigit():
            is_number = False
        next_ch()

    # nothing consumed: the_ch is not valid in any token
    if len(text) == 0:
        error(err_line, err_col, "ident_or_int: unrecognized character: (%d) '%c'" % (ord(the_ch), the_ch))

    if text[0].isdigit():
        if not is_number:
            error(err_line, err_col, "invalid number: %s" % (text))
        n = int(text)
        return tk_Integer, err_line, err_col, n

    if text in key_words:
        return key_words[text], err_line, err_col

    return tk_Ident, err_line, err_col, text

#*** look ahead for '>=', etc.
def follow(expect, ifyes, ifno, err_line, err_col):
    """Resolve a one- or two-character operator by looking at the next character.

    expect            -- character that completes the two-character operator
    ifyes             -- token to return when the next character is `expect`
    ifno              -- token to return otherwise; passing tk_EOI means the
                         first character is not a valid token on its own
    err_line, err_col -- position of the operator's first character

    Returns (token, err_line, err_col). Calls error() (which exits) when the
    next character is not `expect` and ifno is tk_EOI.
    """
    if next_ch() == expect:
        next_ch()
        return ifyes, err_line, err_col

    if ifno == tk_EOI:
        # NOTE(review): if the input ends right here the_ch is '' and ord()
        # raises TypeError before error() is reached.
        error(err_line, err_col, "follow: unrecognized character: (%d) '%c'" % (ord(the_ch), the_ch))

    return ifno, err_line, err_col

#*** return the next token type
def gettok():
    """Skip whitespace and return the next token from the input.

    Returns a tuple whose first three items are (token code, line, column),
    the position being that of the token's first character. Integer,
    identifier and string tokens carry their value as a fourth item.
    End of input yields (tk_EOI, line, column).
    """
    while the_ch.isspace():
        next_ch()

    # remember where the token starts; the helpers advance the_line/the_col
    err_line = the_line
    err_col = the_col

    if len(the_ch) == 0:
        return tk_EOI, err_line, err_col
    elif the_ch == '/':
        return div_or_cmt(err_line, err_col)
    elif the_ch == '\'':
        return char_lit(err_line, err_col)
    elif the_ch == '<':
        return follow('=', tk_Leq, tk_Lss, err_line, err_col)
    elif the_ch == '>':
        return follow('=', tk_Geq, tk_Gtr, err_line, err_col)
    elif the_ch == '=':
        return follow('=', tk_Eq, tk_Assign, err_line, err_col)
    elif the_ch == '!':
        return follow('=', tk_Neq, tk_Not, err_line, err_col)
    elif the_ch == '&':
        # a lone '&' or '|' is not a token: tk_EOI tells follow() to report an error
        return follow('&', tk_And, tk_EOI, err_line, err_col)
    elif the_ch == '|':
        return follow('|', tk_Or, tk_EOI, err_line, err_col)
    elif the_ch == '"':
        return string_lit(the_ch, err_line, err_col)
    elif the_ch in symbols:
        sym = symbols[the_ch]
        next_ch()
        return sym, err_line, err_col
    else:
        return ident_or_int(err_line, err_col)

#*** main driver
# Scan the file named by the first command-line argument, or stdin when no
# argument is given.
input_file = sys.stdin
if len(sys.argv) > 1:
    try:
        input_file = open(sys.argv[1], "r", 4096)
    except IOError:
        error(0, 0, "Can't open %s" % sys.argv[1])

# Print one line per token: line, column, token name and, for integers,
# identifiers and strings, the token's value. Stop after End_of_input.
while True:
    t = gettok()
    tok = t[0]
    line = t[1]
    col = t[2]

    print("%5d %5d %-14s" % (line, col, all_syms[tok]), end='')

    if tok == tk_Integer:
        print(" %5d" % (t[3]))
    elif tok == tk_Ident:
        print(" %s" % (t[3]))
    elif tok == tk_String:
        print(' "%s"' % (t[3]))
    else:
        print("")

    if tok == tk_EOI:
        break

# release the file we opened ourselves; stdin is left alone
if input_file is not sys.stdin:
    input_file.close()

　　输出(测试用例三)

　　5 16 Keyword_print

　　5 40 Op_subtract

　　6 16 Keyword_putc

　　6 40 Op_less

　　7 16 Keyword_if

　　7 40 Op_greater

　　8 16 Keyword_else

　　8 40 Op_lessequal

　　9 16 Keyword_while

　　9 40 Op_greaterequal

　　10 16 LeftBrace

　　10 40 Op_equal

　　11 16 RightBrace

　　11 40 Op_notequal

　　12 16 LeftParen

　　12 40 Op_and

　　13 16 RightParen

　　13 40 Op_or

　　14 16 Op_subtract

　　14 40 Semicolon

　　15 16 Op_not

　　15 40 Comma

　　16 16 Op_multiply

　　16 40 Op_assign

　　17 16 Op_divide

　　17 40 Integer 42

　　18 16 Op_mod

　　18 40 String "String literal"

　　19 16 Op_add

　　19 40 Identifier variable_name

　　20 26 Integer 10

　　21 26 Integer 92

　　22 26 Integer 32

　　23 1 End_of_input

免责声明：

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的，并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据，供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

阅读原文内容投诉

用Python实现词法分析器（Lexical Analyzer）

下载Word文档到电脑，方便收藏和打印～

下载Word文档

用Python实现 词法分析器（Lexical Analyzer）

用Python实现 词法分析器（Lexical Analyzer）

相关文章

猜你喜欢

用Python实现 词法分析器（Lexical Analyzer）

JavaScript如何实现简单的词法分析器

Python基于jieba分词实现snownlp情感分析

利用PHP实现词法分析器与自定义语言

详解JavaScript实现简单的词法分析器示例

Python底层技术解析：如何实现分词和词性标注

Python中文分词实现方法(安装pymmseg)

Python使用re模块实现tokenizer(表达式分词器)

Python分割单词和转换命名法的实现

PHP怎么实现词法分析与自定义语言

java词法分析器DDL递归怎么应用

java词法分析器DDL递归应用详解

Python利用re模块实现简易分词(tokenization)

Python中文分词工具之结巴分词用法实例总结【经典案例】

Python怎么利用re模块实现简易分词

主成分分析法(PCA)及其python实现

利用python实现数据分析

Python实现的堆排序算法原理与用法实例分析

如何分析基于结构化平均感知机的分词器Java实现

Python中Class类用法实例分析

热门标签

编程热搜

编程资源站

目录

感谢您的提交，我们服务专员将在30分钟内给您回复

用Python实现词法分析器（Lexical Analyzer）

用Python实现词法分析器（Lexical Analyzer）

用Python实现词法分析器（Lexical Analyzer）