
Commit 3263710

fix sandbox world door parse
closes #1
1 parent 66b9c64 · commit 3263710

2 files changed, 28 additions and 10 deletions (+28 −10)

json_runner.test.py

Lines changed: 1 addition & 0 deletions
@@ -108,4 +108,5 @@
 - say ((1 2 3) foo bar)
 - say (#[list 1 2 3])
 - say I'm a tomato!
+- say (sandbox world door)
 """))

json_runner/string_parsing.py

Lines changed: 27 additions & 10 deletions
@@ -137,6 +137,8 @@ def process_escapes(string):
 
 
 def process_token(token):
+    if not token:
+        return ""
     if token[0] in "'\"":
         return process_escapes(token[1:-1])
     try:
@@ -155,30 +157,45 @@ def escape_atom(a):
 
 
 def tokenize(string, atoms):
-    ATOM_TOKEN = "|".join(map(escape_atom, sorted(atoms, reverse=True)))
-    STRING_TOKEN = r"""(?<=\s|^)(?P<q>['"])(?:\\\S|(?!(?P=q))[\s\S])*?(?P=q)(?=\s|$)"""
-    NUMBER_TOKEN = r"0x\d+|-?\d+(?:\.\d+(?:[eE][+-]\d+)?)?"
-    PAREN_TOKEN = r"[\[\](){}]"
-    NOT_ANYNAME_TOKEN = r"(?P<any>" + "|".join(map(lambda t: f"(?:{t})", filter(
-        None, [PAREN_TOKEN, STRING_TOKEN, ATOM_TOKEN, NUMBER_TOKEN]))) + ")"
-    ANYNAME_TOKEN = r"(?:(?!(?&any))\S)+"
-    ALL_TOKENS = regex.compile(NOT_ANYNAME_TOKEN + "|" + ANYNAME_TOKEN)
+    ATOM_REGEX = "|".join(
+        fr"(?&start){regex.escape(a)}(?&end)"
+        if a[0].isalpha() and a[-1].isalpha()
+        else regex.escape(a)
+        for a in sorted(atoms, reverse=True)
+    )
+    if ATOM_REGEX:
+        ATOM_REGEX = "| (?:%s)" % ATOM_REGEX
+    ALL_TOKENS = r"""
+    (?(DEFINE)
+        (?P<start>(?<=\s|^))
+        (?P<end>(?=\s|$))
+    )
+    (?P<special>
+        (?:[\[\](){}]) # parens
+        | (?:(?&start)(?P<q>['"])(?:\\\S|(?!(?P=q))[\s\S])*?(?P=q)(?&end))
+            # double or single quoted string
+        %s # an atom (but NOT in a word) -- this will be formatted in below vv
+        | (?:0x\d+|-?\d+(?:\.\d+(?:[eE][+-]\d+)?)?) # a number
+    ) | (?:(?:(?!(?&special))\S)+) # anything that is not special token""" % ATOM_REGEX
+    ALL_TOKENS = regex.compile(ALL_TOKENS, flags=regex.X)
     i = 0
     while i < len(string):
         match = ALL_TOKENS.search(string, i)
         if not match:
             return
         token = match.group(0)
+        if not token:
+            raise_token_error([Token(i, None, " ", string)], f"empty token (internal error) {atoms=}")
         yield Token(match.start(), process_token(token), token, string)
         i = match.end()
 
 
 if __name__ == '__main__':
-    line, atoms = "It's a tomato! (Sub-expression) {]{]{] pa(\"'\\n\\n\") spooky! (the orange) And errors: {{{", [
+    line, atoms = "sandbox world door", [
         "$", "or", "and", "is in"]
     for t in tokenize(line, atoms):
         print(t.line)
         print(" " * t.start + "^" * len(t.source), repr(t.value))
-    parsed = parse_interpolated(line, atoms)
+    parsed = parse2(line, atoms, "()")
     print("-" * 80)
     print(parsed)
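
Note (not part of the commit; a simplified sketch): the old ATOM_TOKEN alternation let word-like atoms such as "and" and "or" match in the middle of ordinary words, so "sandbox world door" was chopped apart, while the new pattern wraps alphabetic atoms in the (?&start)/(?&end) whitespace lookarounds so they only match as standalone words. The snippet below reproduces both behaviours with the third-party regex module, using small inlined patterns rather than the full ALL_TOKENS template:

    import regex

    line = "sandbox world door"

    # Old behaviour (simplified): atoms are alternated verbatim, so "and"/"or"
    # can match mid-word and cut the surrounding word short.
    old = regex.compile(r"(?P<any>and|or)|(?:(?!(?&any))\S)+")
    print([m.group(0) for m in old.finditer(line)])
    # -> ['s', 'and', 'box', 'w', 'or', 'ld', 'do', 'or']

    # New behaviour (simplified): alphabetic atoms are anchored to whitespace on
    # both sides (the (?&start)/(?&end) lookarounds are inlined here for brevity).
    new = regex.compile(r"(?P<any>(?<=\s|^)(?:and|or)(?=\s|$))|(?:(?!(?&any))\S)+")
    print([m.group(0) for m in new.finditer(line)])
    # -> ['sandbox', 'world', 'door']

Only atoms whose first and last characters are alphabetic receive the anchors (the a[0].isalpha() and a[-1].isalpha() check), so sigil-like atoms such as "$" are still matched wherever they appear.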
