Skip to content

Commit be2b6a4

Browse files
committed
Adapt to cedarscript-grammar==0.4.0
1 parent 28418c7 commit be2b6a4

File tree

2 files changed: +71 / -50 lines changed

2 files changed: +71 / -50 lines changed

pyproject.toml

+1 / -1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ classifiers = [
2626
]
2727
keywords = ["parser", "ast", "cedarscript", "code-editing", "refactoring", "code-analysis", "sql-like", "ai-assisted-development"]
2828
dependencies = [
29-
"cedarscript-grammar>=0.2.2",
29+
"cedarscript-grammar>=0.4.0",
3030
]
3131
requires-python = ">=3.8"
3232

src/cedarscript_ast_parser/cedarscript_ast_parser.py

+70 / -49
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99

1010
class ParseError(NamedTuple):
11-
command_ordinal: int
11+
ordinal: int
1212
message: str
1313
line: int
1414
column: int
@@ -17,10 +17,12 @@ class ParseError(NamedTuple):
1717
def __str__(self):
1818
line_msg = f'; LINE #{self.line}' if self.line else ''
1919
col_msg = f'; COLUMN #{self.column}' if self.column else ''
20+
cmd_or_block = 'BLOCK' if self.line else 'COMMAND'
21+
error_type = 'Source Parsing' if self.line else 'CST Parsing'
2022
suggestion_msg = f'{self.suggestion} ' if self.suggestion else ''
2123
return (
22-
f"<error-details><error-location>COMMAND #{self.command_ordinal}{line_msg}{col_msg}</error-location>"
23-
f"<type>PARSING (no commands were applied at all)</type><description>{self.message}</description>"
24+
f"<error-details><error-location>{cmd_or_block} #{self.ordinal}{line_msg}{col_msg}</error-location>"
25+
f"<type>{error_type} (no commands were applied at all)</type><description>{self.message}</description>"
2426
f"<suggestion>{suggestion_msg}"
2527
"(NEVER apologize; just take a deep breath, re-read grammar rules (enclosed by <grammar.js> tags) "
2628
"and fix you CEDARScript syntax)</suggestion></error-details>"
@@ -60,7 +62,9 @@ class Marker(MarkerCompatible):
6062
type: MarkerType
6163
value: str
6264
offset: int | None = None
63-
marker_subtype: str | None = None # 'REGEX', 'PREFIX', 'SUFFIX' for LINE type
65+
66+
# See `line_base`
67+
marker_subtype: str | None = None
6468

6569
@property
6670
def as_marker(self) -> 'Marker':
@@ -71,10 +75,13 @@ def __str__(self):
7175
match self.marker_subtype:
7276
case 'string' | None:
7377
pass
78+
case 'empty':
79+
result = 'empty line'
7480
case _:
75-
result += self.marker_subtype
81+
result += f' {self.marker_subtype}'
7682

77-
result += f" '{self.value.strip()}'"
83+
if self.marker_subtype != 'empty':
84+
result += f" '{self.value.strip()}'"
7885
if self.offset is not None:
7986
result += f" at offset {self.offset}"
8087
return result
@@ -243,11 +250,12 @@ class LoopControl(StrEnum):
243250
class CaseWhen:
244251
"""Represents a WHEN condition in a CASE statement"""
245252
empty: bool = False
253+
indent_level: int | None = None
254+
line_number: int | None = None
255+
line_matcher: str | None = None
246256
regex: str | None = None
247257
prefix: str | None = None
248258
suffix: str | None = None
249-
indent_level: int | None = None
250-
line_number: int | None = None
251259

252260

253261
@dataclass
@@ -360,7 +368,7 @@ def parse_script(self, code_text: str) -> tuple[Sequence[Command], Sequence[Pars
360368
for child in root_node.children:
361369
node_type = child.type.casefold()
362370
if node_type == 'comment':
363-
print("(COMMENT) " + self.parse_string(child).removeprefix("--").strip())
371+
print("(COMMENT) " + self.parse_string(child).removeprefix("--").removeprefix("/*").strip())
364372
if not node_type.endswith('_command'):
365373
continue
366374
commands.append(self.parse_command(child))
@@ -371,7 +379,7 @@ def parse_script(self, code_text: str) -> tuple[Sequence[Command], Sequence[Pars
371379
# Handle any unexpected exceptions during parsing
372380
error_message = str(e)
373381
error = ParseError(
374-
command_ordinal=command_ordinal,
382+
ordinal=command_ordinal,
375383
message=error_message,
376384
line=0,
377385
column=0,
@@ -399,7 +407,7 @@ def _collect_parse_errors(self, node, code_text, command_ordinal: int) -> list[P
399407
suggestion = _generate_suggestion(node, code_text)
400408

401409
error = ParseError(
402-
command_ordinal=command_ordinal,
410+
ordinal=command_ordinal,
403411
message=message,
404412
line=line,
405413
column=column,
@@ -474,18 +482,18 @@ def parse_update_target(self, node):
474482
raise ValueError(f"[parse_update_target] Invalid target: {invalid}")
475483

476484
def parse_identifier_from_file(self, node):
477-
identifier_marker = self.find_first_by_type(node.named_children, 'identifierMarker')
478-
identifier_type = MarkerType(identifier_marker.children[0].type.casefold())
479-
name = self.parse_string(identifier_marker.named_children[0])
480-
offset_clause = self.find_first_by_type(identifier_marker.named_children, 'offset_clause')
485+
identifier_matcher = self.find_first_by_type(node.named_children, 'identifier_matcher')
486+
identifier_type = MarkerType(identifier_matcher.children[0].type.casefold())
487+
name = self.parse_string(identifier_matcher.named_children[0])
488+
offset_clause = self.find_first_by_type(identifier_matcher.named_children, 'offset_clause')
481489
file_clause = self.find_first_by_type(node.named_children, 'singlefile_clause')
482490
where_clause = self.find_first_by_type(node.named_children, 'where_clause')
483491

484492
if not file_clause or not name:
485493
raise ValueError("Invalid identifier_from_file clause")
486494

487495
file_path = self.parse_singlefile_clause(file_clause).file_path
488-
offset = self.parse_offset_clause(offset_clause) if offset_clause else None
496+
offset = self.find_primitive(offset_clause) if offset_clause else None
489497
where = self.parse_where_clause(where_clause)
490498

491499
return IdentifierFromFile(file_path=file_path,
@@ -579,7 +587,7 @@ def parse_region(self, node) -> Region:
579587
node = node.named_children[0]
580588

581589
match node.type.casefold():
582-
case 'marker' | 'linemarker' | 'identifiermarker':
590+
case 'marker' | 'line_matcher' | 'identifier_matcher':
583591
result = self.parse_marker(node)
584592
case 'segment':
585593
result = self.parse_segment(node)
@@ -596,27 +604,25 @@ def parse_marker(self, node) -> Marker:
596604
if node.type.casefold() == 'marker':
597605
node = node.named_children[0]
598606

599-
marker_type = node.children[0].type # LINE, VARIABLE, FUNCTION, METHOD or CLASS
607+
marker_type = node.named_children[0].type
608+
# LINE, VARIABLE, FUNCTION, METHOD or CLASS
600609
marker_subtype = None
601610
value = None
602611

603-
if marker_type != 'LINE': # VARIABLE, FUNCTION, METHOD or CLASS
604-
value = self.parse_string(self.find_first_by_type(node.named_children, 'string'))
605612
# Handle the different marker types
606-
else:
607-
# Get the second child which is either a string/number or a subtype specifier
608-
second_child = node.children[1]
609-
marker_subtype = second_child.type
610-
if second_child.type in ['string', 'number']:
611-
match second_child.type:
612-
case 'string':
613-
value = self.parse_string(second_child)
614-
case _:
615-
value = second_child.text.decode('utf8')
616-
else: # REGEX, PREFIX, or SUFFIX
617-
value = self.parse_string(node.children[2])
618-
619-
offset = self.parse_offset_clause(self.find_first_by_type(node.named_children, 'offset_clause'))
613+
if marker_type == 'line_base':
614+
marker_type = 'line'
615+
# subtype: None, number, EMPTY, REGEX, PREFIX, SUFFIX, INDENT-LEVEL
616+
line_base_node = node.named_children[0] # line_base
617+
marker_subtype = [n.type.casefold() for n in line_base_node.children if n.type.casefold() != 'line'][0]
618+
value = self.find_primitive(line_base_node)
619+
620+
else: # identifier_matcher
621+
marker_type = node.children[0].type.casefold()
622+
value = self.find_primitive(node)
623+
624+
node1 = self.find_first_by_type(node.named_children, 'offset_clause')
625+
offset = self.find_primitive(node1)
620626
return Marker(
621627
type=MarkerType(marker_type.casefold()),
622628
marker_subtype=marker_subtype,
@@ -631,16 +637,9 @@ def parse_segment(self, node) -> Segment:
631637
end: RelativeMarker = self.parse_region(relpos_end)
632638
return Segment(start=start, end=end)
633639

634-
def parse_offset_clause(self, node):
635-
if node is None:
636-
return None
637-
return int(self.find_first_by_type(node.children, 'number').text)
638-
639640
def parse_relative_indentation(self, node) -> int | None:
640641
node = self.find_first_by_type(node.named_children, 'relative_indentation')
641-
if node is None:
642-
return None
643-
return int(self.find_first_by_type(node.named_children, 'number').text)
642+
return self.find_primitive(node)
644643

645644
def parse_content(self, node) -> str | tuple[Region, int | None] | None:
646645
content = self.find_first_by_type(node.named_children, [
@@ -668,7 +667,7 @@ def parse_case_stmt(self, node) -> CaseStatement:
668667
current_when = None
669668
for child in node.children:
670669
match child.type:
671-
case 'case_when':
670+
case 'line_base':
672671
current_when = self.parse_case_when(child)
673672
case 'case_action' if current_when is not None:
674673
action = self.parse_case_action(child)
@@ -686,20 +685,28 @@ def parse_case_stmt(self, node) -> CaseStatement:
686685
def parse_case_when(self, node) -> CaseWhen:
687686
"""Parse a WHEN clause in a CASE statement"""
688687
when = CaseWhen()
689-
688+
690689
if self.find_first_by_field_name(node, 'empty'):
691690
when.empty = True
691+
692+
elif indent := self.find_first_by_field_name(node, 'indent_level'):
693+
when.indent_level = int(indent.text)
694+
695+
elif line_num := self.find_first_by_field_name(node, 'line_number'):
696+
when.line_number = int(line_num.text)
697+
698+
elif line_str := self.find_first_by_field_name(node, 'line_matcher'):
699+
when.line_matcher = self.parse_string(line_str)
700+
692701
elif regex := self.find_first_by_field_name(node, 'regex'):
693702
when.regex = re.compile(self.parse_string(regex))
703+
694704
elif prefix := self.find_first_by_field_name(node, 'prefix'):
695705
when.prefix = self.parse_string(prefix)
706+
696707
elif suffix := self.find_first_by_field_name(node, 'suffix'):
697708
when.suffix = self.parse_string(suffix)
698-
elif indent := self.find_first_by_field_name(node, 'indent_level'):
699-
when.indent_level = int(indent.text)
700-
elif line_num := self.find_first_by_field_name(node, 'line_number'):
701-
when.line_number = int(line_num.text)
702-
709+
703710
return when
704711

705712
def parse_case_action(self, node) -> CaseAction:
@@ -775,6 +782,20 @@ def parse_to_value_clause(self, node):
775782
raise ValueError("No value found in to_value_clause")
776783
return self.parse_string(value_node)
777784

785+
def find_primitive(self, node):
786+
if node is None:
787+
return None
788+
node = self.find_first_by_type(node.named_children, ['string', 'number'])
789+
if node is None:
790+
return None
791+
match node.type.casefold():
792+
case 'string':
793+
return self.parse_string(node)
794+
case 'number':
795+
return int(node.text)
796+
case _:
797+
raise ValueError(f"[find_primitive] Invalid primitive: {node.type} ({node.text})")
798+
778799
@staticmethod
779800
def parse_string(node):
780801
match node.type.casefold():

0 commit comments

Comments
 (0)