Skip to content

Commit 900223e

Browse files
committed
Add support for bytes literals.
1 parent 8b2e776 commit 900223e

File tree

8 files changed

+220
-100
lines changed

8 files changed

+220
-100
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ nom = "^4.0"
1313
#nom_locate = { path = "../nom_locate/" }
1414
nom_locate = { git = "https://github.com/ProgVal/nom_locate", branch = "nom4" }
1515
unicode-xid = "^0.1"
16+
#unicode_names = "^0.1.7"
1617

1718
[dev-dependencies]
1819
pretty_assertions = "^0.4"

src/ast.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ pub struct Funcdef {
336336
pub struct Classdef {
337337
pub decorators: Vec<Decorator>,
338338
pub name: String,
339-
pub parameters: Arglist,
339+
pub arguments: Arglist,
340340
pub code: Vec<Statement>,
341341
}
342342

src/bytes.rs

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
use std::cmp::min;
2+
3+
use helpers::StrSpan;
4+
5+
// Parses one backslash escape sequence inside a bytes literal.
//
// Yields `Some(byte)` for an escape that denotes a byte and `None` for a
// backslash followed by a newline (line continuation), which contributes
// nothing to the literal.
//
// Supported escapes: `\\`, `\'`, `\"`, `\a`, `\b`, `\f`, `\n`, `\r`, `\t`,
// `\v`, one to three octal digits, and `\x` followed by exactly two hex
// digits (either case).
named!(escapedchar<StrSpan, Option<u8>>,
  preceded!(char!('\\'),
    alt!(
      char!('\n') => { |_| None }
    | char!('\\') => { |_| Some(b'\\') }
    | char!('\'') => { |_| Some(b'\'') }
    | char!('"') => { |_| Some(b'"') }
    | char!('a') => { |_| Some(b'\x07') } // BEL
    | char!('b') => { |_| Some(b'\x08') } // BS
    | char!('f') => { |_| Some(b'\x0c') } // FF
    | char!('n') => { |_| Some(b'\n') }
    | char!('r') => { |_| Some(b'\r') }
    | char!('t') => { |_| Some(b'\t') }
    | char!('v') => { |_| Some(b'\x0b') } // VT
    | tuple!(one_of!("01234567"), opt!(one_of!("01234567")), opt!(one_of!("01234567"))) => { |(c1, c2, c3): (char, Option<char>, Option<char>)|
        // Every digit is octal, so all of them are decoded in radix 8.
        match (c1.to_digit(8), c2.and_then(|c| c.to_digit(8)), c3.and_then(|c| c.to_digit(8))) {
          // Three octal digits can reach 0o777; values above 255 are clamped to 255.
          (Some(d1), Some(d2), Some(d3)) => Some(min((d1 << 6) + (d2 << 3) + d3, 255) as u8),
          (Some(d1), Some(d2), None    ) => Some(((d1 << 3) + d2) as u8),
          (Some(d1), None,     None    ) => Some(d1 as u8),
          // one_of! only lets octal digits through, and a missing second
          // digit implies a missing third one.
          _ => unreachable!(),
        }
      }
    | preceded!(char!('x'), tuple!(one_of!("0123456789abcdefABCDEF"), one_of!("0123456789abcdefABCDEF"))) => { |(c1, c2): (char, char)|
        match (c1.to_digit(16), c2.to_digit(16)) {
          // The first digit is the high nibble, hence a shift by 4 bits.
          (Some(d1), Some(d2)) => Some(((d1 << 4) + d2) as u8),
          _ => unreachable!(), // one_of! only lets hex digits through
        }
      }
    )
  )
);
35+
36+
// Parses the body of a single-quoted or double-quoted bytes literal, i.e.
// everything between (and excluding) the opening and closing `quote`.
//
// The body is a sequence of escape sequences (see `escapedchar`) and plain
// characters. Parsing stops at the first unescaped `quote`, at a backslash
// that does not start a known escape, or at a non-ASCII character: bytes
// literals may only contain ASCII, and converting a wider `char` with
// `as u8` would silently truncate it.
named_args!(shortbytes(quote: char) <StrSpan, Vec<u8>>,
  fold_many0!(
    alt!(
      call!(escapedchar)
    | verify!(none_of!("\\"), |c:char| c != quote && c.is_ascii()) => { |c:char| Some(c as u8) }
    ),
    Vec::new(),
    // `None` (an escaped newline) adds nothing to the accumulator.
    |mut acc:Vec<u8>, c:Option<u8>| { acc.extend(c); acc }
  )
);
46+
47+
// Parses the body of a triple-quoted bytes literal, i.e. everything between
// (and excluding) the opening and closing run of three `quote` characters.
//
// Before a plain character is consumed, the next three characters are peeked
// at; if they are three `quote`s the body ends there. Non-ASCII characters
// are rejected because bytes literals may only contain ASCII, and converting
// a wider `char` with `as u8` would silently truncate it.
named_args!(longbytes(quote: char) <StrSpan, Vec<u8>>,
  fold_many0!(
    alt!(
      call!(escapedchar)
    | verify!(tuple!(peek!(take!(3)), none_of!("\\")), |(s,c):(StrSpan,char)| { c.is_ascii() && s.fragment.0.chars().collect::<Vec<char>>() != vec![quote,quote,quote] }) => { |(_,c)| Some(c as u8) }
    ),
    Vec::new(),
    // `None` (an escaped newline) adds nothing to the accumulator.
    |mut acc:Vec<u8>, c:Option<u8>| { acc.extend(c); acc }
  )
);
57+
58+
59+
60+
// Parses a complete bytes literal (optional prefix, quotes and body) and
// returns the decoded bytes.
//
// Two-letter prefixes are listed before the one-letter ones because alt!
// commits to the first alternative that matches. Triple-quoted forms are
// likewise tried before the single-quoted ones.
//
// NOTE(review): the matched prefix is not used afterwards, so raw literals
// (`br"..."`) still get their escapes processed, and the empty alternative
// lets an unprefixed literal be read as bytes -- confirm this is intended.
named!(pub bytes<StrSpan, Vec<u8>>,
  do_parse!(
    prefix: alt!(
        tag!("br") | tag!("Br") | tag!("bR") | tag!("BR")
      | tag!("rb") | tag!("rB") | tag!("Rb") | tag!("RB")
      | tag!("b") | tag!("B")
      | tag!("")
    ) >>
    payload: alt!(
        delimited!(tag!("'''"), call!(longbytes, '\''), tag!("'''"))
      | delimited!(tag!("\"\"\""), call!(longbytes, '"'), tag!("\"\"\""))
      | delimited!(char!('\''), call!(shortbytes, '\''), char!('\''))
      | delimited!(char!('"'), call!(shortbytes, '"'), char!('"'))
    ) >>
    (payload)
  )
);

src/expressions.rs

+52-96
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@ use std::marker::PhantomData;
22

33
use nom;
44
use nom::{IResult, Err, Context, ErrorKind};
5+
//use unicode_names;
56

67
use helpers;
78
use helpers::{StrSpan, name};
89
use helpers::{AreNewlinesSpaces, NewlinesAreSpaces};
910
use functions::varargslist;
11+
use bytes::bytes;
12+
use strings::string;
1013
use ast::*;
1114

1215
#[derive(Clone, Debug, PartialEq)]
@@ -244,92 +247,6 @@ named!(atom_expr<StrSpan, Box<Expression>>,
244247
)
245248
);
246249

247-
named!(escapedchar<StrSpan, Option<char>>,
248-
preceded!(char!('\\'),
249-
alt!(
250-
char!('\n') => { |_| None }
251-
| char!('\\') => { |_| Some('\\') }
252-
| char!('\'') => { |_| Some('\'') }
253-
| char!('"') => { |_| Some('"') }
254-
| char!('b') => { |_| Some('\x07') } // BEL
255-
| char!('f') => { |_| Some('\x0c') } // FF
256-
| char!('n') => { |_| Some('\n') }
257-
| char!('r') => { |_| Some('\r') }
258-
| char!('t') => { |_| Some('\t') }
259-
| char!('v') => { |_| Some('\x0b') } // VT
260-
| tuple!(one_of!("01234567"), opt!(one_of!("01234567")), opt!(one_of!("01234567"))) => { |(c1, c2, c3): (char, Option<char>, Option<char>)|
261-
match (c1.to_digit(8), c2.and_then(|c| c.to_digit(2)), c3.and_then(|c| c.to_digit(2))) {
262-
(Some(d1), Some(d2), Some(d3)) => ::std::char::from_u32((d1 << 6) + (d2 << 3) + d3),
263-
(Some(d1), Some(d2), None ) => ::std::char::from_u32((d1 << 3) + d2),
264-
(Some(d1), None, None ) => ::std::char::from_u32(d1),
265-
_ => unreachable!(),
266-
}
267-
}
268-
| preceded!(char!('x'), tuple!(one_of!("0123456789abcdef"), one_of!("0123456789abcdef"))) => { |(c1, c2): (char, char)|
269-
match (c1.to_digit(16), c2.to_digit(16)) {
270-
(Some(d1), Some(d2)) => ::std::char::from_u32((d1 << 8) + d2),
271-
_ => unreachable!(),
272-
}
273-
}
274-
| char!('N') => { |_| unimplemented!() } // TODO
275-
| char!('u') => { |_| unimplemented!() } // TODO
276-
| char!('U') => { |_| unimplemented!() } // TODO
277-
)
278-
)
279-
);
280-
281-
named_args!(shortstring(quote: char) <StrSpan, String>,
282-
fold_many0!(
283-
alt!(
284-
call!(Self::escapedchar)
285-
| verify!(none_of!("\\"), |c:char| c != quote) => { |c:char| Some(c) }
286-
),
287-
String::new(),
288-
|mut acc:String, c:Option<char>| { match c { Some(c) => acc.push_str(&c.to_string()), None => () }; acc }
289-
)
290-
);
291-
292-
named_args!(longstring(quote: char) <StrSpan, String>,
293-
fold_many0!(
294-
alt!(
295-
call!(Self::escapedchar)
296-
| verify!(tuple!(peek!(take!(3)), none_of!("\\")), |(s,_):(StrSpan,_)| { s.fragment.0.chars().collect::<Vec<char>>() != vec![quote,quote,quote] }) => { |(_,c)| Some(c) }
297-
),
298-
String::new(),
299-
|mut acc:String, c:Option<char>| { match c { Some(c) => acc.push_str(&c.to_string()), None => () }; acc }
300-
)
301-
);
302-
303-
named!(string<StrSpan, PyString>,
304-
do_parse!(
305-
prefix: alt!(tag!("fr")|tag!("Fr")|tag!("fR")|tag!("FR")|tag!("rf")|tag!("rF")|tag!("Rf")|tag!("RF")|tag!("r")|tag!("u")|tag!("R")|tag!("U")|tag!("f")|tag!("F")|tag!("")) >>
306-
content: alt!(
307-
delimited!(
308-
tag!("'''"),
309-
call!(Self::longstring, '\''),
310-
tag!("'''")
311-
)
312-
| delimited!(
313-
tag!("\"\"\""),
314-
call!(Self::longstring, '"'),
315-
tag!("\"\"\"")
316-
)
317-
| delimited!(
318-
char!('\''),
319-
call!(Self::shortstring, '\''),
320-
char!('\'')
321-
)
322-
| delimited!(
323-
char!('"'),
324-
call!(Self::shortstring, '"'),
325-
char!('"')
326-
)
327-
) >> (
328-
PyString { prefix: prefix.to_string(), content: content.to_string() }
329-
)
330-
)
331-
);
332-
333250
// atom: ('(' [yield_expr|testlist_comp] ')' |
334251
// '[' [testlist_comp] ']' |
335252
// '{' [dictorsetmaker] '}' |
@@ -340,8 +257,13 @@ named!(atom<StrSpan, Box<Expression>>,
340257
| tag!("None") => { |_| Expression::None }
341258
| tag!("True") => { |_| Expression::True }
342259
| tag!("False") => { |_| Expression::False }
260+
| separated_nonempty_list!(spaces!(), string) => { |s| Expression::String(s) }
261+
| separated_nonempty_list!(spaces!(), bytes) => { |v| {
262+
let mut v2 = Vec::new();
263+
for b in v { v2.extend(b) }
264+
Expression::Bytes(v2)
265+
}}
343266
| name => { |n| Expression::Name(n) }
344-
| separated_nonempty_list!(spaces!(), call!(Self::string)) => { |s| Expression::String(s) }
345267
| ws3!(tuple!(char!('['), opt!(ws!(char!(' '))), char!(']'))) => { |_| Expression::ListLiteral(vec![]) }
346268
| ws3!(tuple!(char!('{'), opt!(ws!(char!(' '))), char!('}'))) => { |_| Expression::DictLiteral(vec![]) }
347269
| ws3!(tuple!(char!('('), opt!(ws!(char!(' '))), char!(')'))) => { |_| Expression::TupleLiteral(vec![]) }
@@ -675,25 +597,59 @@ mod tests {
675597
use super::*;
676598

677599
#[test]
678-
fn test_atom() {
600+
fn test_string() {
679601
let atom = ExpressionParser::<NewlinesAreNotSpaces>::atom;
680602
let new_pystring = |s: &str| PyString { prefix: "".to_string(), content: s.to_string() };
681-
assert_parse_eq(atom(make_strspan("foo ")), Ok((make_strspan(" "), Box::new(Expression::Name("foo".to_string())))));
682-
assert_parse_eq(atom(make_strspan(r#""foo" "#)), Ok((make_strspan(" "), Box::new(Expression::String(vec![new_pystring("foo")])))));
683-
assert_parse_eq(atom(make_strspan(r#""foo" "bar""#)), Ok((make_strspan(""), Box::new(Expression::String(vec![new_pystring("foo"), new_pystring("bar")])))));
684-
assert_parse_eq(atom(make_strspan(r#""fo\"o" "#)), Ok((make_strspan(" "), Box::new(Expression::String(vec![new_pystring("fo\"o")])))));
685-
assert_parse_eq(atom(make_strspan(r#""fo"o" "#)), Ok((make_strspan(r#"o" "#), Box::new(Expression::String(vec![new_pystring("fo")])))));
686-
assert_parse_eq(atom(make_strspan(r#""fo \" o" "#)), Ok((make_strspan(" "), Box::new(Expression::String(vec![new_pystring("fo \" o")])))));
687-
assert_parse_eq(atom(make_strspan(r#"'fo \' o' "#)), Ok((make_strspan(" "), Box::new(Expression::String(vec![new_pystring("fo ' o")])))));
603+
assert_parse_eq(atom(make_strspan(r#""foo" "#)), Ok((make_strspan(" "),
604+
Box::new(Expression::String(vec![new_pystring("foo")])))
605+
));
606+
assert_parse_eq(atom(make_strspan(r#""foo" "bar""#)), Ok((make_strspan(""),
607+
Box::new(Expression::String(vec![new_pystring("foo"), new_pystring("bar")])))
608+
));
609+
assert_parse_eq(atom(make_strspan(r#""fo\"o" "#)), Ok((make_strspan(" "),
610+
Box::new(Expression::String(vec![new_pystring("fo\"o")])))
611+
));
612+
assert_parse_eq(atom(make_strspan(r#""fo"o" "#)), Ok((make_strspan(r#"o" "#),
613+
Box::new(Expression::String(vec![new_pystring("fo")])))
614+
));
615+
assert_parse_eq(atom(make_strspan(r#""fo \" o" "#)), Ok((make_strspan(" "),
616+
Box::new(Expression::String(vec![new_pystring("fo \" o")])))
617+
));
618+
assert_parse_eq(atom(make_strspan(r#"'fo \' o' "#)), Ok((make_strspan(" "),
619+
Box::new(Expression::String(vec![new_pystring("fo ' o")])))
620+
));
688621
}
689622

690623
#[test]
691-
fn test_triple_quotes() {
624+
fn test_triple_quotes_string() {
692625
let new_pystring = |s: &str| PyString { prefix: "".to_string(), content: s.to_string() };
693626
let atom = ExpressionParser::<NewlinesAreNotSpaces>::atom;
694627
assert_parse_eq(atom(make_strspan(r#"'''fo ' o''' "#)), Ok((make_strspan(" "), Box::new(Expression::String(vec![new_pystring("fo ' o")])))));
695628
}
696629

630+
#[test]
fn test_bytes() {
    let atom = ExpressionParser::<NewlinesAreNotSpaces>::atom;
    // Each case is (source text, expected unparsed remainder, expected bytes).
    let cases: Vec<(&str, &str, &[u8])> = vec![
        (r#"b"foo" "#, " ", &b"foo"[..]),
        (r#"b"foo" "bar""#, "", &b"foobar"[..]),
        (r#"b"fo\"o" "#, " ", &b"fo\"o"[..]),
        (r#"b"fo"o" "#, r#"o" "#, &b"fo"[..]),
        (r#"b"fo \" o" "#, " ", &b"fo \" o"[..]),
        (r#"b'fo \' o' "#, " ", &b"fo ' o"[..]),
    ];
    for (source, remainder, expected) in cases {
        assert_parse_eq(atom(make_strspan(source)), Ok((make_strspan(remainder),
            Box::new(Expression::Bytes(expected.to_vec())))
        ));
    }
}
652+
697653
#[test]
698654
fn test_ternary() {
699655
let test = ExpressionParser::<NewlinesAreNotSpaces>::test;

src/functions.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,11 @@ named_args!(classdef(indent: usize, decorators: Vec<Decorator>) <StrSpan, Compou
7474
tag!("class") >>
7575
space_sep2 >>
7676
name: name >>
77-
parameters: ws2!(delimited!(char!('('), ws!(call!(ExpressionParser::<NewlinesAreSpaces>::arglist)), char!(')'))) >>
77+
arguments: ws2!(delimited!(char!('('), ws!(call!(ExpressionParser::<NewlinesAreSpaces>::arglist)), char!(')'))) >>
7878
ws2!(char!(':')) >>
7979
code: call!(block, indent) >> (
8080
CompoundStatement::Classdef(Classdef {
81-
decorators, name, parameters, code
81+
decorators, name, arguments, code
8282
})
8383
)
8484
)

src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ extern crate nom_locate;
99
extern crate pretty_assertions;
1010

1111
extern crate unicode_xid;
12+
//extern crate unicode_names;
1213

1314
#[macro_use]
1415
mod helpers;
@@ -17,6 +18,8 @@ mod expressions;
1718
#[macro_use]
1819
mod statements;
1920
mod functions;
21+
mod strings;
22+
mod bytes;
2023
pub mod ast;
2124
pub mod visitors;
2225

src/strings.rs

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
use helpers::StrSpan;
2+
use ast::*;
3+
4+
// Parses one backslash escape sequence inside a string literal.
//
// Yields `Some(char)` for an escape that denotes a character and `None` for
// a backslash followed by a newline (line continuation), which contributes
// nothing to the literal.
//
// Supported escapes: `\\`, `\'`, `\"`, `\a`, `\b`, `\f`, `\n`, `\r`, `\t`,
// `\v`, one to three octal digits, and `\x` followed by exactly two hex
// digits (either case). `\N`, `\u` and `\U` are recognised but not
// implemented yet: they panic through `unimplemented!()`.
named!(escapedchar<StrSpan, Option<char>>,
  preceded!(char!('\\'),
    alt!(
      char!('\n') => { |_| None }
    | char!('\\') => { |_| Some('\\') }
    | char!('\'') => { |_| Some('\'') }
    | char!('"') => { |_| Some('"') }
    | char!('a') => { |_| Some('\x07') } // BEL
    | char!('b') => { |_| Some('\x08') } // BS
    | char!('f') => { |_| Some('\x0c') } // FF
    | char!('n') => { |_| Some('\n') }
    | char!('r') => { |_| Some('\r') }
    | char!('t') => { |_| Some('\t') }
    | char!('v') => { |_| Some('\x0b') } // VT
    | tuple!(one_of!("01234567"), opt!(one_of!("01234567")), opt!(one_of!("01234567"))) => { |(c1, c2, c3): (char, Option<char>, Option<char>)|
        // Every digit is octal, so all of them are decoded in radix 8.
        match (c1.to_digit(8), c2.and_then(|c| c.to_digit(8)), c3.and_then(|c| c.to_digit(8))) {
          (Some(d1), Some(d2), Some(d3)) => ::std::char::from_u32((d1 << 6) + (d2 << 3) + d3),
          (Some(d1), Some(d2), None    ) => ::std::char::from_u32((d1 << 3) + d2),
          (Some(d1), None,     None    ) => ::std::char::from_u32(d1),
          // one_of! only lets octal digits through, and a missing second
          // digit implies a missing third one.
          _ => unreachable!(),
        }
      }
    | preceded!(char!('x'), tuple!(one_of!("0123456789abcdefABCDEF"), one_of!("0123456789abcdefABCDEF"))) => { |(c1, c2): (char, char)|
        match (c1.to_digit(16), c2.to_digit(16)) {
          // The first digit is the high nibble, hence a shift by 4 bits.
          (Some(d1), Some(d2)) => ::std::char::from_u32((d1 << 4) + d2),
          _ => unreachable!(), // one_of! only lets hex digits through
        }
      }
    // TODO: named escapes, sketched with the (currently disabled) unicode_names crate:
    //| preceded!(char!('N'), delimited!(char!('{'), none_of!("}"), char!('}'))) => { |name|
    //    unicode_names::character(name)
    //  }
    | char!('N') => { |_| unimplemented!() } // TODO
    | char!('u') => { |_| unimplemented!() } // TODO
    | char!('U') => { |_| unimplemented!() } // TODO
    )
  )
);
40+
41+
// Parses the body of a single-quoted or double-quoted string literal, i.e.
// everything between (and excluding) the opening and closing `quote`.
//
// The body is folded into a `String` from escape sequences (see
// `escapedchar`) and plain characters; an unescaped `quote` or a backslash
// that does not start a known escape ends it.
named_args!(shortstring(quote: char) <StrSpan, String>,
  fold_many0!(
    alt!(
      call!(escapedchar)
    | verify!(none_of!("\\"), |ch: char| ch != quote) => { |ch: char| Some(ch) }
    ),
    String::new(),
    // An escaped newline yields `None` and leaves the buffer untouched.
    |mut buffer: String, piece: Option<char>| {
      buffer.extend(piece);
      buffer
    }
  )
);
51+
52+
// Parses the body of a triple-quoted string literal, i.e. everything between
// (and excluding) the opening and closing run of three `quote` characters.
//
// Before a plain character is consumed, the next three characters are peeked
// at; if they are three `quote`s the body ends there.
named_args!(longstring(quote: char) <StrSpan, String>,
  fold_many0!(
    alt!(
      call!(escapedchar)
    | verify!(
        tuple!(peek!(take!(3)), none_of!("\\")),
        |(ahead, _): (StrSpan, _)| {
          ahead.fragment.0.chars().collect::<Vec<char>>() != vec![quote, quote, quote]
        }
      ) => { |(_, ch)| Some(ch) }
    ),
    String::new(),
    // An escaped newline yields `None` and leaves the buffer untouched.
    |mut buffer: String, piece: Option<char>| {
      buffer.extend(piece);
      buffer
    }
  )
);
62+
63+
// Parses a complete string literal (optional prefix, quotes and body) into a
// `PyString` holding the prefix text and the decoded content.
//
// Two-letter prefixes are listed before the one-letter ones because alt!
// commits to the first alternative that matches. Triple-quoted forms are
// likewise tried before the single-quoted ones.
named!(pub string<StrSpan, PyString>,
  do_parse!(
    prefix: alt!(
        tag!("fr") | tag!("Fr") | tag!("fR") | tag!("FR")
      | tag!("rf") | tag!("rF") | tag!("Rf") | tag!("RF")
      | tag!("r") | tag!("u") | tag!("R") | tag!("U")
      | tag!("f") | tag!("F")
      | tag!("")
    ) >>
    content: alt!(
        delimited!(tag!("'''"), call!(longstring, '\''), tag!("'''"))
      | delimited!(tag!("\"\"\""), call!(longstring, '"'), tag!("\"\"\""))
      | delimited!(char!('\''), call!(shortstring, '\''), char!('\''))
      | delimited!(char!('"'), call!(shortstring, '"'), char!('"'))
    ) >>
    (PyString { prefix: prefix.to_string(), content })
  )
);
74+

0 commit comments

Comments
 (0)