-
-
Notifications
You must be signed in to change notification settings - Fork 14.8k
tests: add whitespace tests for vertical tab behavior #155028
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
821d9b2
1609baf
7c00dc2
1546b1d
5b487fc
91fd464
874a618
b727680
52f1d1e
c5aea24
f219e91
87fcb28
47fb045
f06914b
7027a64
4d8a428
93b13d3
7e47ea6
00a37bb
a2e128a
233f744
b32995b
ead2b71
d0bc9e4
2f981ce
3d1ad29
2506ce4
f1eb5e7
16b2655
701bc97
ece7316
a1eb231
6e459b9
dc0d44a
5661524
523f70a
1db9763
52225e6
185a582
43f045c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| //@ run-pass | ||
| // This test checks that split_ascii_whitespace does NOT split on | ||
| // vertical tab (\x0B), because the standard library uses the WhatWG | ||
| // Infra Standard definition of ASCII whitespace, which excludes | ||
| // vertical tab. | ||
| // | ||
| // See: https://github.com/rust-lang/rust-project-goals/issues/53 | ||
|
|
||
| fn main() { | ||
| let s = "a\x0Bb"; | ||
|
|
||
| let parts: Vec<&str> = s.split_ascii_whitespace().collect(); | ||
|
|
||
| assert_eq!(parts.len(), 1, | ||
| "vertical tab should not be treated as ASCII whitespace"); | ||
|
|
||
| let s2 = "a b"; | ||
| let parts2: Vec<&str> = s2.split_ascii_whitespace().collect(); | ||
| assert_eq!(parts2.len(), 2, | ||
| "regular space should split correctly"); | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| // This test ensures that the Rust lexer rejects whitespace-like characters | ||
| // that are not Pattern_White_Space, such as ZERO WIDTH SPACE (U+200B). | ||
|
|
||
| //@ check-fail | ||
|
|
||
| fn main() { | ||
| let x = 5; | ||
| let y = 10; | ||
|
|
||
| let a=x + y; | ||
| //~^ ERROR unknown start of token | ||
| //~| HELP invisible characters like | ||
| } | ||
|
Brace1000 marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| error: unknown start of token: \u{200b} | ||
| --> $DIR/invalid_whitespace.rs:10:11 | ||
| | | ||
| LL | let a=x + y; | ||
| | ^ | ||
| | | ||
| = help: invisible characters like '\u{200b}' are not usually visible in text editors | ||
|
|
||
| error: aborting due to 1 previous error | ||
|
Contributor
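There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You need to update this file so it matches the test output exactly. The easiest way to do this is to re-run the UI test with the `--bless` flag, which regenerates the expected `.stderr` file from the actual compiler output.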
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Got it, thanks! I ran `./x test ui/whitespace --bless` and updated the file to match the expected output. Everything is passing on my end now. |
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| //@ run-pass | ||
| // ignore-tidy-tab | ||
| // | ||
| // Tests that the Rust lexer accepts Unicode Pattern_White_Space characters. | ||
| // | ||
| // Worth noting: the Rust reference defines whitespace as Pattern_White_Space, | ||
| // which is not the same as what is_ascii_whitespace or is_whitespace give you. | ||
| // | ||
| // is_ascii_whitespace follows WhatWG and skips vertical tab (\x0B). | ||
| // is_whitespace uses Unicode White_Space, which is a broader set. | ||
| // | ||
| // The 11 characters that actually count as whitespace in Rust source: | ||
| // \x09 \x0A \x0B \x0C \x0D \x20 \u{85} \u{200E} \u{200F} \u{2028} \u{2029} | ||
| // | ||
| // Ref: https://github.com/rustfoundation/interop-initiative/issues/53 | ||
|
|
||
| #[rustfmt::skip] | ||
| fn main() { | ||
| // tab (\x09) between let and the name | ||
| let _ws1 = 1_i32; | ||
|
|
||
| // vertical tab (\x0B) between let and the name | ||
| // this is the one is_ascii_whitespace does not treat as whitespace (by design, per WhatWG) | ||
| let_ws2 = 2_i32; | ||
|
|
||
| // form feed (\x0C) between let and the name | ||
| let_ws3 = 3_i32; | ||
|
|
||
| // plain space (\x20), here just so every character is represented | ||
| let _ws4 = 4_i32; | ||
|
|
||
| // NEL (\u{85}) between let and the name | ||
| let _ws5 = 5_i32; | ||
|
|
||
| // left-to-right mark (\u{200E}) between let and the name | ||
| let_ws6 = 6_i32; | ||
|
|
||
| // right-to-left mark (\u{200F}) between let and the name | ||
| let_ws7 = 7_i32; | ||
|
|
||
| // \x0A, \x0D, \u{2028}, \u{2029} are also Pattern_White_Space, but they | ||
| // are displayed as line breaks, so they are not exercised inline between | ||
| // `let` and the name here. The lexer still accepts them as whitespace, | ||
| // e.g. at line boundaries. | ||
|
|
||
| // These are Unicode White_Space but NOT Pattern_White_Space: | ||
| // \u{A0} no-break space \u{1680} ogham space mark | ||
| // \u{2000} en quad \u{2001} em quad | ||
| // \u{2002} en space \u{2003} em space | ||
| // \u{2004} three-per-em space \u{2005} four-per-em space | ||
| // \u{2006} six-per-em space \u{2007} figure space | ||
| // \u{2008} punctuation space \u{2009} thin space | ||
| // \u{200A} hair space \u{202F} narrow no-break space | ||
| // \u{205F} medium math space \u{3000} ideographic space | ||
|
|
||
| // add them up so the compiler doesn't complain about unused variables | ||
| let _sum = _ws1 + _ws2 + _ws3 + _ws4 + _ws5 + _ws6 + _ws7; | ||
| println!("{}", _sum); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if this test is relevant to the compiler?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair point. The test documents the gap between what the lexer accepts and what the stdlib gives you. Happy to remove it if you think it doesn't belong here.