mirror of
https://github.com/rust-lang/rust.git
synced 2026-01-25 07:48:44 +00:00
Rollup merge of #146106 - epage:whitespace, r=fee1-dead
fix(lexer): Only allow horizontal whitespace in frontmatter

In writing up the reference for frontmatter, I realized that we probably shouldn't be accepting Unicode line-ending characters between the code fence and the infostring, or trailing after the infostring or a code fence.

In digging into the Unicode specification we use for whitespace, I found that it divides whitespace up into categories, so I'm deferring to what it says about horizontal whitespace for what should be used within a line.

Note that I am leaving out support for Unicode Default Ignorable characters. I figure that can be discussed outside of this change, within the reference and tracking issue.

Fixes rust-lang/rust#145971

Frontmatter tracking issue: rust-lang/rust#136889
This commit is contained in:
@@ -331,24 +331,37 @@ pub fn is_whitespace(c: char) -> bool {
|
||||
|
||||
matches!(
|
||||
c,
|
||||
// Usual ASCII suspects
|
||||
'\u{0009}' // \t
|
||||
| '\u{000A}' // \n
|
||||
// End-of-line characters
|
||||
| '\u{000A}' // line feed (\n)
|
||||
| '\u{000B}' // vertical tab
|
||||
| '\u{000C}' // form feed
|
||||
| '\u{000D}' // \r
|
||||
| '\u{0020}' // space
|
||||
| '\u{000D}' // carriage return (\r)
|
||||
| '\u{0085}' // next line (from latin1)
|
||||
| '\u{2028}' // LINE SEPARATOR
|
||||
| '\u{2029}' // PARAGRAPH SEPARATOR
|
||||
|
||||
// NEXT LINE from latin1
|
||||
| '\u{0085}'
|
||||
|
||||
// Bidi markers
|
||||
// `Default_Ignorable_Code_Point` characters
|
||||
| '\u{200E}' // LEFT-TO-RIGHT MARK
|
||||
| '\u{200F}' // RIGHT-TO-LEFT MARK
|
||||
|
||||
// Dedicated whitespace characters from Unicode
|
||||
| '\u{2028}' // LINE SEPARATOR
|
||||
| '\u{2029}' // PARAGRAPH SEPARATOR
|
||||
// Horizontal space characters
|
||||
| '\u{0009}' // tab (\t)
|
||||
| '\u{0020}' // space
|
||||
)
|
||||
}
|
||||
|
||||
/// True if `c` is considered horizontal whitespace according to the Rust language definition.
///
/// Only tab (U+0009) and space (U+0020) qualify; end-of-line characters do not.
|
||||
pub fn is_horizontal_whitespace(c: char) -> bool {
|
||||
// This is only the horizontal subset (tab and space) of what `is_whitespace` accepts.
|
||||
//
|
||||
// Note that this set is stable (i.e., it doesn't change with different
|
||||
// Unicode versions), so it's ok to just hard-code the values.
|
||||
|
||||
matches!(
|
||||
c,
|
||||
// Horizontal space characters
|
||||
'\u{0009}' // tab (\t)
|
||||
| '\u{0020}' // space
|
||||
)
|
||||
}
|
||||
|
||||
@@ -538,7 +551,7 @@ impl Cursor<'_> {
|
||||
debug_assert!(length_opening >= 3);
|
||||
|
||||
// whitespace between the opening and the infostring.
|
||||
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
|
||||
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
|
||||
|
||||
// copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like
|
||||
// `---Cargo.toml` as a valid opener
|
||||
@@ -547,7 +560,7 @@ impl Cursor<'_> {
|
||||
self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.');
|
||||
}
|
||||
|
||||
self.eat_while(|ch| ch != '\n' && is_whitespace(ch));
|
||||
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
|
||||
let invalid_infostring = self.first() != '\n';
|
||||
|
||||
let mut found = false;
|
||||
@@ -588,7 +601,7 @@ impl Cursor<'_> {
|
||||
// on a standalone line. Might be wrong.
|
||||
while let Some(closing) = rest.find("---") {
|
||||
let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1);
|
||||
if rest[preceding_chars_start..closing].chars().all(is_whitespace) {
|
||||
if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) {
|
||||
// candidate found
|
||||
potential_closing = Some(closing);
|
||||
break;
|
||||
|
||||
@@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr
|
||||
use rustc_errors::codes::*;
|
||||
use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey};
|
||||
use rustc_lexer::{
|
||||
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace,
|
||||
Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace,
|
||||
};
|
||||
use rustc_literal_escaper::{EscapeError, Mode, check_for_errors};
|
||||
use rustc_session::lint::BuiltinLintDiag;
|
||||
@@ -597,7 +597,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
|
||||
|
||||
let last_line_start = within.rfind('\n').map_or(0, |i| i + 1);
|
||||
let last_line = &within[last_line_start..];
|
||||
let last_line_trimmed = last_line.trim_start_matches(is_whitespace);
|
||||
let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace);
|
||||
let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32);
|
||||
|
||||
let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos);
|
||||
@@ -640,7 +640,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
|
||||
});
|
||||
}
|
||||
|
||||
if !rest.trim_matches(is_whitespace).is_empty() {
|
||||
if !rest.trim_matches(is_horizontal_whitespace).is_empty() {
|
||||
let span = self.mk_sp(last_line_start_pos, self.pos);
|
||||
self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span });
|
||||
}
|
||||
|
||||
1
tests/ui/.gitattributes
vendored
1
tests/ui/.gitattributes
vendored
@@ -3,4 +3,5 @@ json-bom-plus-crlf.rs -text
|
||||
json-bom-plus-crlf-multifile.rs -text
|
||||
json-bom-plus-crlf-multifile-aux.rs -text
|
||||
trailing-carriage-return-in-string.rs -text
|
||||
frontmatter-crlf.rs -text
|
||||
*.bin -text
|
||||
|
||||
22
tests/ui/frontmatter/frontmatter-contains-whitespace.rs
Normal file
22
tests/ui/frontmatter/frontmatter-contains-whitespace.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env -S cargo -Zscript
|
||||
---cargo
|
||||
# Beware editing: it has numerous whitespace characters which are important.
|
||||
# It contains a range of characters from the 'PATTERN_WHITE_SPACE' property outlined in
|
||||
# https://unicode.org/Public/UNIDATA/PropList.txt
|
||||
#
|
||||
# The characters in the first expression of the assertion can be generated
|
||||
# from: "4\u{0C}+\n\t\r7\t*\u{20}2\u{85}/\u{200E}3\u{200F}*\u{2028}2\u{2029}"
|
||||
package.description = """
|
||||
4+
|
||||
|
||||
7 * 2
/3*
2
|
||||
"""
|
||||
---
|
||||
|
||||
//@ check-pass
|
||||
|
||||
// Ensure the frontmatter can contain any whitespace
|
||||
|
||||
#![feature(frontmatter)]
|
||||
|
||||
fn main() {}
|
||||
14
tests/ui/frontmatter/frontmatter-crlf.rs
Normal file
14
tests/ui/frontmatter/frontmatter-crlf.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env -S cargo -Zscript
|
||||
---
|
||||
[dependencies]
|
||||
clap = "4"
|
||||
---
|
||||
|
||||
//@ check-pass
|
||||
// ignore-tidy-cr
|
||||
|
||||
// crlf line endings should be accepted
|
||||
|
||||
#![feature(frontmatter)]
|
||||
|
||||
fn main() {}
|
||||
@@ -1,7 +1,7 @@
|
||||
|
||||
|
||||
---cargo
|
||||
---
|
||||
---cargo
|
||||
---
|
||||
|
||||
// please note the whitespace characters after the first four lines.
|
||||
// This ensures that we accept whitespaces before the frontmatter, after
|
||||
@@ -10,6 +10,7 @@
|
||||
//@ check-pass
|
||||
// ignore-tidy-end-whitespace
|
||||
// ignore-tidy-leading-newlines
|
||||
// ignore-tidy-tab
|
||||
|
||||
#![feature(frontmatter)]
|
||||
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
--- cargo
|
||||
--- cargo
|
||||
---
|
||||
|
||||
//@ check-pass
|
||||
// ignore-tidy-tab
|
||||
// A frontmatter infostring can have leading whitespace.
|
||||
|
||||
#![feature(frontmatter)]
|
||||
|
||||
Reference in New Issue
Block a user