From 9d0e47dd9d089d97398936269324a7d0076374bf Mon Sep 17 00:00:00 2001 From: Yaroslav Bolyukin Date: Sat, 07 Feb 2026 17:37:36 +0000 Subject: [PATCH] refactor: implement yaml escaping rules as in std.jsonnet --- --- a/crates/jrsonnet-stdlib/src/manifest/yaml.rs +++ b/crates/jrsonnet-stdlib/src/manifest/yaml.rs @@ -28,6 +28,7 @@ /// safe_key: 1 /// ``` quote_keys: bool, + quote_values: bool, /// If true - then order of fields is preserved as written, /// instead of sorting alphabetically #[cfg(feature = "exp-preserve-order")] @@ -43,6 +44,7 @@ padding: Cow::Owned(padding.clone()), arr_element_padding: Cow::Owned(padding), quote_keys: false, + quote_values: false, #[cfg(feature = "exp-preserve-order")] preserve_order, } @@ -56,6 +58,7 @@ padding: Cow::Borrowed(" "), arr_element_padding: Cow::Borrowed(if indent_array_in_object { " " } else { "" }), quote_keys, + quote_values: true, #[cfg(feature = "exp-preserve-order")] preserve_order, } @@ -67,36 +70,97 @@ } } -/// From -/// With added date check -fn yaml_needs_quotes(string: &str) -> bool { - fn need_quotes_spaces(string: &str) -> bool { - string.starts_with(' ') || string.ends_with(' ') +fn bare_safe(key: &str) -> bool { + fn count_char_u(k: &str, c: char) -> usize { + let cu = c.to_ascii_uppercase(); + k.chars().filter(|v| *v == c || *v == cu).count() + } + fn count_char(k: &str, c: char) -> usize { + k.chars().filter(|v| *v == c).count() } + fn is_reserved(key: &str) -> bool { + const RESERVED: &[&str] = &[ + // Boolean types taken from https://yaml.org/type/bool.html + "true", "false", "yes", "no", "on", "off", "y", "n", + // Numerical words taken from https://yaml.org/type/float.html + ".nan", "-.inf", "+.inf", ".inf", "null", + // Invalid keys that contain no invalid characters + "-", "---", "", + ]; + RESERVED.iter().any(|k| key.eq_ignore_ascii_case(k)) + } - string.is_empty() - || need_quotes_spaces(string) - || string.starts_with(['&' , '*' , '?' , '|' , '-' , '<' , '>' , '=' , '!' , '%' , '@']) - || string.contains(|c| matches!(c, ':' | '{' | '}' | '[' | ']' | ',' | '#' | '`' | '\"' | '\'' | '\\' | '\0'..='\x06' | '\t' | '\n' | '\r' | '\x0e'..='\x1a' | '\x1c'..='\x1f')) - || [ - // http://yaml.org/type/bool.html - "yes", "Yes", "YES", "no", "No", "NO", "True", "TRUE", "true", "False", "FALSE", "false", - "on", "On", "ON", "off", "Off", "OFF", // http://yaml.org/type/null.html - "null", "Null", "NULL", "~", - // > Quoted in std.jsonnet, however, in serde_yaml they were quoted: - // > Note: 'y', 'Y', 'n', 'N', is not quoted deliberately, as in libyaml. PyYAML also parse - // > them as string, not booleans, although it is violating the YAML 1.1 specification. - // > See https://github.com/dtolnay/serde-yaml/pull/83#discussion_r152628088. - "y", "Y", "n", "N", - "-.inf", "+.inf", ".inf", - "-", "---", "" - ].contains(&string) - || (string.chars().all(|c| matches!(c, '0'..='9' | '-')) - && string.chars().filter(|c| *c == '-').count() == 2) - || string.starts_with('.') - || string.starts_with("0x") - || string.parse::().is_ok() - || string.parse::().is_ok() + // Check for unsafe characters + if !key + .chars() + .all(|v| matches!(v, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' | '/')) + { + return false; + } + // Check for reserved words + if is_reserved(key) { + return false; + } + // Check for timestamp values. Since spaces and colons are already forbidden, + // all that could potentially pass is the standard date format (ex MM-DD-YYYY, YYYY-DD-MM, etc). + // This check is even more conservative: Keys that meet all of the following: + // - all characters match [0-9\-] + // - has exactly 2 dashes + // are considered dates. + if key.chars().all(|v| matches!(v, '0'..='9' | '-')) && count_char(key, '-') == 2 { + return false; + } + // Check for integers. Keys that meet all of the following: + // - all characters match [0-9_\-] + // - has at most 1 dash + // are considered integers. + else if key.chars().all(|v| matches!(v, '0'..='9' | '-' | '_')) && count_char(key, '-') < 2 { + return false; + } + // Check for binary integers. Keys that meet all of the following: + // - all characters match [0-9b_\-] + // - has at least 3 characters + // - starts with (-)0b + // are considered binary integers. + else if key + .chars() + .all(|v| matches!(v, '0'..='9' | '-' | '_' | 'b' | 'B')) + && (key.starts_with("0b") || key.starts_with("-0b")) + && key.len() > 2 + { + return false; + } + // Check for floats. Keys that meet all of the following: + // - all characters match [0-9e._\-] + // - has at most a single period + // - has at most two dashes + // - has at most 1 'e' + // are considered floats. + else if key + .chars() + .all(|v| matches!(v, '0'..='9' | '-' | '_' | 'e' | 'E' | '.')) + && count_char_u(key, 'e') < 2 + && count_char(key, '-') < 3 + && count_char(key, '.') <= 1 + { + return false; + } + // Check for hexadecimals. Keys that meet all of the following: + // - all characters match [0-9a-fx_\-] + // - has at most 1 dash + // - has at least 3 characters + // - starts with (-)0x + // are considered hexadecimals. + else if key + .chars() + .all(|v| matches!(v, '0'..='9' | '-' | '_' | 'x' | 'X' | 'a'..='f' | 'A'..='F' )) + && key.len() >= 3 + && count_char(key, '-') < 2 + && (key.starts_with("-0x") || key.starts_with("0x")) + { + return false; + } + true } #[allow(dead_code)] @@ -142,7 +206,7 @@ buf.push_str(&options.padding); buf.push_str(line); } - } else if !options.quote_keys && !yaml_needs_quotes(&s) { + } else if !options.quote_values && bare_safe(&s) { buf.push_str(&s); } else { escape_string_json_buf(&s, buf); @@ -203,7 +267,7 @@ buf.push('\n'); buf.push_str(cur_padding); } - if !options.quote_keys && !yaml_needs_quotes(&key) { + if !options.quote_keys && bare_safe(&key) { buf.push_str(&key); } else { escape_string_json_buf(&key, buf); -- gitstuff