From a5a3db2d32ff1d359aef5ec586b91164570c1685 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 5 Nov 2019 09:56:15 -0800 Subject: [PATCH 1/7] Support custom vendor strings. Add support for custom vendors, as in "x86_64-gentoo-linux-musl". Fixes #33. --- src/targets.rs | 108 ++++++++++++++++++++++++++++++++++++++++++++++++- src/triple.rs | 4 -- 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/src/targets.rs b/src/targets.rs index 6ae570e..90b2736 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/targets.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/targets.rs @@ -1,6 +1,8 @@ // This file defines all the identifier enums and target-aware logic. use crate::triple::{Endianness, PointerWidth, Triple}; +use alloc::boxed::Box; +use alloc::string::String; use core::fmt; use core::str::FromStr; @@ -292,7 +294,7 @@ impl Aarch64Architecture { /// The "vendor" field, which in practice is little more than an arbitrary /// modifier. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] #[allow(missing_docs)] pub enum Vendor { Unknown, @@ -306,6 +308,15 @@ pub enum Vendor { Sun, Uwp, Wrs, + + /// A custom vendor. "Custom" in this context means that the vendor is + /// not specifically recognized by upstream Autotools, LLVM, Rust, or other + /// relevant authorities on triple naming. It's useful for people building + /// and using locally patched toolchains. + /// + /// Outside of such patched environments, users of `target-lexicon` should + /// treat `Custom` the same as `Unknown` and ignore the string. + Custom(Box), } /// The "operating system" field, which sometimes implies an environment, and @@ -717,6 +728,7 @@ impl fmt::Display for Vendor { Vendor::Sun => "sun", Vendor::Uwp => "uwp", Vendor::Wrs => "wrs", + Vendor::Custom(ref name) => name, }; f.write_str(s) } @@ -738,7 +750,46 @@ impl FromStr for Vendor { "sun" => Vendor::Sun, "uwp" => Vendor::Uwp, "wrs" => Vendor::Wrs, - _ => return Err(()), + custom => { + use alloc::borrow::ToOwned; + + // A custom vendor. Since triple syntax is so loosely defined, + // be as conservative as we can to avoid potential ambiguities. + // We err on the side of being too strict here, as we can + // always relax it if needed. + + // Don't allow empty string names. + if custom.is_empty() { + return Err(()); + } + + // Don't allow any other recognized name as a custom vendor, + // since vendors can be omitted in some contexts. + if Architecture::from_str(custom).is_ok() + || OperatingSystem::from_str(custom).is_ok() + || Environment::from_str(custom).is_ok() + || BinaryFormat::from_str(custom).is_ok() + { + return Err(()); + } + + // Require the first character to be an ascii lowercase. + if !custom.chars().nth(0).unwrap().is_ascii_lowercase() { + return Err(()); + } + + // Restrict the set of characters permitted in a custom vendor. + if custom + .find(|c: char| { + !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '.') + }) + .is_some() + { + return Err(()); + } + + Vendor::Custom(Box::new(custom.to_owned())) + } }) } } @@ -1120,4 +1171,57 @@ mod tests { assert_eq!(t.environment, Environment::Eabihf); assert_eq!(t.binary_format, BinaryFormat::Elf); } + + #[test] + fn custom_vendors() { + assert!(Triple::from_str("x86_64--linux").is_err()); + assert!(Triple::from_str("x86_64-42-linux").is_err()); + assert!(Triple::from_str("x86_64-__customvendor__-linux").is_err()); + assert!(Triple::from_str("x86_64-^-linux").is_err()); + assert!(Triple::from_str("x86_64- -linux").is_err()); + assert!(Triple::from_str("x86_64-CustomVendor-linux").is_err()); + assert!(Triple::from_str("x86_64-linux-linux").is_err()); + assert!(Triple::from_str("x86_64-x86_64-linux").is_err()); + assert!(Triple::from_str("x86_64-elf-linux").is_err()); + assert!(Triple::from_str("x86_64-gnu-linux").is_err()); + assert!(Triple::from_str("x86_64-linux-customvendor").is_err()); + assert!(Triple::from_str("customvendor").is_err()); + assert!(Triple::from_str("customvendor-x86_64").is_err()); + assert!(Triple::from_str("x86_64-").is_err()); + assert!(Triple::from_str("x86_64--").is_err()); + + let t = Triple::from_str("x86_64-customvendor-linux") + .expect("can't parse target with custom vendor"); + assert_eq!(t.architecture, Architecture::X86_64); + assert_eq!( + t.vendor, + Vendor::Custom(Box::new(String::from_str("customvendor").unwrap())) + ); + assert_eq!(t.operating_system, OperatingSystem::Linux); + assert_eq!(t.environment, Environment::Unknown); + assert_eq!(t.binary_format, BinaryFormat::Elf); + assert_eq!(t.to_string(), "x86_64-customvendor-linux"); + + let t = Triple::from_str("x86_64-customvendor") + .expect("can't parse target with custom vendor"); + assert_eq!(t.architecture, Architecture::X86_64); + assert_eq!( + t.vendor, + Vendor::Custom(Box::new(String::from_str("customvendor").unwrap())) + ); + assert_eq!(t.operating_system, OperatingSystem::Unknown); + assert_eq!(t.environment, Environment::Unknown); + assert_eq!(t.binary_format, BinaryFormat::Unknown); + + assert_eq!( + Triple::from_str("unknown-foo"), + Ok(Triple { + architecture: Architecture::Unknown, + vendor: Vendor::Custom(Box::new(String::from_str("foo").unwrap())), + operating_system: OperatingSystem::Unknown, + environment: Environment::Unknown, + binary_format: BinaryFormat::Unknown, + }) + ); + } } diff --git a/src/triple.rs b/src/triple.rs index 36dcd9a..1abda26 100644 --- a/third_party/rust/target-lexicon.0.9.0/src/triple.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/triple.rs @@ -322,10 +322,6 @@ mod tests { Triple::from_str("foo"), Err(ParseError::UnrecognizedArchitecture("foo".to_owned())) ); - assert_eq!( - Triple::from_str("unknown-foo"), - Err(ParseError::UnrecognizedVendor("foo".to_owned())) - ); assert_eq!( Triple::from_str("unknown-unknown-foo"), Err(ParseError::UnrecognizedOperatingSystem("foo".to_owned())) From 6f90d7274dce4e7f9bb120f6b36cf26881bde9a7 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 5 Nov 2019 10:33:56 -0800 Subject: [PATCH 2/7] Add more tests. --- src/targets.rs | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/targets.rs b/src/targets.rs index 90b2736..7d1f069 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/targets.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/targets.rs @@ -1174,6 +1174,7 @@ mod tests { #[test] fn custom_vendors() { + // Test various invalid cases. assert!(Triple::from_str("x86_64--linux").is_err()); assert!(Triple::from_str("x86_64-42-linux").is_err()); assert!(Triple::from_str("x86_64-__customvendor__-linux").is_err()); @@ -1190,6 +1191,31 @@ mod tests { assert!(Triple::from_str("x86_64-").is_err()); assert!(Triple::from_str("x86_64--").is_err()); + // Test various Unicode things. + assert!( + Triple::from_str("x86_64-𝓬𝓾𝓼𝓽𝓸𝓶𝓿𝓮𝓷𝓭𝓸𝓻-linux").is_err(), + "unicode font hazard" + ); + assert!( + Triple::from_str("x86_64-ćúśtőḿvéńdőŕ-linux").is_err(), + "diacritical mark stripping hazard" + ); + assert!( + Triple::from_str("x86_64-customvendοr-linux").is_err(), + "homoglyph hazard" + ); + assert!(Triple::from_str("x86_64-customvendor-linux").is_ok()); + assert!( + Triple::from_str("x86_64-ffi-linux").is_err(), + "normalization hazard" + ); + assert!(Triple::from_str("x86_64-ffi-linux").is_ok()); + assert!( + Triple::from_str("x86_64-custom‍vendor-linux").is_err(), + "zero-width character hazard" + ); + + // Test some valid cases. let t = Triple::from_str("x86_64-customvendor-linux") .expect("can't parse target with custom vendor"); assert_eq!(t.architecture, Architecture::X86_64); @@ -1202,8 +1228,8 @@ mod tests { assert_eq!(t.binary_format, BinaryFormat::Elf); assert_eq!(t.to_string(), "x86_64-customvendor-linux"); - let t = Triple::from_str("x86_64-customvendor") - .expect("can't parse target with custom vendor"); + let t = + Triple::from_str("x86_64-customvendor").expect("can't parse target with custom vendor"); assert_eq!(t.architecture, Architecture::X86_64); assert_eq!( t.vendor, From c0e318b3c1be2d1965579f07dd563fb9cc0c4eb1 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 5 Nov 2019 12:56:31 -0800 Subject: [PATCH 3/7] Use `.chars().any(...)` instead of `.find(...).is_some()`. --- src/targets.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/targets.rs b/src/targets.rs index 7d1f069..1078dd3 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/targets.rs +++ b/third_party/rust/target-lexicon/src-0.9.0/targets.rs @@ -779,12 +779,9 @@ impl FromStr for Vendor { } // Restrict the set of characters permitted in a custom vendor. - if custom - .find(|c: char| { - !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '.') - }) - .is_some() - { + if custom.chars().any(|c: char| { + !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '.') + }) { return Err(()); } From f319950528654c772193d9eb3bf40bc8df35fcae Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 7 Nov 2019 15:15:48 -0800 Subject: [PATCH 4/7] Fix build.rs to generate the correct code to build Vendors. --- build.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/build.rs b/build.rs index a0ba3b7..446f9e7 100644 --- a/third_party/rust/target-lexicon-0.9.0/build.rs +++ b/third_party/rust/target-lexicon-0.9.0/build.rs @@ -32,6 +32,7 @@ mod parse_error { } } +use self::targets::Vendor; use self::triple::Triple; fn main() { @@ -60,7 +61,7 @@ fn write_host_rs(mut out: File, triple: Triple) -> io::Result<()> { " architecture: Architecture::{:?},", triple.architecture )?; - writeln!(out, " vendor: Vendor::{:?},", triple.vendor)?; + writeln!(out, " vendor: {},", vendor_display(&triple.vendor))?; writeln!( out, " operating_system: OperatingSystem::{:?},", @@ -90,7 +91,7 @@ fn write_host_rs(mut out: File, triple: Triple) -> io::Result<()> { writeln!(out, "impl Vendor {{")?; writeln!(out, " /// Return the vendor for the current host.")?; writeln!(out, " pub const fn host() -> Self {{")?; - writeln!(out, " Vendor::{:?}", triple.vendor)?; + writeln!(out, " {}", vendor_display(&triple.vendor))?; writeln!(out, " }}")?; writeln!(out, "}}")?; writeln!(out)?; @@ -160,3 +161,12 @@ fn write_host_rs(mut out: File, triple: Triple) -> io::Result<()> { Ok(()) } + +fn vendor_display(vendor: &Vendor) -> String { + match vendor { + Vendor::Custom(custom) => { + format!("Vendor::Custom(Box::new(String::from_str({:?})))", custom) + } + known => format!("Vendor::{:?}", known), + } +} From e558f6934535be3b8ccc9a99a33e861cb7431dfe Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 8 Nov 2019 12:10:34 -0800 Subject: [PATCH 5/7] Fix custom vendors in `const fn` contexts. --- build.rs | 15 +++++++++++---- src/lib.rs | 4 ++-- src/targets.rs | 51 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 14 deletions(-) diff --git a/build.rs b/build.rs index 446f9e7..e88206e 100644 --- a/third_party/rust/target-lexicon-0.9.0/build.rs +++ b/third_party/rust/target-lexicon-0.9.0/build.rs @@ -53,6 +53,8 @@ fn write_host_rs(mut out: File, triple: Triple) -> io::Result<()> { writeln!(out, "use crate::Aarch64Architecture::*;")?; writeln!(out, "#[allow(unused_imports)]")?; writeln!(out, "use crate::ArmArchitecture::*;")?; + writeln!(out, "#[allow(unused_imports)]")?; + writeln!(out, "use crate::CustomVendor;")?; writeln!(out)?; writeln!(out, "/// The `Triple` of the current host.")?; writeln!(out, "pub const HOST: Triple = Triple {{")?; @@ -139,7 +141,11 @@ fn write_host_rs(mut out: File, triple: Triple) -> io::Result<()> { " architecture: Architecture::{:?},", triple.architecture )?; - writeln!(out, " vendor: Vendor::{:?},", triple.vendor)?; + writeln!( + out, + " vendor: {},", + vendor_display(&triple.vendor) + )?; writeln!( out, " operating_system: OperatingSystem::{:?},", @@ -164,9 +170,10 @@ fn write_host_rs(mut out: File, triple: Triple) -> io::Result<()> { fn vendor_display(vendor: &Vendor) -> String { match vendor { - Vendor::Custom(custom) => { - format!("Vendor::Custom(Box::new(String::from_str({:?})))", custom) - } + Vendor::Custom(custom) => format!( + "Vendor::Custom(CustomVendor::Static({:?}))", + custom.as_str() + ), known => format!("Vendor::{:?}", known), } } diff --git a/src/lib.rs b/src/lib.rs index 8d6da8d..70f6488 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/lib.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/lib.rs @@ -28,7 +28,7 @@ mod triple; pub use self::host::HOST; pub use self::parse_error::ParseError; pub use self::targets::{ - Aarch64Architecture, Architecture, ArmArchitecture, BinaryFormat, Environment, OperatingSystem, - Vendor, + Aarch64Architecture, Architecture, ArmArchitecture, BinaryFormat, CustomVendor, Environment, + OperatingSystem, Vendor, }; pub use self::triple::{CallingConvention, Endianness, PointerWidth, Triple}; diff --git a/src/targets.rs b/src/targets.rs index 1078dd3..7152020 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/targets.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/targets.rs @@ -4,6 +4,7 @@ use crate::triple::{Endianness, PointerWidth, Triple}; use alloc::boxed::Box; use alloc::string::String; use core::fmt; +use core::hash::{Hash, Hasher}; use core::str::FromStr; /// The "architecture" field, which in some cases also specifies a specific @@ -292,6 +293,39 @@ impl Aarch64Architecture { } } +/// A string for a `Vendor::Custom` that can either be used in `const` +/// contexts or hold dynamic strings. +#[derive(Clone, Debug, Eq)] +pub enum CustomVendor { + /// An owned `String`. This supports the general case. + Owned(Box), + /// A static `str`, so that `CustomVendor` can be constructed in `const` + /// contexts. + Static(&'static str), +} + +impl CustomVendor { + /// Extracts a string slice. + pub fn as_str(&self) -> &str { + match self { + CustomVendor::Owned(s) => s, + CustomVendor::Static(s) => s, + } + } +} + +impl PartialEq for CustomVendor { + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} + +impl Hash for CustomVendor { + fn hash(&self, state: &mut H) { + self.as_str().hash(state) + } +} + /// The "vendor" field, which in practice is little more than an arbitrary /// modifier. #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -316,7 +350,7 @@ pub enum Vendor { /// /// Outside of such patched environments, users of `target-lexicon` should /// treat `Custom` the same as `Unknown` and ignore the string. - Custom(Box), + Custom(CustomVendor), } /// The "operating system" field, which sometimes implies an environment, and @@ -728,7 +762,7 @@ impl fmt::Display for Vendor { Vendor::Sun => "sun", Vendor::Uwp => "uwp", Vendor::Wrs => "wrs", - Vendor::Custom(ref name) => name, + Vendor::Custom(ref name) => name.as_str(), }; f.write_str(s) } @@ -779,13 +813,14 @@ impl FromStr for Vendor { } // Restrict the set of characters permitted in a custom vendor. - if custom.chars().any(|c: char| { + fn is_prohibited_char(c: char) -> bool { !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '.') - }) { + } + if custom.chars().any(is_prohibited_char) { return Err(()); } - Vendor::Custom(Box::new(custom.to_owned())) + Vendor::Custom(CustomVendor::Owned(Box::new(custom.to_owned()))) } }) } @@ -1218,7 +1253,7 @@ mod tests { assert_eq!(t.architecture, Architecture::X86_64); assert_eq!( t.vendor, - Vendor::Custom(Box::new(String::from_str("customvendor").unwrap())) + Vendor::Custom(CustomVendor::Static("customvendor")) ); assert_eq!(t.operating_system, OperatingSystem::Linux); assert_eq!(t.environment, Environment::Unknown); @@ -1230,7 +1265,7 @@ mod tests { assert_eq!(t.architecture, Architecture::X86_64); assert_eq!( t.vendor, - Vendor::Custom(Box::new(String::from_str("customvendor").unwrap())) + Vendor::Custom(CustomVendor::Static("customvendor")) ); assert_eq!(t.operating_system, OperatingSystem::Unknown); assert_eq!(t.environment, Environment::Unknown); @@ -1240,7 +1275,7 @@ mod tests { Triple::from_str("unknown-foo"), Ok(Triple { architecture: Architecture::Unknown, - vendor: Vendor::Custom(Box::new(String::from_str("foo").unwrap())), + vendor: Vendor::Custom(CustomVendor::Static("foo")), operating_system: OperatingSystem::Unknown, environment: Environment::Unknown, binary_format: BinaryFormat::Unknown, From bc4b444133b8a5e56602f7c77c10ef3f1e7a7c78 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 18 Nov 2019 13:45:58 -0800 Subject: [PATCH 6/7] Add a testcase with a BOM too, just in case. --- src/targets.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/targets.rs b/src/targets.rs index 7152020..9a4d990 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/targets.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/targets.rs @@ -1246,6 +1246,10 @@ mod tests { Triple::from_str("x86_64-custom‍vendor-linux").is_err(), "zero-width character hazard" ); + assert!( + Triple::from_str("x86_64-customvendor-linux").is_err(), + "BOM hazard" + ); // Test some valid cases. let t = Triple::from_str("x86_64-customvendor-linux") From 721fbbe1c9cfd3adc9aaf011c62d6a36078f4133 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 18 Nov 2019 20:56:40 -0800 Subject: [PATCH 7/7] Use an anonymous function instead of just a local function. --- src/targets.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/targets.rs b/src/targets.rs index 9a4d990..eb5a088 100644 --- a/third_party/rust/target-lexicon-0.9.0/src/targets.rs +++ b/third_party/rust/target-lexicon-0.9.0/src/targets.rs @@ -813,10 +813,9 @@ impl FromStr for Vendor { } // Restrict the set of characters permitted in a custom vendor. - fn is_prohibited_char(c: char) -> bool { + if custom.chars().any(|c: char| { !(c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '.') - } - if custom.chars().any(is_prohibited_char) { + }) { return Err(()); }