#![deny(missing_docs)]
#![cfg_attr(test, deny(warnings))]
#![cfg_attr(feature = "heap_size", feature(custom_derive, plugin))]
#![cfg_attr(feature = "heap_size", plugin(heapsize_plugin))]
#[cfg(feature = "heap_size")]
extern crate heapsize;
use std::ascii::AsciiExt;
use std::cmp::Ordering;
use std::collections::{BTreeMap, BTreeSet};
use std::error::Error as ErrorTrait;
use std::fmt::{self, Display};
use std::iter::FromIterator;
/// Returns `true` when every character of `s` is an ASCII letter (`A`-`Z` or `a`-`z`).
///
/// The empty string contains no offending character and therefore yields `true`.
fn is_alphabetic(s: &str) -> bool {
    // A non-ASCII character is encoded exclusively with bytes >= 0x80, so checking the
    // raw bytes accepts exactly the same strings as checking the decoded characters.
    s.bytes().all(|b| (b'A' <= b && b <= b'Z') || (b'a' <= b && b <= b'z'))
}
/// Returns `true` when every character of `s` is an ASCII digit (`0`-`9`).
///
/// The empty string yields `true` because it contains no non-digit character.
fn is_numeric(s: &str) -> bool {
    !s.chars().any(|c| c < '0' || c > '9')
}
/// Returns `true` when `s` consists solely of ASCII letters, ASCII digits and `-`.
///
/// The empty string yields `true`.
fn is_alphanumeric_or_dash(s: &str) -> bool {
    /// Tells whether a single character may appear in a language tag.
    fn allowed(c: char) -> bool {
        match c {
            '-' => true,
            _ => ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'),
        }
    }
    s.chars().all(allowed)
}
/// Errors that can occur while parsing a language tag.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// The same extension subtag appeared more than once in a tag.
    DuplicateExtension,
    /// An extension subtag is present but has no content.
    EmptyExtension,
    /// The `x` subtag is present but has no content.
    EmptyPrivateUse,
    /// The tag contains a character that is not allowed.
    ForbiddenChar,
    /// A subtag fails to parse because it matches no kind of subtag.
    InvalidSubtag,
    /// The language subtag is invalid.
    InvalidLanguage,
    /// A subtag is longer than eight characters.
    SubtagTooLong,
    /// More extlangs are present than are allowed.
    TooManyExtlangs,
}
impl ErrorTrait for Error {
fn description(&self) -> &str {
match *self {
Error::DuplicateExtension => "The same extension subtag is only allowed once in a tag",
Error::EmptyExtension => "If an extension subtag is present, it must not be empty",
Error::EmptyPrivateUse => "If the `x` subtag is present, it must not be empty",
Error::ForbiddenChar => "The langtag contains a char not allowed",
Error::InvalidSubtag => "A subtag fails to parse, it does not match any other subtags",
Error::InvalidLanguage => "The given language subtag is invalid",
Error::SubtagTooLong => "A subtag may be eight characters in length at maximum",
Error::TooManyExtlangs => "At maximum three extlangs are allowed",
}
}
}
impl Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(self.description())
}
}
/// Result type of this crate: a `std::result::Result` whose error type is fixed to `Error`.
pub type Result<T> = ::std::result::Result<T, Error>;
/// Tags that are recognised as a whole instead of being split into subtags.
///
/// Each entry pairs such a tag with the tag that replaces it during canonicalization;
/// the replacement is `None` when there is none.
pub const GRANDFATHERED: [(&'static str, Option<&'static str>); 26] = [
    ("art-lojban", Some("jbo")),
    ("cel-gaulish", None),
    ("en-GB-oed", Some("en-GB-oxendict")),
    ("i-ami", Some("ami")),
    ("i-bnn", Some("bnn")),
    ("i-default", None),
    ("i-enochian", None),
    ("i-hak", Some("hak")),
    ("i-klingon", Some("tlh")),
    ("i-lux", Some("lb")),
    ("i-mingo", None),
    ("i-navajo", Some("nv")),
    ("i-pwn", Some("pwn")),
    ("i-tao", Some("tao")),
    ("i-tay", Some("tay")),
    ("i-tsu", Some("tsu")),
    ("no-bok", Some("nb")),
    ("no-nyn", Some("nn")),
    ("sgn-BE-FR", Some("sfb")),
    ("sgn-BE-NL", Some("vgt")),
    ("sgn-CH-DE", Some("sgg")),
    ("zh-guoyu", Some("cmn")),
    ("zh-hakka", Some("hak")),
    ("zh-min", None),
    ("zh-min-nan", Some("nan")),
    ("zh-xiang", Some("hsn")),
];
/// Deprecated language subtags, each paired with the subtag that `canonicalize`
/// substitutes for it.
const DEPRECATED_LANGUAGE: [(&'static str, &'static str); 53] = [
    ("in", "id"),
    ("iw", "he"),
    ("ji", "yi"),
    ("jw", "jv"),
    ("mo", "ro"),
    ("aam", "aas"),
    ("adp", "dz"),
    ("aue", "ktz"),
    ("ayx", "nun"),
    ("bjd", "drl"),
    ("ccq", "rki"),
    ("cjr", "mom"),
    ("cka", "cmr"),
    ("cmk", "xch"),
    ("drh", "khk"),
    ("drw", "prs"),
    ("gav", "dev"),
    ("gfx", "vaj"),
    ("gti", "nyc"),
    ("hrr", "jal"),
    ("ibi", "opa"),
    ("ilw", "gal"),
    ("kgh", "kml"),
    ("koj", "kwv"),
    ("kwq", "yam"),
    ("kxe", "tvd"),
    ("lii", "raq"),
    ("lmm", "rmx"),
    ("meg", "cir"),
    ("mst", "mry"),
    ("mwj", "vaj"),
    ("myt", "mry"),
    ("nnx", "ngv"),
    ("oun", "vaj"),
    ("pcr", "adx"),
    ("pmu", "phr"),
    ("ppr", "lcq"),
    ("puz", "pub"),
    ("sca", "hle"),
    ("thx", "oyb"),
    ("tie", "ras"),
    ("tkk", "twm"),
    ("tlw", "weo"),
    ("tnf", "prs"),
    ("tsf", "taj"),
    ("uok", "ema"),
    ("xia", "acn"),
    ("xsj", "suj"),
    ("ybd", "rki"),
    ("yma", "lrr"),
    ("ymt", "mtm"),
    ("yos", "zom"),
    ("yuu", "yug"),
];
/// Deprecated region subtags, each paired with the subtag that `canonicalize`
/// substitutes for it.
const DEPRECATED_REGION: [(&'static str, &'static str); 6] = [
    ("BU", "MM"),
    ("DD", "DE"),
    ("FX", "FR"),
    ("TP", "TL"),
    ("YD", "YE"),
    ("ZR", "CD"),
];
/// A language tag split into its subtags.
///
/// All fields are public and may be filled in by hand; the parser stores every subtag
/// exactly as it was written in the input.
#[derive(Clone, Debug, Default, Eq)]
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
pub struct LanguageTag {
    /// Primary language subtag. For a grandfathered tag the parser stores the whole tag
    /// here.
    pub language: Option<String>,
    /// Extended language subtags: three-letter subtags directly following a language of
    /// two or three letters.
    pub extlangs: Vec<String>,
    /// Script subtag: four letters.
    pub script: Option<String>,
    /// Region subtag: two letters or three digits.
    pub region: Option<String>,
    /// Variant subtags.
    pub variants: Vec<String>,
    /// Extension subtags, keyed by the byte of the singleton that introduces them.
    pub extensions: BTreeMap<u8, Vec<String>>,
    /// Private-use subtags, i.e. everything that follows the `x` singleton.
    pub privateuse: Vec<String>,
}
impl LanguageTag {
    /// Tests whether `other` falls within the language range described by `self`.
    ///
    /// `self` acts as the range and `other` as a concrete tag. A `None` language, script
    /// or region in the range accepts any value; a `Some` value must be present in
    /// `other` and equal it ignoring ASCII case. Every extlang and variant of the range
    /// must be matched, position by position and ignoring ASCII case, by `other`;
    /// additional trailing extlangs or variants of `other` are ignored. Extensions and
    /// private-use subtags of `other` are not inspected.
    ///
    /// # Panics
    ///
    /// Panics if `self` is not a language range, i.e. if it has extensions or
    /// private-use subtags (see `is_language_range`).
    pub fn matches(&self, other: &LanguageTag) -> bool {
        fn matches_option(a: &Option<String>, b: &Option<String>) -> bool {
            match (a, b) {
                (&Some(ref a), &Some(ref b)) => a.eq_ignore_ascii_case(b),
                (&None, _) => true,
                (_, &None) => false,
            }
        }
        fn matches_vec(a: &[String], b: &[String]) -> bool {
            // `zip` stops at the shorter side. Without the length guard a range that
            // demands more subtags than the tag has (range `de-1996`, tag `de`) would
            // be reported as matching.
            a.len() <= b.len() && a.iter().zip(b.iter()).all(|(x, y)| x.eq_ignore_ascii_case(y))
        }
        assert!(self.is_language_range());
        matches_option(&self.language, &other.language) &&
        matches_vec(&self.extlangs, &other.extlangs) &&
        matches_option(&self.script, &other.script) &&
        matches_option(&self.region, &other.region) &&
        matches_vec(&self.variants, &other.variants)
    }

    /// Returns `true` if the tag has neither extensions nor private-use subtags and can
    /// therefore be used as the range in `matches`.
    pub fn is_language_range(&self) -> bool {
        self.extensions.is_empty() && self.privateuse.is_empty()
    }

    /// Returns a canonicalized copy of the tag.
    ///
    /// * If `language` holds a grandfathered tag that has a replacement, the parsed
    ///   replacement is returned and all other fields of `self` are discarded.
    /// * Otherwise the first extlang, if any, becomes the language and the extlangs are
    ///   dropped.
    /// * A deprecated language in `self.language` is replaced by its successor; this
    ///   takes precedence over the extlang promotion.
    /// * A deprecated region is replaced by its successor.
    /// * The variant `heploc` is replaced by `alalc97`.
    ///
    /// All lookups ignore ASCII case.
    ///
    /// # Panics
    ///
    /// Panics if a replacement listed in `GRANDFATHERED` fails to parse.
    pub fn canonicalize(&self) -> LanguageTag {
        if let Some(ref language) = self.language {
            let entry = GRANDFATHERED.iter().find(|&&(x, _)| x.eq_ignore_ascii_case(language));
            if let Some(&(_, Some(tag))) = entry {
                return tag.parse().expect("GRANDFATHERED list must contain only valid tags.");
            }
        }
        let mut tag = self.clone();
        if let Some(first_extlang) = self.extlangs.first() {
            tag.language = Some(first_extlang.clone());
            tag.extlangs = Vec::new();
        }
        if let Some(ref language) = self.language {
            let entry = DEPRECATED_LANGUAGE.iter()
                                           .find(|&&(x, _)| x.eq_ignore_ascii_case(language));
            if let Some(&(_, l)) = entry {
                tag.language = Some(l.to_owned());
            }
        }
        if let Some(ref region) = self.region {
            let entry = DEPRECATED_REGION.iter().find(|&&(x, _)| x.eq_ignore_ascii_case(region));
            if let Some(&(_, r)) = entry {
                tag.region = Some(r.to_owned());
            }
        }
        tag.variants = self.variants
                           .iter()
                           .map(|variant| {
                               if "heploc".eq_ignore_ascii_case(variant) {
                                   "alalc97".to_owned()
                               } else {
                                   variant.clone()
                               }
                           })
                           .collect();
        tag
    }
}
/// Language tags are compared ignoring ASCII case.
///
/// Two tags are equal when language, extlangs, script, region, variants, extension
/// values and private-use subtags all agree ignoring ASCII case and both tags carry the
/// same extension keys. Extension keys are compared as the bytes stored in the map.
impl PartialEq for LanguageTag {
    fn eq(&self, other: &LanguageTag) -> bool {
        fn eq_option(a: &Option<String>, b: &Option<String>) -> bool {
            match (a, b) {
                (&Some(ref a), &Some(ref b)) => a.eq_ignore_ascii_case(b),
                (&None, &None) => true,
                _ => false,
            }
        }
        fn eq_vec(a: &[String], b: &[String]) -> bool {
            a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| x.eq_ignore_ascii_case(y))
        }
        fn eq_extensions(a: &BTreeMap<u8, Vec<String>>, b: &BTreeMap<u8, Vec<String>>) -> bool {
            // Only the key sets are compared exactly. Comparing whole map entries here
            // would compare the values case-sensitively and defeat the case-insensitive
            // value comparison below.
            BTreeSet::from_iter(a.keys()) == BTreeSet::from_iter(b.keys()) &&
            // With identical key sets both maps iterate in the same key order, so the
            // values can be paired up positionally.
            a.values().zip(b.values()).all(|(x, y)| eq_vec(x, y))
        }
        eq_option(&self.language, &other.language) && eq_vec(&self.extlangs, &other.extlangs) &&
        eq_option(&self.script, &other.script) &&
        eq_option(&self.region, &other.region) && eq_vec(&self.variants, &other.variants) &&
        eq_extensions(&self.extensions, &other.extensions) &&
        eq_vec(&self.privateuse, &other.privateuse)
    }
}
fn parse_language_tag(langtag: &mut LanguageTag, t: &str) -> Result<u8> {
let mut position: u8 = 0;
for subtag in t.split('-') {
if subtag.len() > 8 {
return Err(Error::SubtagTooLong);
}
if position == 6 {
langtag.privateuse.push(subtag.to_owned());
} else if subtag.eq_ignore_ascii_case("x") {
position = 6;
} else if position == 0 {
if subtag.len() < 2 || !is_alphabetic(subtag) {
return Err(Error::InvalidLanguage);
}
langtag.language = Some(subtag.to_owned());
if subtag.len() < 4 {
position = 1;
} else {
position = 2;
}
} else if position == 1 && subtag.len() == 3 && is_alphabetic(subtag) {
langtag.extlangs.push(subtag.to_owned());
} else if position <= 2 && subtag.len() == 4 && is_alphabetic(subtag) {
langtag.script = Some(subtag.to_owned());
position = 3;
} else if position <= 3 &&
(subtag.len() == 2 && is_alphabetic(subtag) || subtag.len() == 3 && is_numeric(subtag)) {
langtag.region = Some(subtag.to_owned());
position = 4;
} else if position <= 4 &&
(subtag.len() >= 5 && is_alphabetic(&subtag[0..1]) ||
subtag.len() >= 4 && is_numeric(&subtag[0..1])) {
langtag.variants.push(subtag.to_owned());
position = 4;
} else if subtag.len() == 1 {
position = subtag.as_bytes()[0] as u8;
if langtag.extensions.contains_key(&position) {
return Err(Error::DuplicateExtension);
}
langtag.extensions.insert(position, Vec::new());
} else if position > 6 {
langtag.extensions
.get_mut(&position)
.expect("no entry found for key")
.push(subtag.to_owned());
} else {
return Err(Error::InvalidSubtag);
}
}
Ok(position)
}
impl std::str::FromStr for LanguageTag {
type Err = Error;
fn from_str(s: &str) -> Result<Self> {
let t = s.trim();
if !is_alphanumeric_or_dash(t) {
return Err(Error::ForbiddenChar);
}
let mut langtag: LanguageTag = Default::default();
if let Some(&(tag, _)) = GRANDFATHERED.iter().find(|&&(x, _)| x.eq_ignore_ascii_case(t)) {
langtag.language = Some((*tag).to_owned());
return Ok(langtag);
}
let position = try!(parse_language_tag(&mut langtag, t));
if langtag.extensions.values().any(|x| x.is_empty()) {
return Err(Error::EmptyExtension);
}
if position == 6 && langtag.privateuse.is_empty() {
return Err(Error::EmptyPrivateUse);
}
if langtag.extlangs.len() > 2 {
return Err(Error::TooManyExtlangs);
}
Ok(langtag)
}
}
impl fmt::Display for LanguageTag {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn cmp_ignore_ascii_case(a: &u8, b: &u8) -> Ordering {
fn byte_to_uppercase(x: u8) -> u8 {
if x > 96 {
x - 32
} else {
x
}
}
let x: u8 = byte_to_uppercase(*a);
let y: u8 = byte_to_uppercase(*b);
x.cmp(&y)
}
if let Some(ref x) = self.language {
try!(Display::fmt(&x.to_ascii_lowercase()[..], f))
}
for x in &self.extlangs {
try!(write!(f, "-{}", x.to_ascii_lowercase()));
}
if let Some(ref x) = self.script {
let y: String = x.chars()
.enumerate()
.map(|(i, c)| {
if i == 0 {
c.to_ascii_uppercase()
} else {
c.to_ascii_lowercase()
}
})
.collect();
try!(write!(f, "-{}", y));
}
if let Some(ref x) = self.region {
try!(write!(f, "-{}", x.to_ascii_uppercase()));
}
for x in &self.variants {
try!(write!(f, "-{}", x.to_ascii_lowercase()));
}
let mut extensions: Vec<(&u8, &Vec<String>)> = self.extensions.iter().collect();
extensions.sort_by(|&(a, _), &(b, _)| cmp_ignore_ascii_case(a, b));
for (raw_key, values) in extensions {
let mut key = String::new();
key.push(*raw_key as char);
try!(write!(f, "-{}", key));
for value in values {
try!(write!(f, "-{}", value));
}
}
if !self.privateuse.is_empty() {
if self.language.is_none() {
try!(f.write_str("x"));
} else {
try!(f.write_str("-x"));
}
for value in &self.privateuse {
try!(write!(f, "-{}", value));
}
}
Ok(())
}
}
/// Builds a `LanguageTag` from bare tokens, e.g. `langtag!(en;;;GB)`.
///
/// The general form is `langtag!(language; extlang; script; region)`. Trailing parts may
/// be left off and parts in the middle may be left empty. Every part is turned into a
/// `String` with `stringify!`, so the tokens are neither evaluated nor validated. Fields
/// that are not given are set to `None` or to an empty collection; at most one extlang
/// can be given.
#[macro_export]
macro_rules! langtag {
// language
( $language:expr ) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: Vec::new(),
script: None,
region: None,
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + region
( $language:expr;;;$region:expr ) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: Vec::new(),
script: None,
region: Some(stringify!($region).to_owned()),
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + script
( $language:expr;;$script:expr ) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: Vec::new(),
script: Some(stringify!($script).to_owned()),
region: None,
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + script + region
( $language:expr;;$script:expr;$region:expr ) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: Vec::new(),
script: Some(stringify!($script).to_owned()),
region: Some(stringify!($region).to_owned()),
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + extlang
( $language:expr;$extlangs:expr) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: vec![stringify!($extlangs).to_owned()],
script: None,
region: None,
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + extlang + script
( $language:expr;$extlangs:expr;$script:expr) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: vec![stringify!($extlangs).to_owned()],
script: Some(stringify!($script).to_owned()),
region: None,
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + extlang + region
( $language:expr;$extlangs:expr;;$region:expr ) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: vec![stringify!($extlangs).to_owned()],
script: None,
region: Some(stringify!($region).to_owned()),
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
// language + extlang + script + region
( $language:expr;$extlangs:expr;$script:expr;$region:expr ) => {
$crate::LanguageTag {
language: Some(stringify!($language).to_owned()),
extlangs: vec![stringify!($extlangs).to_owned()],
script: Some(stringify!($script).to_owned()),
region: Some(stringify!($region).to_owned()),
variants: Vec::new(),
extensions: ::std::collections::BTreeMap::new(),
privateuse: Vec::new(),
}
};
}