diff --git a/melib/src/utils/mod.rs b/melib/src/utils/mod.rs index 1071943b..e0bd5d1b 100644 --- a/melib/src/utils/mod.rs +++ b/melib/src/utils/mod.rs @@ -37,6 +37,7 @@ pub mod shellexpand; pub mod sqlite3; #[cfg(test)] mod tests; +pub mod urn; pub mod xdg; pub mod html_escape { diff --git a/melib/src/utils/urn/cow.rs b/melib/src/utils/urn/cow.rs new file mode 100644 index 00000000..94914717 --- /dev/null +++ b/melib/src/utils/urn/cow.rs @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright 2021, 2022, 2023 chayleaf +// + +use core::{ops::Deref, slice::SliceIndex}; + +use super::Result; + +#[allow(clippy::module_name_repetitions)] +pub(super) enum TriCow<'a> { + Owned(String), + Borrowed(&'a str), + MutBorrowed(&'a mut str), +} + +impl Deref for TriCow<'_> { + type Target = str; + fn deref(&self) -> &Self::Target { + match self { + Self::Owned(s) => s, + Self::Borrowed(s) => s, + Self::MutBorrowed(s) => s, + } + } +} + +impl TriCow<'_> { + #[allow(clippy::unnecessary_wraps)] + pub(super) fn replace_range( + &mut self, + range: core::ops::Range, + with: &str, + ) -> Result<()> { + match self { + TriCow::Owned(s) => { + s.replace_range(range, with); + Ok(()) + } + TriCow::Borrowed(s) => { + let mut s = s.to_owned(); + s.replace_range(range, with); + *self = TriCow::Owned(s); + Ok(()) + } + TriCow::MutBorrowed(s) => { + if range.len() == with.len() { + if let Some(slice) = s.get_mut(range.clone()) { + // SAFETY: both slice and with are valid utf-8 strings of same length + unsafe { slice.as_bytes_mut() }.copy_from_slice(with.as_bytes()); + return Ok(()); + } + } + { + let mut s = s.to_owned(); + s.replace_range(range, with); + *self = TriCow::Owned(s); + Ok(()) + } + } + } + } + fn to_mut(&mut self) -> Result<&mut str> { + match self { + TriCow::Owned(s) => Ok(s.as_mut_str()), + TriCow::Borrowed(s) => { + *self = TriCow::Owned(s.to_owned()); + if let TriCow::Owned(s) = self { + Ok(s.as_mut_str()) + } else { + unreachable!("cow isn't 
owned after making it owned, what happened?") + } + } + TriCow::MutBorrowed(s) => Ok(s), + } + } + /// # Panics + /// Panics if range isn't at valid character boundaries + pub(super) fn make_uppercase(&mut self, range: R) -> Result<()> + where + R: Clone + SliceIndex<[u8], Output = [u8]> + SliceIndex, + { + if self.as_bytes()[range.clone()] + .iter() + .any(u8::is_ascii_lowercase) + { + self.to_mut()?[range].make_ascii_uppercase(); + } + Ok(()) + } + /// # Panics + /// Panics if range isn't at valid character boundaries + pub(super) fn make_lowercase(&mut self, range: R) -> Result<()> + where + R: Clone + SliceIndex<[u8], Output = [u8]> + SliceIndex, + { + if self.as_bytes()[range.clone()] + .iter() + .any(u8::is_ascii_uppercase) + { + // if this isn't ascii, it will fail later + self.to_mut()?[range].make_ascii_lowercase(); + } + Ok(()) + } +} diff --git a/melib/src/utils/urn/mod.rs b/melib/src/utils/urn/mod.rs new file mode 100644 index 00000000..d5fee3dc --- /dev/null +++ b/melib/src/utils/urn/mod.rs @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright 2021, 2022, 2023 chayleaf +// + +//! [URNs](https://datatracker.ietf.org/doc/html/rfc8141). +//! +//! # Example +//! ``` +//! # use melib::utils::urn::{Urn, UrnSlice, UrnBuilder}; +//! # fn main() -> Result<(), Box> { +//! let urn = UrnBuilder::new("example", "1234:5678").build()?; +//! assert_eq!(urn.as_str(), "urn:example:1234:5678"); +//! assert_eq!(urn, "urn:example:1234:5678".parse::()?); // Using std::str::parse +//! assert_eq!(urn.nss(), "1234:5678"); +//! # Ok(()) +//! # } +//! 
``` +#![allow(clippy::missing_panics_doc)] + +use std::{ + borrow::ToOwned, + convert::{TryFrom, TryInto}, + error, fmt, + hash::{self, Hash}, + num::{NonZeroU32, NonZeroU8}, + ops::Range, + str::FromStr, +}; + +mod cow; +use cow::TriCow; + +mod owned; +pub use owned::Urn; + +pub mod percent; +use percent::{parse_f_component, parse_nss, parse_q_component, parse_r_component}; + +mod serde; + +/// Checks whether a string is a valid NID +fn is_valid_nid(s: &str) -> bool { + // RFC8141: + // NID = (alphanum) 0*30(ldh) (alphanum) + // ldh = alphanum / "-" + // + // RFC2141 additionally allows NIDs to end with - + (2..=32).contains(&s.len()) + && !s.starts_with('-') + && s.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-') +} + +const URN_PREFIX: &str = "urn:"; +const NID_NSS_SEPARATOR: &str = ":"; +const RCOMP_PREFIX: &str = "?+"; +const QCOMP_PREFIX: &str = "?="; +const FCOMP_PREFIX: &str = "#"; + +fn parse_urn(mut s: TriCow) -> Result { + // ensure that the first 4 bytes are a valid substring + if !s.is_char_boundary(URN_PREFIX.len()) { + return Err(Error::InvalidScheme); + } + + s.make_lowercase(..URN_PREFIX.len())?; + + if &s[..URN_PREFIX.len()] != URN_PREFIX { + return Err(Error::InvalidScheme); + } + + let nid_start = URN_PREFIX.len(); + let nid_end = nid_start + + s[nid_start..].find(NID_NSS_SEPARATOR).ok_or_else(|| { + if is_valid_nid(&s[nid_start..]) { + // If NID is present, but the NSS and its separator aren't, it counts as an NSS + // error + Error::InvalidNss + } else { + // the NSS separator couldn't be found, but whatever has been found doesn't even + // count as a valid NID + Error::InvalidNid + } + })?; + + if !is_valid_nid(&s[nid_start..nid_end]) { + return Err(Error::InvalidNid); + } + + // Now that we know the NID is valid, normalize it + s.make_lowercase(nid_start..nid_end)?; + + let nss_start = nid_end + NID_NSS_SEPARATOR.len(); + let nss_end = parse_nss(&mut s, nss_start)?; + + // NSS must be at least one character long + if nss_end == 
nss_start { + return Err(Error::InvalidNss); + } + + let mut end = nss_end; + let mut last_component_error = Error::InvalidNss; + + let r_component_len = if s[end..].starts_with(RCOMP_PREFIX) { + let rc_start = end + RCOMP_PREFIX.len(); + end = parse_r_component(&mut s, rc_start)?; + last_component_error = Error::InvalidRComponent; + Some( + (end - rc_start) + .try_into() + .ok() + .and_then(NonZeroU32::new) + .ok_or(last_component_error)?, + ) + } else { + None + }; + + let q_component_len = if s[end..].starts_with(QCOMP_PREFIX) { + let qc_start = end + QCOMP_PREFIX.len(); + end = parse_q_component(&mut s, qc_start)?; + last_component_error = Error::InvalidQComponent; + Some( + (end - qc_start) + .try_into() + .ok() + .and_then(NonZeroU32::new) + .ok_or(last_component_error)?, + ) + } else { + None + }; + + if s[end..].starts_with(FCOMP_PREFIX) { + let fc_start = end + FCOMP_PREFIX.len(); + end = parse_f_component(&mut s, fc_start)?; + last_component_error = Error::InvalidFComponent; + } + + if end < s.len() { + return Err(last_component_error); + } + + Ok(UrnSlice { + urn: s, + // unwrap: NID length range is 2..=32 bytes, so it always fits into non-zero u8 + nid_len: NonZeroU8::new((nid_end - nid_start).try_into().unwrap()).unwrap(), + // unwrap: NSS always has non-zero length + nss_len: NonZeroU32::new( + (nss_end - nss_start) + .try_into() + .map_err(|_| Error::InvalidNss)?, + ) + .unwrap(), + r_component_len, + q_component_len, + }) +} + +/// A URN validation error. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Error { + /// The URN has an invalid scheme. + InvalidScheme, + /// The URN has an invalid NID (Namespace ID). + InvalidNid, + /// The URN has an invalid NSS (Namespace-specific string). + InvalidNss, + /// The URN has an invalid r-component. + InvalidRComponent, + /// The URN has an invalid q-component. + InvalidQComponent, + /// The URN has an invalid f-component. + InvalidFComponent, + /// Allocation is required, but not possible. 
This is only ever created when + /// `alloc` feature is disabled. + AllocRequired, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(match self { + Self::InvalidScheme => "invalid urn scheme", + Self::InvalidNid => "invalid urn nid (namespace id)", + Self::InvalidNss => "invalid urn nss (namespace-specific string)", + Self::InvalidRComponent => "invalid urn r-component", + Self::InvalidQComponent => "invalid urn q-component", + Self::InvalidFComponent => "invalid urn f-component (fragment)", + Self::AllocRequired => "an allocation was required, but not possible", + }) + } +} + +type Result = core::result::Result; + +impl error::Error for Error {} + +/// A borrowed RFC2141/8141 URN (Uniform Resource Name). This is a copy-on-write +/// type. +/// +/// It will have to allocate if you call any of the setters. If you create it +/// via `TryFrom`, the provided buffer will be used. If you disable +/// `alloc` feature, this stops being a copy-on-write type and starts being a +/// regular borrow. +/// +/// **Note:** the equivalence checks are done +/// [according to the specification](https://www.rfc-editor.org/rfc/rfc8141.html#section-3), +/// only taking the NID and NSS into account! If you need exact equivalence +/// checks, consider comparing using `Urn::as_str()` as the key. Some namespaces +/// may define additional lexical equivalence rules, these aren't accounted for +/// in this implementation (Meaning there might be false negatives for some +/// namespaces). There will, however, be no false positives. +/// +/// Unlike [`Urn`]: +/// - When created via `TryFrom<&str>`, allocations only occur if the URN isn't +/// normalized (uppercase percent-encoded characters and lowercase `urn` +/// scheme and NID) +/// - When created via `TryFrom<&mut str>`, no allocations are done at all. +/// +/// `FromStr` is always required to allocate, so you should use `TryFrom` when +/// possible. 
+pub struct UrnSlice<'a> { + // Entire URN string + urn: TriCow<'a>, + nid_len: NonZeroU8, + nss_len: NonZeroU32, + r_component_len: Option, + q_component_len: Option, +} + +impl<'a> UrnSlice<'a> { + const fn nid_range(&self) -> Range { + // urn: + let start = URN_PREFIX.len(); + start..start + self.nid_len.get() as usize + } + + const fn nss_range(&self) -> Range { + // ...: + let start = self.nid_range().end + NID_NSS_SEPARATOR.len(); + start..start + self.nss_len.get() as usize + } + + fn r_component_range(&self) -> Option> { + self.r_component_len.map(|r_component_len| { + // ...[?+] + let start = self.nss_range().end + RCOMP_PREFIX.len(); + start..start + r_component_len.get() as usize + }) + } + + /// end of the last component before q-component + fn pre_q_component_end(&self) -> usize { + self.r_component_range() + .unwrap_or_else(|| self.nss_range()) + .end + } + + fn q_component_range(&self) -> Option> { + self.q_component_len.map(|q_component_len| { + // ...[?+][?=] + let start = self.pre_q_component_end() + QCOMP_PREFIX.len(); + start..start + q_component_len.get() as usize + }) + } + + /// end of the last component before f-component + fn pre_f_component_end(&self) -> usize { + self.q_component_range() + .or_else(|| self.r_component_range()) + .unwrap_or_else(|| self.nss_range()) + .end + } + + fn f_component_start(&self) -> Option { + // ...[#] + Some(self.pre_f_component_end()) + .filter(|x| *x < self.urn.len()) + .map(|x| x + FCOMP_PREFIX.len()) + } + + /// String representation of this URN (Normalized). + #[must_use] + pub fn as_str(&self) -> &str { + &self.urn + } + + /// NID (Namespace identifier), the first part of the URN. + /// + /// For example, in `urn:ietf:rfc:2648`, `ietf` is the namespace. + #[must_use] + pub fn nid(&self) -> &str { + &self.urn[self.nid_range()] + } + /// Set the NID (must be [a valid + /// NID](https://datatracker.ietf.org/doc/html/rfc8141#section-2)). 
+ /// + /// # Errors + /// Returns [`Error::InvalidNid`] in case of a validation failure. + pub fn set_nid(&mut self, nid: &str) -> Result<()> { + if !is_valid_nid(nid) { + return Err(Error::InvalidNid); + } + let mut nid = TriCow::Borrowed(nid); + nid.make_lowercase(..)?; + let range = self.nid_range(); + self.urn.replace_range(range, &nid)?; + // unwrap: NID length range is 2..=32 bytes, so it always fits into non-zero u8 + self.nid_len = NonZeroU8::new(nid.len().try_into().unwrap()).unwrap(); + Ok(()) + } + /// Percent-encoded NSS (Namespace-specific string) identifying the + /// resource. + /// + /// For example, in `urn:ietf:rfc:2648`, `rfc:2648` is the NSS. + /// + /// # See also + /// - [`percent::decode_nss`] + #[must_use] + pub fn nss(&self) -> &str { + &self.urn[self.nss_range()] + } + /// Set the NSS (must be [a valid NSS](https://datatracker.ietf.org/doc/html/rfc8141#section-2) + /// and use percent-encoding). + /// + /// # Errors + /// Returns [`Error::InvalidNss`] in case of a validation failure. + /// + /// # See also + /// - [`percent::encode_nss`] + pub fn set_nss(&mut self, nss: &str) -> Result<()> { + let mut nss = TriCow::Borrowed(nss); + if nss.is_empty() || parse_nss(&mut nss, 0)? != nss.len() { + return Err(Error::InvalidNss); + } + // unwrap: NSS length is non-zero as checked above + let nss_len = + NonZeroU32::new(nss.len().try_into().map_err(|_| Error::InvalidNss)?).unwrap(); + let range = self.nss_range(); + self.urn.replace_range(range, &nss)?; + self.nss_len = nss_len; + Ok(()) + } + /// Percent-encoded r-component, following the `?+` character sequence, to + /// be used for passing parameters to URN resolution services. + /// + /// In `urn:example:foo-bar-baz-qux?+CCResolve:cc=uk`, the r-component is + /// `CCResolve:cc=uk`. + /// + /// Should not be used for equivalence checks. As of the time of writing + /// this, exact semantics aren't in the RFC. 
+ /// + /// # See also + /// - [`percent::decode_r_component`] + #[must_use] + pub fn r_component(&self) -> Option<&str> { + self.r_component_range().map(|range| &self.urn[range]) + } + /// Set the r-component (must be [a valid + /// r-component](https://datatracker.ietf.org/doc/html/rfc8141#section-2) and use + /// percent-encoding). + /// + /// # Errors + /// Returns [`Error::InvalidRComponent`] in case of a validation failure. + /// + /// # See also + /// - [`percent::encode_r_component`] + pub fn set_r_component(&mut self, r_component: Option<&str>) -> Result<()> { + if let Some(rc) = r_component { + let mut rc = TriCow::Borrowed(rc); + if rc.is_empty() || parse_r_component(&mut rc, 0)? != rc.len() { + return Err(Error::InvalidRComponent); + } + let rc_len = rc.len().try_into().map_err(|_| Error::InvalidRComponent)?; + let range = if let Some(range) = self.r_component_range() { + range + } else { + // insert RCOMP_PREFIX if r-component doesn't already exist + let nss_end = self.nss_range().end; + self.urn.replace_range(nss_end..nss_end, RCOMP_PREFIX)?; + nss_end + RCOMP_PREFIX.len()..nss_end + RCOMP_PREFIX.len() + }; + self.urn.replace_range(range, &rc)?; + self.r_component_len = Some(NonZeroU32::new(rc_len).unwrap()); + } else if let Some(mut range) = self.r_component_range() { + range.start -= RCOMP_PREFIX.len(); + self.urn.replace_range(range, "")?; + self.r_component_len = None; + } + Ok(()) + } + /// Percent-encoded q-component, following the `?=` character sequence. Has + /// a similar function to the URL query string. + /// + /// In `urn:example:weather?=op=map&lat=39.56&lon=-104.85`, + /// the q-component is `op=map&lat=39.56&lon=-104.85`. + /// + /// Should not be used for equivalence checks. 
+ /// + /// # See also + /// - [`percent::decode_q_component`] + #[must_use] + pub fn q_component(&self) -> Option<&str> { + self.q_component_range().map(|range| &self.urn[range]) + } + /// Set the q-component (must be [a valid + /// q-component](https://datatracker.ietf.org/doc/html/rfc8141#section-2) and use + /// percent-encoding). + /// + /// # Errors + /// Returns [`Error::InvalidQComponent`] in case of a validation failure. + /// + /// # See also + /// - [`percent::encode_q_component`] + pub fn set_q_component(&mut self, q_component: Option<&str>) -> Result<()> { + if let Some(qc) = q_component { + let mut qc = TriCow::Borrowed(qc); + if qc.is_empty() || parse_q_component(&mut qc, 0)? != qc.len() { + return Err(Error::InvalidQComponent); + } + let qc_len = qc.len().try_into().map_err(|_| Error::InvalidQComponent)?; + let range = if let Some(range) = self.q_component_range() { + range + } else { + // insert QCOMP_PREFIX if q-component doesn't already exist + let pre_qc_end = self.pre_q_component_end(); + self.urn + .replace_range(pre_qc_end..pre_qc_end, QCOMP_PREFIX)?; + pre_qc_end + QCOMP_PREFIX.len()..pre_qc_end + QCOMP_PREFIX.len() + }; + self.urn.replace_range(range, &qc)?; + self.q_component_len = Some(NonZeroU32::new(qc_len).unwrap()); + } else if let Some(mut range) = self.q_component_range() { + range.start -= QCOMP_PREFIX.len(); + self.urn.replace_range(range, "")?; + self.q_component_len = None; + } + Ok(()) + } + /// Percent-encoded f-component following the `#` character at the end of + /// the URN. Has a similar function to the URL fragment. + /// + /// In `urn:example:a123,z456#789`, the f-component is `789`. + /// + /// Should not be used for equivalence checks. 
+ /// + /// # See also + /// - [`percent::decode_f_component`] + #[must_use] + pub fn f_component(&self) -> Option<&str> { + self.f_component_start().map(|start| &self.urn[start..]) + } + /// Set the f-component (must be [a valid + /// f-component](https://datatracker.ietf.org/doc/html/rfc8141#section-2) and use + /// percent-encoding). + /// + /// # Errors + /// Returns [`Error::InvalidFComponent`] in case of a validation failure. + /// + /// # See also + /// - [`percent::encode_f_component`] + pub fn set_f_component(&mut self, f_component: Option<&str>) -> Result<()> { + if let Some(fc) = f_component { + let mut fc = TriCow::Borrowed(fc); + if parse_f_component(&mut fc, 0)? != fc.len() { + return Err(Error::InvalidFComponent); + } + let start = if let Some(start) = self.f_component_start() { + start + } else { + let range = self.urn.len()..self.urn.len(); + self.urn.replace_range(range, FCOMP_PREFIX)?; + self.urn.len() + }; + let len = self.urn.len(); + self.urn.replace_range(start..len, &fc)?; + } else if let Some(start) = self.f_component_start() { + let len = self.urn.len(); + self.urn + .replace_range(start - FCOMP_PREFIX.len()..len, "")?; + } + Ok(()) + } +} + +impl<'a> ToOwned for UrnSlice<'a> { + type Owned = Urn; + fn to_owned(&self) -> Self::Owned { + Urn::from(self) + } +} + +impl fmt::Debug for UrnSlice<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "UrnSlice({})", self.as_str()) + } +} + +impl PartialEq for UrnSlice<'_> { + fn eq(&self, other: &Urn) -> bool { + self == &other.0 + } +} + +impl AsRef<[u8]> for UrnSlice<'_> { + fn as_ref(&self) -> &[u8] { + self.urn.as_bytes() + } +} + +impl AsRef for UrnSlice<'_> { + fn as_ref(&self) -> &str { + &self.urn + } +} + +impl PartialEq for UrnSlice<'_> { + fn eq(&self, other: &Self) -> bool { + self.urn[..self.nss_range().end] == other.urn[..other.nss_range().end] + } +} + +impl Eq for UrnSlice<'_> {} + +impl Hash for UrnSlice<'_> { + fn hash(&self, state: &mut H) { + 
self.urn[..self.nss_range().end].hash(state); + } +} + +impl fmt::Display for UrnSlice<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.write_str(&self.urn) + } +} + +impl FromStr for UrnSlice<'_> { + type Err = Error; + fn from_str(s: &str) -> Result { + parse_urn(TriCow::Owned(s.to_owned())) + } +} + +impl<'a> TryFrom<&'a str> for UrnSlice<'a> { + type Error = Error; + fn try_from(value: &'a str) -> Result { + parse_urn(TriCow::Borrowed(value)) + } +} + +impl<'a> TryFrom<&'a mut str> for UrnSlice<'a> { + type Error = Error; + fn try_from(value: &'a mut str) -> Result { + parse_urn(TriCow::MutBorrowed(value)) + } +} + +impl TryFrom for UrnSlice<'static> { + type Error = Error; + fn try_from(value: String) -> Result { + parse_urn(TriCow::Owned(value)) + } +} + +/// A struct used for constructing URNs. +/// +/// # Example +/// ``` +/// # use melib::utils::urn::{Urn, UrnBuilder}; +/// # fn main() -> Result<(), Box> { +/// let urn = UrnBuilder::new("example", "1234:5678").build()?; +/// assert_eq!(urn.as_str(), "urn:example:1234:5678"); +/// assert_eq!(urn, "urn:example:1234:5678".parse::()?); // Using std::str::parse +/// assert_eq!(urn.nss(), "1234:5678"); +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug)] +#[must_use] +pub struct UrnBuilder<'a> { + nid: &'a str, + nss: &'a str, + r_component: Option<&'a str>, + q_component: Option<&'a str>, + f_component: Option<&'a str>, +} + +impl<'a> UrnBuilder<'a> { + /// Create a new `UrnBuilder`. + /// + /// - `nid`: the namespace identifier + /// - `nss`: the percent-encoded NSS (namespace-specific string) + /// + /// # See also + /// - [`percent::encode_nss`] + pub fn new(nid: &'a str, nss: &'a str) -> Self { + Self { + nid, + nss, + r_component: None, + q_component: None, + f_component: None, + } + } + /// Change the namespace identifier. + pub fn nid(mut self, nid: &'a str) -> Self { + self.nid = nid; + self + } + /// Change the namespace-specific string (must be percent encoded). 
+ /// + /// # See also + /// - [`percent::encode_nss`] + pub fn nss(mut self, nss: &'a str) -> Self { + self.nss = nss; + self + } + /// Change the r-component (must be percent encoded). + /// + /// # See also + /// - [`percent::encode_r_component`] + pub fn r_component(mut self, r_component: Option<&'a str>) -> Self { + self.r_component = r_component; + self + } + /// Change the q-component (must be percent encoded). + /// + /// # See also + /// - [`percent::encode_q_component`] + pub fn q_component(mut self, q_component: Option<&'a str>) -> Self { + self.q_component = q_component; + self + } + /// Change the f-component (must be percent encoded). + /// + /// # See also + /// - [`percent::encode_f_component`] + pub fn f_component(mut self, f_component: Option<&'a str>) -> Self { + self.f_component = f_component; + self + } + /// [Validate the data](https://datatracker.ietf.org/doc/html/rfc8141#section-2) and create the URN. + /// + /// # Errors + /// + /// In case of a validation failure, returns an error specifying the + /// component that failed validation + pub fn build(self) -> Result { + fn cow_push_str(c: &mut TriCow, s: &str) { + if let TriCow::Owned(c) = c { + c.push_str(s); + } else { + unreachable!("cow must be owned to use this function") + } + } + if !is_valid_nid(self.nid) { + return Err(Error::InvalidNid); + } + let mut s = TriCow::Owned(URN_PREFIX.to_owned()); + { + let s = &mut s; + cow_push_str(s, self.nid); + cow_push_str(s, NID_NSS_SEPARATOR); + let nss_start = s.len(); + cow_push_str(s, self.nss); + if self.nss.is_empty() || parse_nss(s, nss_start)? != s.len() { + return Err(Error::InvalidNss); + } + if let Some(rc) = self.r_component { + cow_push_str(s, RCOMP_PREFIX); + let rc_start = s.len(); + cow_push_str(s, rc); + if rc.is_empty() || parse_r_component(s, rc_start)? 
!= s.len() { + return Err(Error::InvalidRComponent); + } + } + if let Some(qc) = self.q_component { + cow_push_str(s, QCOMP_PREFIX); + let qc_start = s.len(); + cow_push_str(s, qc); + if qc.is_empty() || parse_q_component(s, qc_start)? != s.len() { + return Err(Error::InvalidQComponent); + } + } + if let Some(fc) = self.f_component { + cow_push_str(s, FCOMP_PREFIX); + let fc_start = s.len(); + cow_push_str(s, fc); + if parse_f_component(s, fc_start)? != s.len() { + return Err(Error::InvalidFComponent); + } + } + } + Ok(Urn(UrnSlice { + // we already had to allocate since we use a builder, obviously allocations are allowed + urn: s, + // unwrap: NID length range is 2..=32 bytes, so it always fits into non-zero u8 + nid_len: NonZeroU8::new(self.nid.len().try_into().unwrap()).unwrap(), + // unwrap: NSS length is non-zero as checked above + nss_len: NonZeroU32::new(self.nss.len().try_into().map_err(|_| Error::InvalidNss)?) + .unwrap(), + r_component_len: self + .r_component + .map(|x| { + x.len() + .try_into() + // unwrap: r-component has non-zero length as checked above + .map(|x| NonZeroU32::new(x).unwrap()) + .map_err(|_| Error::InvalidRComponent) + }) + .transpose()?, + q_component_len: self + .q_component + .map(|x| { + x.len() + .try_into() + // unwrap: q-component has non-zero length as checked above + .map(|x| NonZeroU32::new(x).unwrap()) + .map_err(|_| Error::InvalidQComponent) + }) + .transpose()?, + })) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + UrnSlice::try_from("6򭞦*�").unwrap_err(); + assert_eq!( + UrnSlice::try_from("urn:nbn:de:bvb:19-146642").unwrap(), + UrnBuilder::new("nbn", "de:bvb:19-146642").build().unwrap() + ); + assert_eq!( + UrnSlice::try_from("urn:nbn:de:bvb:19-146642") + .unwrap() + .to_string(), + "urn:nbn:de:bvb:19-146642" + ); + + assert_eq!( + UrnSlice::try_from("urn:example:foo-bar-baz-qux?+CCResolve:cc=uk#test").unwrap(), + UrnBuilder::new("example", "foo-bar-baz-qux") + 
.r_component(Some("CCResolve:cc=uk")) + .f_component(Some("test")) + .build() + .unwrap() + ); + assert_eq!( + UrnSlice::try_from("urn:example:foo-bar-baz-qux?+CCResolve:cc=uk#test") + .unwrap() + .f_component() + .unwrap(), + "test" + ); + assert_eq!( + UrnSlice::try_from("urn:example:foo-bar-baz-qux?+CCResolve:cc=uk#test") + .unwrap() + .r_component() + .unwrap(), + "CCResolve:cc=uk" + ); + assert_eq!( + UrnSlice::try_from("urn:example:foo-bar-baz-qux?+CCResolve:cc=uk#test") + .unwrap() + .to_string(), + "urn:example:foo-bar-baz-qux?+CCResolve:cc=uk#test", + ); + + assert_eq!( + "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z" + .parse::() + .unwrap(), + UrnBuilder::new("example", "weather") + .q_component(Some( + "op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z" + )) + .build() + .unwrap() + ); + assert_eq!( + UrnSlice::try_from( + "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z" + ) + .unwrap() + .to_string(), + "urn:example:weather?=op=map&lat=39.56&lon=-104.85&datetime=1969-07-21T02:56:15Z" + ); + + assert_eq!( + "uRn:eXaMpLe:%3d%3a?=aoiwnfuafo" + .parse::() + .unwrap(), + UrnBuilder::new("example", "%3D%3a").build().unwrap() + ); + let mut arr = *b"uRn:eXaMpLe:%3d%3a?=aoiwnfuafo"; + assert_eq!( + UrnSlice::try_from(core::str::from_utf8_mut(&mut arr[..]).unwrap()) + .unwrap() + .as_str(), + "urn:example:%3D%3A?=aoiwnfuafo", + ); + + assert_eq!( + UrnSlice::try_from("urn:-example:abcd"), + Err(Error::InvalidNid) + ); + assert_eq!( + UrnSlice::try_from("urn:example:/abcd"), + Err(Error::InvalidNss) + ); + assert_eq!(UrnSlice::try_from("urn:a:abcd"), Err(Error::InvalidNid)); + assert_eq!( + UrnSlice::try_from("urn:0123456789abcdef0123456789abcdef0:abcd"), + Err(Error::InvalidNid) + ); + let _ = UrnSlice::try_from("urn:0123456789abcdef0123456789abcdef:abcd").unwrap(); + assert_eq!(UrnSlice::try_from("urn:example"), Err(Error::InvalidNss)); + assert_eq!(UrnSlice::try_from("urn:example:"), 
Err(Error::InvalidNss)); + assert_eq!(UrnSlice::try_from("urn:example:%"), Err(Error::InvalidNss)); + assert_eq!(UrnSlice::try_from("urn:example:%a"), Err(Error::InvalidNss)); + assert_eq!( + UrnSlice::try_from("urn:example:%a_"), + Err(Error::InvalidNss) + ); + let mut arr = *b"urn:example:%a0?+"; + assert_eq!( + UrnSlice::try_from(core::str::from_utf8_mut(&mut arr[..]).unwrap()), + Err(Error::InvalidRComponent) + ); + let mut arr = *b"urn:example:%a0?+%a0?="; + assert_eq!( + UrnSlice::try_from(core::str::from_utf8_mut(&mut arr[..]).unwrap()), + Err(Error::InvalidQComponent) + ); + let mut arr = *b"urn:example:%a0?+%a0?=a"; + assert_eq!( + UrnSlice::try_from(core::str::from_utf8_mut(&mut arr[..]).unwrap()) + .unwrap() + .r_component() + .unwrap(), + "%A0", + ); + + { + let mut urn = "urn:example:test".parse::().unwrap(); + urn.set_f_component(Some("f-component")).unwrap(); + assert_eq!(urn.f_component(), Some("f-component")); + assert_eq!(urn.as_str(), "urn:example:test#f-component"); + urn.set_f_component(Some("")).unwrap(); + assert_eq!(urn.f_component(), Some("")); + assert_eq!(urn.as_str(), "urn:example:test#"); + urn.set_q_component(Some("abcd")).unwrap(); + assert_eq!(urn.q_component(), Some("abcd")); + assert_eq!(urn.as_str(), "urn:example:test?=abcd#"); + assert!(urn.set_q_component(Some("")).is_err()); + urn.set_r_component(Some("%2a")).unwrap(); + assert_eq!(urn.r_component(), Some("%2A")); + assert_eq!(urn.as_str(), "urn:example:test?+%2A?=abcd#"); + urn.set_nid("a-b").unwrap(); + assert_eq!(urn.as_str(), "urn:a-b:test?+%2A?=abcd#"); + urn.set_r_component(None).unwrap(); + assert_eq!(urn.as_str(), "urn:a-b:test?=abcd#"); + assert_eq!(urn.r_component(), None); + } + } +} diff --git a/melib/src/utils/urn/owned.rs b/melib/src/utils/urn/owned.rs new file mode 100644 index 00000000..d28eddd5 --- /dev/null +++ b/melib/src/utils/urn/owned.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright 2021, 2022, 2023 chayleaf +// + 
+use std::{ + borrow::{Borrow, BorrowMut}, + fmt, + ops::{Deref, DerefMut}, + str::FromStr, +}; + +use super::{Error, Result, TriCow, UrnSlice}; + +/// An owned RFC2141/8141 URN (Uniform Resource Name). +/// +/// **Note:** the equivalence checks are done +/// [according to the specification](https://www.rfc-editor.org/rfc/rfc8141.html#section-3), +/// only taking the NID and NSS into account! If you need exact equivalence +/// checks, consider comparing using `Urn::as_str()` as the key. Some namespaces +/// may define additional lexical equivalence rules, these aren't accounted for +/// in this implementation (Meaning there might be false negatives for some +/// namespaces). There will, however, be no false positives. +/// +/// `FromStr` requires a single allocation, but `TryFrom` doesn't, so +/// prefer `TryFrom` when possible. +#[repr(transparent)] +#[derive(Eq, Hash, PartialEq)] +pub struct Urn(pub UrnSlice<'static>); + +impl<'a> Borrow> for Urn { + fn borrow(&self) -> &UrnSlice<'a> { + &self.0 + } +} + +impl BorrowMut> for Urn { + fn borrow_mut(&mut self) -> &mut UrnSlice<'static> { + &mut self.0 + } +} + +impl<'a> From> for Urn { + fn from(value: UrnSlice<'a>) -> Self { + Self(UrnSlice { + urn: match value.urn { + TriCow::Owned(s) => TriCow::Owned(s), + TriCow::Borrowed(s) => TriCow::Owned(s.to_owned()), + TriCow::MutBorrowed(s) => TriCow::Owned(s.to_owned()), + }, + nid_len: value.nid_len, + nss_len: value.nss_len, + q_component_len: value.q_component_len, + r_component_len: value.r_component_len, + }) + } +} + +impl<'a> From<&UrnSlice<'a>> for Urn { + fn from(value: &UrnSlice<'a>) -> Self { + Self(UrnSlice { + urn: match &value.urn { + TriCow::Owned(s) => TriCow::Owned(s.clone()), + TriCow::Borrowed(s) => TriCow::Owned((*s).to_owned()), + TriCow::MutBorrowed(s) => TriCow::Owned((*s).to_owned()), + }, + nid_len: value.nid_len, + nss_len: value.nss_len, + q_component_len: value.q_component_len, + r_component_len: value.r_component_len, + }) + } +} + +impl<'a> 
From<&mut UrnSlice<'a>> for Urn { + fn from(value: &mut UrnSlice<'a>) -> Self { + Self(UrnSlice { + urn: match &value.urn { + TriCow::Owned(s) => TriCow::Owned(s.clone()), + TriCow::Borrowed(s) => TriCow::Owned((*s).to_owned()), + TriCow::MutBorrowed(s) => TriCow::Owned((*s).to_owned()), + }, + nid_len: value.nid_len, + nss_len: value.nss_len, + q_component_len: value.q_component_len, + r_component_len: value.r_component_len, + }) + } +} + +impl Clone for Urn { + fn clone(&self) -> Self { + self.0.to_owned() + } +} + +impl fmt::Debug for Urn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Urn({})", self.as_str()) + } +} + +impl Deref for Urn { + type Target = UrnSlice<'static>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Urn { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<'a> AsRef> for Urn { + fn as_ref(&self) -> &UrnSlice<'a> { + &self.0 + } +} + +impl AsMut> for Urn { + fn as_mut(&mut self) -> &mut UrnSlice<'static> { + &mut self.0 + } +} + +impl<'a> PartialEq> for Urn { + fn eq(&self, other: &UrnSlice<'a>) -> bool { + &self.0 == other + } +} + +impl AsRef<[u8]> for Urn { + fn as_ref(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl AsRef for Urn { + fn as_ref(&self) -> &str { + self.0.as_ref() + } +} + +impl fmt::Display for Urn { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + self.0.fmt(f) + } +} + +impl FromStr for Urn { + type Err = Error; + fn from_str(s: &str) -> Result { + Ok(Self(UrnSlice::from_str(s)?)) + } +} + +impl<'a> TryFrom<&'a str> for Urn { + type Error = Error; + fn try_from(value: &'a str) -> Result { + Ok(Self(UrnSlice::try_from(value.to_owned())?)) + } +} + +impl<'a> TryFrom<&'a mut str> for Urn { + type Error = Error; + fn try_from(value: &'a mut str) -> Result { + Ok(Self(UrnSlice::try_from(value.to_owned())?)) + } +} + +impl TryFrom for Urn { + type Error = Error; + fn try_from(value: String) -> Result { + 
Ok(Self(UrnSlice::try_from(value)?)) + } +} diff --git a/melib/src/utils/urn/percent.rs b/melib/src/utils/urn/percent.rs new file mode 100644 index 00000000..50e5bb9b --- /dev/null +++ b/melib/src/utils/urn/percent.rs @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright 2021, 2022, 2023 chayleaf +// + +//! This module contains functions for percent-encoding and decoding various +//! components of a URN. + +use super::{Error, Result, TriCow}; + +/// Different components are percent-encoded differently... +#[derive(Clone, Copy)] +enum PctEncoded { + Nss, + RComponent, + QComponent, + FComponent, +} + +/// Parse and normalize percent-encoded string. Returns the end. +fn parse(s: &mut TriCow, start: usize, kind: PctEncoded) -> Result { + let mut it = s.bytes().enumerate().skip(start).peekable(); + while let Some((i, ch)) = it.next() { + #[allow(clippy::match_same_arms)] + match (kind, ch) { + /* question mark handling */ + // ? is always allowed in f-components + (PctEncoded::FComponent, b'?') => {} + // ? is a valid part of q-component if not at the start + (PctEncoded::QComponent, b'?') if i != start => {} + // ? is a valid part of r-component if not at the start, but ?= indicates the q-component start, so only allow the ? if it isn't followed by = + (PctEncoded::RComponent, b'?') if i != start && it.peek().map(|x| x.1) != Some(b'=') => {} + /* slash handling */ + // slash is uniquely allowed at the start of f-component... + (PctEncoded::FComponent, b'/') => {} + // ...but it's allowed everywhere if it isn't at the start + (_, b'/') if i != start => {} + /* the rest is handled the same everywhere */ + // various symbols that are allowed as pchar + ( + _, + // unreserved = ALPHA / DIGIT / + b'-' | b'.' | b'_' | b'~' + // sub-delims = + | b'!' 
| b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+' | b',' | b';' | b'=' + // pchar = unreserved / pct-encoded / sub-delims / + | b':' | b'@', + ) => {} + // pct-encoded = "%" HEXDIG HEXDIG + // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + // (ABNF strings are case insensitive) + (_, b'%') => { + let mut pct_chars = it.take(2); + if pct_chars.len() == 2 && pct_chars.all(|x| x.1.is_ascii_hexdigit()) { + // percent encoding must be normalized by uppercasing it + s.make_uppercase(i + 1..i + 3)?; + it = s.bytes().enumerate().skip(i + 3).peekable(); + } else { + return Ok(i); + } + } + // ALPHA / DIGIT + (_, c) if c.is_ascii_alphanumeric() => {} + // other characters can't be part of this component, so this is the end + _ => return Ok(i), + } + } + // this was the last component! + Ok(s.len()) +} + +/// Returns the NSS end +pub(super) fn parse_nss(s: &mut TriCow, start: usize) -> Result { + parse(s, start, PctEncoded::Nss) +} +/// Returns the r-component end +pub(super) fn parse_r_component(s: &mut TriCow, start: usize) -> Result { + parse(s, start, PctEncoded::RComponent) +} +/// Returns the q-component end +pub(super) fn parse_q_component(s: &mut TriCow, start: usize) -> Result { + parse(s, start, PctEncoded::QComponent) +} +/// Returns the f-component end +pub(super) fn parse_f_component(s: &mut TriCow, start: usize) -> Result { + parse(s, start, PctEncoded::FComponent) +} + +/// must be a hex digit +const fn parse_hex_char(ch: u8) -> u8 { + if ch.is_ascii_digit() { + ch - b'0' + } else if ch.is_ascii_lowercase() { + ch - b'a' + 0xA + } else { + ch - b'A' + 0xA + } +} + +fn decode(s: &str, kind: PctEncoded) -> Option { + #[allow(clippy::iter_skip_zero)] + let mut it = s.bytes().enumerate().skip(0).peekable(); + let mut ret = Vec::new(); + + while let Some((i, ch)) = it.next() { + #[allow(clippy::match_same_arms)] + match (kind, ch) { + (PctEncoded::FComponent, b'?') => {} + (PctEncoded::QComponent, b'?') if i != 0 => {} + (PctEncoded::RComponent, b'?') if i != 
0 && it.peek().map(|x| x.1) != Some(b'=') => {} + (PctEncoded::FComponent, b'/') => {} + (_, b'/') if i != 0 => {} + ( + _, + b'-' | b'.' | b'_' | b'~' | b'!' | b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+' + | b',' | b';' | b'=' | b':' | b'@', + ) => {} + (_, b'%') => { + let mut pct_chars = it.take(2); + if pct_chars.len() == 2 && pct_chars.all(|x| x.1.is_ascii_hexdigit()) { + ret.push( + parse_hex_char(s.as_bytes()[i + 1]) * 0x10 + + parse_hex_char(s.as_bytes()[i + 2]), + ); + it = s.bytes().enumerate().skip(i + 3).peekable(); + continue; + } + return None; + } + (_, c) if c.is_ascii_alphanumeric() => {} + _ => return None, + } + ret.push(ch); + } + String::from_utf8(ret).ok() +} + +/// Percent-decode a NSS according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, Urn}; +/// # fn test_main() -> Result<(), urn::Error> { +/// let urn = Urn::try_from("urn:example:string%20with%20spaces")?; +/// +/// assert_eq!(urn::percent::decode_nss(urn.nss())?, "string with spaces"); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidNss`] in case of a validation failure. +pub fn decode_nss(s: &str) -> Result { + decode(s, PctEncoded::Nss).ok_or(Error::InvalidNss) +} +/// Percent-decode an r-component according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, Urn}; +/// # fn test_main() -> Result<(), urn::Error> { +/// let urn = Urn::try_from("urn:example:nss?+this%20is%20the%20r-component!")?; +/// +/// assert_eq!( +/// urn::percent::decode_r_component(urn.r_component().unwrap())?, +/// "this is the r-component!" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidRComponent`] in case of a validation failure. 
+pub fn decode_r_component(s: &str) -> Result { + decode(s, PctEncoded::RComponent).ok_or(Error::InvalidRComponent) +} +/// Percent-decode a q-component according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, Urn}; +/// # fn test_main() -> Result<(), urn::Error> { +/// let urn = Urn::try_from("urn:example:nss?=this%20is%20the%20q-component!")?; +/// +/// assert_eq!( +/// urn::percent::decode_q_component(urn.q_component().unwrap())?, +/// "this is the q-component!" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidQComponent`] in case of a validation failure. +pub fn decode_q_component(s: &str) -> Result { + decode(s, PctEncoded::QComponent).ok_or(Error::InvalidQComponent) +} +/// Percent-decode an f-component according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, Urn}; +/// # fn test_main() -> Result<(), urn::Error> { +/// let urn = Urn::try_from("urn:example:nss#f-component%20test")?; +/// +/// assert_eq!( +/// urn::percent::decode_f_component(urn.f_component().unwrap())?, +/// "f-component test" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidFComponent`] in case of a validation failure. +pub fn decode_f_component(s: &str) -> Result { + decode(s, PctEncoded::FComponent).ok_or(Error::InvalidFComponent) +} + +const fn to_hex(n: u8) -> [u8; 2] { + let a = (n & 0xF0) >> 4; + let b = n & 0xF; + let a = if a < 10 { b'0' + a } else { b'A' + (a - 10) }; + let b = if b < 10 { b'0' + b } else { b'A' + (b - 10) }; + [a, b] +} + +fn encode(s: &str, kind: PctEncoded) -> String { + let mut ret = String::with_capacity(s.len()); + let mut buf = [0u8; 8]; + for (i, ch) in s.chars().enumerate() { + #[allow(clippy::match_same_arms)] + match (kind, ch) { + // ? 
and / are reserved chars in RFC2141, so they can be included + (PctEncoded::FComponent, '?') => {} + (PctEncoded::QComponent, '?') if i != 0 => {} + (PctEncoded::RComponent, '?') + if i != 0 && !matches!(s.chars().nth(i + 1), Some('=')) => {} + (PctEncoded::FComponent, '/') => {} + // For RFC2141 compatibility, omit / in NSS + (PctEncoded::RComponent | PctEncoded::QComponent, '/') if i != 0 => {} + // & is reserved in RFC2141, but ~ isn't, omit it + ( + PctEncoded::RComponent | PctEncoded::QComponent | PctEncoded::FComponent, + '-' | '.' | '_' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' + | ':' | '@', + ) => {} + // In NSS, omit both ~ and & + ( + PctEncoded::Nss, + '-' | '.' | '_' | '!' | '$' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | ':' + | '@', + ) => {} + (_, ch) if ch.is_ascii_alphanumeric() => {} + (_, ch) => { + for byte in ch.encode_utf8(&mut buf).as_bytes() { + ret.push('%'); + for digit in to_hex(*byte) { + ret.push(digit as char); + } + } + continue; + } + } + ret.push(ch); + } + ret +} + +/// Percent-encode a NSS according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, UrnBuilder}; +/// # fn test_main() -> Result<(), urn::Error> { +/// assert_eq!( +/// UrnBuilder::new("example", &urn::percent::encode_nss("test nss")?) +/// .build()? +/// .as_str(), +/// "urn:example:test%20nss" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidNss`] when attempting to encode an empty string. +pub fn encode_nss(s: &str) -> Result { + if s.is_empty() { + return Err(Error::InvalidNss); + } + Ok(encode(s, PctEncoded::Nss)) +} +/// Percent-encode an r-component according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, UrnBuilder}; +/// # fn test_main() -> Result<(), urn::Error> { +/// assert_eq!( +/// UrnBuilder::new("example", "nss") +/// .r_component(Some(&urn::percent::encode_r_component("😂😂💯")?)) +/// .build()? 
+/// .as_str(), +/// "urn:example:nss?+%F0%9F%98%82%F0%9F%98%82%F0%9F%92%AF" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidRComponent`] when attempting to encode an empty +/// string. +pub fn encode_r_component(s: &str) -> Result { + if s.is_empty() { + return Err(Error::InvalidRComponent); + } + Ok(encode(s, PctEncoded::RComponent)) +} +/// Percent-encode a q-component according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, UrnBuilder}; +/// # fn test_main() -> Result<(), urn::Error> { +/// assert_eq!( +/// UrnBuilder::new("example", "nss") +/// .q_component(Some(&urn::percent::encode_q_component("~q component~")?)) +/// .build()? +/// .as_str(), +/// "urn:example:nss?=%7Eq%20component%7E" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// Returns [`Error::InvalidQComponent`] when attempting to encode an empty +/// string. +pub fn encode_q_component(s: &str) -> Result { + if s.is_empty() { + return Err(Error::InvalidQComponent); + } + Ok(encode(s, PctEncoded::QComponent)) +} +/// Percent-encode an f-component according to the RFC +/// +/// ``` +/// # use melib::utils::urn::{self, UrnBuilder}; +/// # fn test_main() -> Result<(), urn::Error> { +/// assert_eq!( +/// UrnBuilder::new("example", "nss") +/// .f_component(Some(&urn::percent::encode_f_component( +/// "f-component (pretty much a fragment)" +/// )?)) +/// .build()? +/// .as_str(), +/// "urn:example:nss#f-component%20(pretty%20much%20a%20fragment)" +/// ); +/// # Ok(()) } test_main().unwrap(); +/// ``` +/// +/// # Errors +/// None, this function returns a `Result` for API consistency. If the URN +/// standard gets extended in the future, this may return +/// `Error::InvalidFComponent`. 
+pub fn encode_f_component(s: &str) -> Result { + // fragment is allowed to be empty + Ok(encode(s, PctEncoded::FComponent)) +} diff --git a/melib/src/utils/urn/serde.rs b/melib/src/utils/urn/serde.rs new file mode 100644 index 00000000..51928be3 --- /dev/null +++ b/melib/src/utils/urn/serde.rs @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: Copyright 2021, 2022, 2023 chayleaf +// + +use std::borrow::Cow; + +use super::{cow::TriCow, parse_urn, Urn, UrnSlice}; + +impl<'de> serde::Deserialize<'de> for UrnSlice<'de> { + fn deserialize(de: D) -> Result>::Error> + where + D: serde::Deserializer<'de>, + { + let s = match Cow::::deserialize(de)? { + Cow::Owned(s) => TriCow::Owned(s), + Cow::Borrowed(s) => TriCow::Borrowed(s), + }; + parse_urn(s).map_err(serde::de::Error::custom) + } +} + +impl serde::Serialize for UrnSlice<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.serialize_str(self.as_str()) + } +} + +impl<'de> serde::Deserialize<'de> for Urn { + fn deserialize(de: D) -> Result>::Error> + where + D: serde::Deserializer<'de>, + { + #[allow(clippy::redundant_clone)] + Ok(UrnSlice::deserialize(de)?.to_owned()) + } +} + +impl serde::Serialize for Urn { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.0.serialize(serializer) + } +}