melib/build.rs: add feature to use cache instead of downloading unicode data

Signed-off-by: Manos Pitsidianakis <manos@pitsidianak.is>
9 months ago · 2dc2940586
parent 49a38a23bf
commit 2dc2940586
14 changed files with 170 additions and 131 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1271,7 +1271,6 @@ dependencies = [
 "tempfile",
 "termion",
 "toml",
 "unicode-segmentation",
 "xdg",
 ]
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@ -13,7 +13,7 @@ libfuzzer-sys = "0.3"
 [dependencies.melib]
 path = "../melib"
-features = ["unicode_algorithms"]
+features = ["unicode-algorithms"]
 # Prevent this from interfering with workspaces
 [workspace]
--- a/meli/Cargo.toml
+++ b/meli/Cargo.toml
@ -52,7 +52,6 @@ structopt = { version = "0.3.14", default-features = false }
 svg_crate = { version = "^0.13", optional = true, package = "svg" }
 termion = { version = "1.5.1", default-features = false }
 toml = { version = "0.5.6", default-features = false, features = ["preserve_order", ] }
 unicode-segmentation = "1.2.1" # >:c
 xdg = "2.1.0"
 [target.'cfg(target_os="linux")'.dependencies]
@ -71,7 +70,7 @@ regex = "1"
 tempfile = "3.3"
 [features]
-default = ["sqlite3", "notmuch", "regexp", "smtp", "dbus-notifications", "gpgme", "cli-docs", "jmap"]
+default = ["sqlite3", "notmuch", "regexp", "smtp", "dbus-notifications", "gpgme", "cli-docs", "jmap", "text-processing"]
 notmuch = ["melib/notmuch", ]
 jmap = ["melib/jmap",]
 sqlite3 = ["melib/sqlite3"]
@ -81,6 +80,7 @@ regexp = ["pcre2"]
 dbus-notifications = ["notify-rust",]
 cli-docs = ["flate2"]
 svgscreenshot = ["svg_crate"]
 text-processing = ["melib/unicode-algorithms"]
 gpgme = ["melib/gpgme"]
 # Print tracing logs as meli runs in stderr
--- a/melib/Cargo.toml
+++ b/melib/Cargo.toml
@ -64,7 +64,7 @@ stderrlog = "^0.5"
 flate2 = { version = "1.0.16" }
 [features]
-default = ["unicode-algorithms", "imap", "nntp", "maildir", "mbox", "vcard", "smtp"]
+default = ["imap", "nntp", "maildir", "mbox", "vcard", "smtp"]
 debug-tracing = []
 gpgme = []
--- a/melib/README.md
+++ b/melib/README.md
@ -6,18 +6,44 @@ Library for handling mail.
 ## optional features
-| feature flag           | dependencies                        | notes                    |
+| feature flag                 | dependencies                        | notes                    |
-| ---------------------- | ----------------------------------- | ------------------------ |
+|------------------------------|-------------------------------------|--------------------------|
-| `imap`                 | `native-tls`                        |                          |
+| `smtp`                       | `native-tls`, `base64`              | async SMTP communication |
-| `jmap`                 | `isahc`, `native-tls`, `serde_json` |                          |
+|------------------------------|-------------------------------------|--------------------------|
-| `maildir`              | `notify`                            |                          |
+| `imap`                       | `native-tls`                        |                          |
-| `mbox`                 | `notify`                            |                          |
+|------------------------------|-------------------------------------|--------------------------|
-| `notmuch`              | `notify`                            |                          |
+| `jmap`                       | `isahc`, `native-tls`, `serde_json` |                          |
-| `sqlite`               | `rusqlite`                          | used in IMAP cache       |
+|------------------------------|-------------------------------------|--------------------------|
-| `unicode_algorithms`   | `unicode-segmentation`              | linebreaking algo etc    |
+| `maildir`                    | `notify`                            |                          |
-| `vcard`                |                                     | vcard parsing            |
+|------------------------------|-------------------------------------|--------------------------|
-| `gpgme`                |                                     | GPG use with libgpgme    |
+| `mbox`                       | `notify`                            |                          |
-| `smtp`                 | `native-tls`, `base64`              | async SMTP communication |
+|------------------------------|-------------------------------------|--------------------------|
 | `notmuch`                    | `notify`                            |                          |
 |------------------------------|-------------------------------------|--------------------------|
 | `sqlite`                     | `rusqlite`                          | Used in IMAP cache.      |
 |------------------------------|-------------------------------------|--------------------------|
 | `unicode-algorithms`         | `unicode-segmentation`              | Linebreaking algo etc    |
 |                              |                                     | For a fresh clean build, |
 |                              |                                     | network access is        |
 |                              |                                     | required to fetch data   |
 |                              |                                     | from Unicode's website.  |
 |------------------------------|-------------------------------------|--------------------------|
 | `unicode-algorithms-cached`  | `unicode-segmentation`              | Linebreaking algo etc    |
 |                              |                                     | but it uses a cached     |
 |                              |                                     | version of Unicode data  |
 |                              |                                     | which might be stale.    |
 |                              |                                     |                          |
 |                              |                                     | Use this feature instead |
 |                              |                                     | of the previous one for  |
 |                              |                                     | building without network |
 |                              |                                     | access.                  |
 |------------------------------|-------------------------------------|--------------------------|
 | `unicode-algorithms`         | `unicode-segmentation`              |                          |
 |------------------------------|-------------------------------------|--------------------------|
 | `vcard`                      |                                     | vcard parsing            |
 |------------------------------|-------------------------------------|--------------------------|
 | `gpgme`                      |                                     | GPG use with libgpgme    |
 |------------------------------|-------------------------------------|--------------------------|
 ## Example: Parsing bytes into an `Envelope`
--- a/melib/src/email/compose/mime.rs
+++ b/melib/src/email/compose/mime.rs
@ -20,14 +20,14 @@
 */
 use super::*;
-#[cfg(feature = "unicode_algorithms")]
+#[cfg(feature = "text-processing")]
 use crate::text_processing::grapheme_clusters::TextProcessing;
 pub fn encode_header(value: &str) -> String {
    let mut ret = String::with_capacity(value.len());
    let mut is_current_window_ascii = true;
    let mut current_window_start = 0;
-    #[cfg(feature = "unicode_algorithms")]
+    #[cfg(feature = "text-processing")]
    {
        let graphemes = value.graphemes_indices();
        for (idx, g) in graphemes {
@ -81,7 +81,7 @@ pub fn encode_header(value: &str) -> String {
            }
        }
    }
-    #[cfg(not(feature = "unicode_algorithms"))]
+    #[cfg(not(feature = "text-processing"))]
    {
        /* [ref:VERIFY] [ref:TODO]: test this. If it works as fine as the one above, there's no need to
         * keep the above implementation. */
--- a/melib/src/email/headers.rs
+++ b/melib/src/email/headers.rs
@ -125,6 +125,10 @@ impl HeaderMap {
        Self::default()
    }
    /// Constructs a new value; equivalent to calling `Self::default()`.
    pub fn new() -> Self {
        Self::default()
    }
    pub fn get_mut<T: TryInto<HeaderName> + std::fmt::Debug>(
        &mut self,
        key: T,
--- a/melib/src/jmap/objects/submission.rs
+++ b/melib/src/jmap/objects/submission.rs
@ -290,8 +290,8 @@ pub struct EmailSubmissionSet {
    #[serde(flatten)]
    pub set_call: Set<EmailSubmissionObject>,
    /// onSuccessUpdateEmail: `Id[PatchObject]|null`
-    /// A map of [`EmailSubmissionObject`] id to an object containing properties to
+    /// A map of [`EmailSubmissionObject`] id to an object containing properties
-    /// update on the [`Email`](EmailObject) object referenced by the
+    /// to update on the [`Email`](EmailObject) object referenced by the
    /// [`EmailSubmissionObject`] if the create/update/destroy succeeds.  (For
    /// references to EmailSubmissions created in the same
    /// `/set` invocation, this is equivalent to a creation-reference, so the id
--- a/melib/src/text_processing/line_break.rs
+++ b/melib/src/text_processing/line_break.rs
@ -39,6 +39,7 @@ pub enum LineBreakCandidate {
    NoBreak, // Not used.
 }
 pub use alg::linear;
 use LineBreakCandidate::*;
 pub struct LineBreakCandidateIter<'a> {
@ -837,37 +838,6 @@ fn search_table(c: u32, t: &'static [(u32, u32, LineBreakClass)]) -> LineBreakCl
    }
 }
 // Test module for the line-break candidate iterator.
 #[cfg(test)]
 mod tests {
    use super::*;
    // Smoke test: collects every `(offset, candidate)` pair produced by
    // `LineBreakCandidateIter` and prints the text between consecutive
    // offsets. It contains no assertions, so it can only fail by panicking
    // (for example if an offset is not a valid slice boundary of the input).
    #[test]
    fn test_line_breaks() {
        // Input containing explicit newlines and curly quotation marks.
        let s = "Fell past it.\n\n‘Well!’ thought Alice to herself.";
        let breaks = LineBreakCandidateIter::new(s).collect::<Vec<(usize, LineBreakCandidate)>>();
        // `prev` is the start of the segment that is printed next.
        let mut prev = 0;
        for b in breaks {
            println!("{:?}", &s[prev..b.0]);
            prev = b.0;
        }
        // Print whatever follows the last reported offset.
        println!("{:?}", &s[prev..]);
        // Same procedure on multi-line Greek text (multi-byte UTF-8 input).
        let s = r#"Τ' άστρα τα κοντά -στη γλυκιά σελήνη
 την ειδή των κρύβουν - τη διαμαντένια,
 άμα φως λαμπρό -στη γή πάσα χύνει,
 όλη ασημένια."#;
        let breaks = LineBreakCandidateIter::new(s).collect::<Vec<(usize, LineBreakCandidate)>>();
        let mut prev = 0;
        for b in breaks {
            println!("{:?}", &s[prev..b.0]);
            prev = b.0;
        }
        println!("{:?}", &s[prev..]);
    }
 }
 pub use alg::linear;
 mod alg {
    use super::super::{grapheme_clusters::TextProcessing, *};
    fn cost(i: usize, j: usize, width: usize, minima: &[usize], offsets: &[usize]) -> usize {
@ -1184,6 +1154,7 @@ fn split(ret: &mut Vec<String>, mut line: &str, width: usize) {
        line = &line[chop_index..];
    }
 }
 fn reflow_helper(
    ret: &mut Vec<String>,
    paragraph: &str,
@ -1226,42 +1197,6 @@ fn reflow_helper(
    }
 }
 // Smoke test for `split_lines_reflow`: prints the lines it returns for a few
 // inputs under `Reflow::FormatFlowed` and `Reflow::No`. There are no
 // assertions, so the test can only fail by panicking.
 // NOTE(review): the `Some(..)` argument is presumably the target line width;
 // confirm against the definition of `split_lines_reflow`.
 #[test]
 fn test_reflow() {
    // Plain paragraphs; several source lines end with a trailing space.
    let text = r#"`Take some more tea,' the March Hare said to Alice, very 
 earnestly.
 `I've had nothing yet,' Alice replied in an offended tone, `so 
 I can't take more.'
 `You mean you can't take LESS,' said the Hatter: `it's very 
 easy to take MORE than nothing.'"#;
    for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(30)) {
        println!("{}", l);
    }
    println!();
    for l in split_lines_reflow(text, Reflow::No, Some(30)) {
        println!("{}", l);
    }
    println!();
    // Lines prefixed with one to three `>` characters.
    let text = r#">>>Take some more tea.
 >>I've had nothing yet, so I can't take more.
 >You mean you can't take LESS, it's very easy to take 
 >MORE than nothing."#;
    for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(20)) {
        println!("{}", l);
    }
    println!();
    for l in split_lines_reflow(text, Reflow::No, Some(20)) {
        println!("{}", l);
    }
    println!();
    // Finally run the longer sample text from the parent module.
    use super::_ALICE_CHAPTER_1;
    for l in split_lines_reflow(_ALICE_CHAPTER_1, Reflow::FormatFlowed, Some(72)) {
        println!("{}", l);
    }
 }
 mod segment_tree {
    //! Simple segment tree implementation for maximum in range queries. This
    //! is useful if given an  array of numbers you want to get the
@ -1812,3 +1747,68 @@ fn reflow_helper2(
        }
    }
 }
 // Test module holding the line-break and reflow smoke tests. Both tests
 // only print their results and contain no assertions, so they can only fail
 // by panicking.
 #[cfg(test)]
 mod tests {
    use super::*;
    // Collects every `(offset, candidate)` pair produced by
    // `LineBreakCandidateIter` and prints the text between consecutive
    // offsets.
    #[test]
    fn test_line_breaks() {
        // Input containing explicit newlines and curly quotation marks.
        let s = "Fell past it.\n\n‘Well!’ thought Alice to herself.";
        let breaks = LineBreakCandidateIter::new(s).collect::<Vec<(usize, LineBreakCandidate)>>();
        // `prev` is the start of the segment that is printed next.
        let mut prev = 0;
        for b in breaks {
            println!("{:?}", &s[prev..b.0]);
            prev = b.0;
        }
        // Print whatever follows the last reported offset.
        println!("{:?}", &s[prev..]);
        // Same procedure on multi-line Greek text (multi-byte UTF-8 input).
        let s = r#"Τ' άστρα τα κοντά -στη γλυκιά σελήνη
 την ειδή των κρύβουν - τη διαμαντένια,
 άμα φως λαμπρό -στη γή πάσα χύνει,
 όλη ασημένια."#;
        let breaks = LineBreakCandidateIter::new(s).collect::<Vec<(usize, LineBreakCandidate)>>();
        let mut prev = 0;
        for b in breaks {
            println!("{:?}", &s[prev..b.0]);
            prev = b.0;
        }
        println!("{:?}", &s[prev..]);
    }
    // Prints the lines returned by `split_lines_reflow` for a few inputs under
    // `Reflow::FormatFlowed` and `Reflow::No`.
    // NOTE(review): the `Some(..)` argument is presumably the target line
    // width; confirm against the definition of `split_lines_reflow`.
    #[test]
    fn test_reflow() {
        // Plain paragraphs; several source lines end with a trailing space.
        let text = r#"`Take some more tea,' the March Hare said to Alice, very 
 earnestly.
 `I've had nothing yet,' Alice replied in an offended tone, `so 
 I can't take more.'
 `You mean you can't take LESS,' said the Hatter: `it's very 
 easy to take MORE than nothing.'"#;
        for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(30)) {
            println!("{}", l);
        }
        println!();
        for l in split_lines_reflow(text, Reflow::No, Some(30)) {
            println!("{}", l);
        }
        println!();
        // Lines prefixed with one to three `>` characters.
        let text = r#">>>Take some more tea.
 >>I've had nothing yet, so I can't take more.
 >You mean you can't take LESS, it's very easy to take 
 >MORE than nothing."#;
        for l in split_lines_reflow(text, Reflow::FormatFlowed, Some(20)) {
            println!("{}", l);
        }
        println!();
        for l in split_lines_reflow(text, Reflow::No, Some(20)) {
            println!("{}", l);
        }
        println!();
        // Finally run the longer sample text exported by `text_processing`.
        use crate::text_processing::_ALICE_CHAPTER_1;
        for l in split_lines_reflow(_ALICE_CHAPTER_1, Reflow::FormatFlowed, Some(72)) {
            println!("{}", l);
        }
    }
 }
--- a/melib/src/text_processing/mod.rs
+++ b/melib/src/text_processing/mod.rs
@ -226,25 +226,6 @@ impl GlobMatch for str {
    }
 }
 #[test]
 fn test_globmatch() {
    assert!("INBOX".matches_glob("INBOX"));
    assert!("INBOX/".matches_glob("INBOX"));
    assert!("INBOX".matches_glob("INBO?"));
    assert!("INBOX/Sent".matches_glob("INBOX/*"));
    assert!(!"INBOX/Sent".matches_glob("INBOX"));
    assert!(!"INBOX/Sent".matches_glob("*/Drafts"));
    assert!("INBOX/Sent".matches_glob("*/Sent"));
    assert!("INBOX/Archives/2047".matches_glob("*"));
    assert!("INBOX/Archives/2047".matches_glob("INBOX/*/2047"));
    assert!("INBOX/Archives/2047".matches_glob("INBOX/Archives/2*047"));
    assert!("INBOX/Archives/2047".matches_glob("INBOX/Archives/204?"));
    assert!(!"INBOX/Lists/".matches_glob("INBOX/Lists/*"));
 }
 pub const _ALICE_CHAPTER_1: &str = r#"CHAPTER I. Down the Rabbit-Hole
 Alice was beginning to get very tired of sitting by her sister on the 
@ -295,3 +276,27 @@ she fell past it.
 think nothing of tumbling down stairs! How brave they’ll all think me at 
 home! Why, I wouldn’t say anything about it, even if I fell off the top 
 of the house!’ (Which was very likely true.)"#;
 #[cfg(test)]
 mod tests {
    use crate::text_processing::GlobMatch;
    #[test]
    fn test_globmatch() {
        assert!("INBOX".matches_glob("INBOX"));
        assert!("INBOX/".matches_glob("INBOX"));
        assert!("INBOX".matches_glob("INBO?"));
        assert!("INBOX/Sent".matches_glob("INBOX/*"));
        assert!(!"INBOX/Sent".matches_glob("INBOX"));
        assert!(!"INBOX/Sent".matches_glob("*/Drafts"));
        assert!("INBOX/Sent".matches_glob("*/Sent"));
        assert!("INBOX/Archives/2047".matches_glob("*"));
        assert!("INBOX/Archives/2047".matches_glob("INBOX/*/2047"));
        assert!("INBOX/Archives/2047".matches_glob("INBOX/Archives/2*047"));
        assert!("INBOX/Archives/2047".matches_glob("INBOX/Archives/204?"));
        assert!(!"INBOX/Lists/".matches_glob("INBOX/Lists/*"));
    }
 }
--- a/melib/src/text_processing/search.rs
+++ b/melib/src/text_processing/search.rs
@ -83,14 +83,19 @@ impl KMP for str {
    }
 }
-#[test]
+#[cfg(test)]
-fn test_search() {
+mod tests {
-    use super::_ALICE_CHAPTER_1;
+    use crate::text_processing::search::KMP;
-    for ind in _ALICE_CHAPTER_1.kmp_search("Alice") {
+
-        println!(
+    #[test]
-            "{:#?}",
+    fn test_search() {
-            &_ALICE_CHAPTER_1
+        use crate::text_processing::_ALICE_CHAPTER_1;
-                [ind.saturating_sub(0)..std::cmp::min(_ALICE_CHAPTER_1.len(), ind + 25)]
+        for ind in _ALICE_CHAPTER_1.kmp_search("Alice") {
-        );
+            println!(
                "{:#?}",
                &_ALICE_CHAPTER_1
                    [ind.saturating_sub(0)..std::cmp::min(_ALICE_CHAPTER_1.len(), ind + 25)]
            );
        }
    }
 }
--- a/melib/src/text_processing/tables.rs.gz
+++ b/melib/src/text_processing/tables.rs.gz
--- a/melib/src/thread.rs
+++ b/melib/src/thread.rs
@ -52,7 +52,7 @@ pub use iterators::*;
 use smallvec::SmallVec;
 use uuid::Uuid;
-#[cfg(feature = "unicode_algorithms")]
+#[cfg(feature = "text-processing")]
 use crate::text_processing::grapheme_clusters::*;
 type Envelopes = Arc<RwLock<HashMap<EnvelopeHash, Envelope>>>;
@ -1219,13 +1219,13 @@ impl Threads {
                }
                let ma = &envelopes[&a.unwrap()];
                let mb = &envelopes[&b.unwrap()];
-                #[cfg(feature = "unicode_algorithms")]
+                #[cfg(feature = "text-processing")]
                {
                    ma.subject()
                        .split_graphemes()
                        .cmp(&mb.subject().split_graphemes())
                }
-                #[cfg(not(feature = "unicode_algorithms"))]
+                #[cfg(not(feature = "text-processing"))]
                {
                    ma.subject().cmp(&mb.subject())
                }
@ -1248,7 +1248,7 @@ impl Threads {
                }
                let ma = &envelopes[&a.unwrap()];
                let mb = &envelopes[&b.unwrap()];
-                #[cfg(feature = "unicode_algorithms")]
+                #[cfg(feature = "text-processing")]
                {
                    mb.subject()
                        .as_ref()
@ -1256,7 +1256,7 @@ impl Threads {
                        .cmp(&ma.subject().split_graphemes())
                }
-                #[cfg(not(feature = "unicode_algorithms"))]
+                #[cfg(not(feature = "text-processing"))]
                {
                    mb.subject().as_ref().cmp(&ma.subject())
                }
@ -1299,13 +1299,13 @@ impl Threads {
                }
                let ma = &envelopes[&a.unwrap()];
                let mb = &envelopes[&b.unwrap()];
-                #[cfg(feature = "unicode_algorithms")]
+                #[cfg(feature = "text-processing")]
                {
                    ma.subject()
                        .split_graphemes()
                        .cmp(&mb.subject().split_graphemes())
                }
-                #[cfg(not(feature = "unicode_algorithms"))]
+                #[cfg(not(feature = "text-processing"))]
                {
                    ma.subject().cmp(&mb.subject())
                }
@ -1328,7 +1328,7 @@ impl Threads {
                }
                let ma = &envelopes[&a.unwrap()];
                let mb = &envelopes[&b.unwrap()];
-                #[cfg(feature = "unicode_algorithms")]
+                #[cfg(feature = "text-processing")]
                {
                    mb.subject()
                        .as_ref()
@ -1336,7 +1336,7 @@ impl Threads {
                        .cmp(&ma.subject().split_graphemes())
                }
-                #[cfg(not(feature = "unicode_algorithms"))]
+                #[cfg(not(feature = "text-processing"))]
                {
                    mb.subject().as_ref().cmp(&ma.subject())
                }
@ -1375,13 +1375,13 @@ impl Threads {
                }
                let ma = &envelopes[&a.unwrap()];
                let mb = &envelopes[&b.unwrap()];
-                #[cfg(feature = "unicode_algorithms")]
+                #[cfg(feature = "text-processing")]
                {
                    ma.subject()
                        .split_graphemes()
                        .cmp(&mb.subject().split_graphemes())
                }
-                #[cfg(not(feature = "unicode_algorithms"))]
+                #[cfg(not(feature = "text-processing"))]
                {
                    ma.subject().cmp(&mb.subject())
                }
@ -1404,7 +1404,7 @@ impl Threads {
                }
                let ma = &envelopes[&a.unwrap()];
                let mb = &envelopes[&b.unwrap()];
-                #[cfg(feature = "unicode_algorithms")]
+                #[cfg(feature = "text-processing")]
                {
                    mb.subject()
                        .as_ref()
@ -1412,7 +1412,7 @@ impl Threads {
                        .cmp(&ma.subject().split_graphemes())
                }
-                #[cfg(not(feature = "unicode_algorithms"))]
+                #[cfg(not(feature = "text-processing"))]
                {
                    mb.subject().as_ref().cmp(&ma.subject())
                }
--- a/tools/Cargo.toml
+++ b/tools/Cargo.toml
@ -39,7 +39,7 @@ path = "src/embed.rs"
 [dependencies]
 crossbeam = { version = "^0.8" }
 meli = { version = "0.7" }
-melib = { version = "0.7", features = ["debug-tracing", "unicode_algorithms"] }
+melib = { version = "0.7", features = ["debug-tracing", "unicode-algorithms"] }
 nix = { version = "^0.24", default-features = false }
 signal-hook = { version = "^0.3", default-features = false }
 signal-hook-registry = { version = "1.2.0", default-features = false }