Read search index from search-index.js

With this patch, we read the search index generated by rustdoc.  It
allows us to suggest matching items if we don’t find an exact match.
master
Robin Krahl 4 years ago
parent d4dd4d6548
commit 61fdf31c0a
No known key found for this signature in database
GPG Key ID: 8E9B0870524F69D8

27
Cargo.lock generated

@ -478,6 +478,9 @@ dependencies = [
"html2text 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
"kuchiki 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"pager 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.56 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_tuple 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"structopt 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)",
"termion 1.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -510,6 +513,9 @@ dependencies = [
name = "serde"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde_derive 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_derive"
@ -531,6 +537,25 @@ dependencies = [
"serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_tuple"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_tuple_macros 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_tuple_macros"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.18 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.34 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "servo_arc"
version = "0.1.1"
@ -787,6 +812,8 @@ dependencies = [
"checksum serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)" = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3"
"checksum serde_derive 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)" = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e"
"checksum serde_json 1.0.56 (registry+https://github.com/rust-lang/crates.io-index)" = "3433e879a558dde8b5e8feb2a04899cf34fdde1fafb894687e52105fc1162ac3"
"checksum serde_tuple 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f4f025b91216f15a2a32aa39669329a475733590a015835d1783549a56d09427"
"checksum serde_tuple_macros 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4076151d1a2b688e25aaf236997933c66e18b870d0369f8b248b8ab2be630d7e"
"checksum servo_arc 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432"
"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
"checksum smallvec 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3757cb9d89161a2f24e1cf78efa0c1fcff485d18e3f55e0aa3480824ddaa0f3f"

@ -17,8 +17,14 @@ anyhow = "1.0.31"
html2text = "0.1.12"
kuchiki = "0.8.0"
pager = "0.15.0"
serde_json = "1.0.56"
serde_tuple = "0.5.0"
termion = "1.5.5"
[dependencies.serde]
version = "1.0.114"
features = ["derive"]
[dependencies.structopt]
version = "0.3.15"
default-features = false

@ -0,0 +1,168 @@
// SPDX-FileCopyrightText: 2020 Robin Krahl <robin.krahl@ireas.org>
// SPDX-License-Identifier: MIT
//! Search index for a documentation source.
//!
//! The search index is read from the `search-index.js` file generated by rustdoc. It contains a
//! list of items grouped by their crate.
//!
//! For details on the format of the search index, see the `html/render.rs` file in `librustdoc`.
//! Note that the format of the search index changed in April 2020 with commit
//! b4fb3069ce82f61f84a9487d17fb96389d55126a. We only support the new format as the old format is
//! much harder to parse.
use std::collections;
use std::fmt;
use std::fs;
use std::io;
use std::path;
/// Search index of a documentation source, as parsed from a `search-index.js` file.
///
/// Created with [`Index::load`] and queried with [`Index::find`].
#[derive(Debug)]
pub struct Index {
    /// Deserialized contents of the search index.
    data: Data,
}
/// An item found in the search index.
///
/// The derived ordering compares `path` first, then `name`, then `description`, so the field
/// order below is significant.
#[derive(Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
pub struct IndexItem {
    /// Path of the module (or crate) that contains the item.
    pub path: String,
    /// Name of the item itself.
    pub name: String,
    /// Short description of the item; may be empty.
    pub description: String,
}

/// Formats the item as `path::name`, followed by `: description` if a description is present.
impl fmt::Display for IndexItem {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}::{}", self.path, self.name)?;
        if !self.description.is_empty() {
            write!(f, ": {}", self.description)?;
        }
        Ok(())
    }
}
/// Raw contents of the search index.
///
/// Because of `#[serde(transparent)]`, this is deserialized directly from the top-level JSON
/// object of the index instead of from an object with a `crates` key.
#[derive(Debug, Default, PartialEq, serde::Deserialize)]
#[serde(transparent)]
struct Data {
    /// Index data per crate, keyed by the crate name.
    crates: collections::HashMap<String, CrateData>,
}
/// Search index data for a single crate.
///
/// Only the `i` key of the crate's JSON object is named here.
#[derive(Debug, Default, PartialEq, serde::Deserialize)]
struct CrateData {
    /// The items of the crate, read from the `i` key.
    #[serde(rename = "i")]
    items: Vec<ItemData>,
}
/// A single item entry of a crate's search index.
///
/// Deserialized with `serde_tuple` from a JSON array whose elements appear in the order of the
/// fields below, e.g. `[0, "name", "path", "desc", null, null]`.
#[derive(Debug, Default, PartialEq, serde_tuple::Deserialize_tuple)]
struct ItemData {
    /// Numeric item type as written by rustdoc; not read by the code visible in this module.
    ty: usize,
    /// Name of the item; this is what `Index::find` compares against the keyword.
    name: String,
    /// Path of the item; may be empty (see `Index::find` for how an empty path is resolved).
    path: String,
    /// Description of the item.
    desc: String,
    /// NOTE(review): presumably an index into rustdoc's list of parent paths -- confirm against
    /// the rustdoc sources. Not read by the code visible in this module.
    parent: Option<usize>,
    /// Sixth element of the array; accepted as arbitrary JSON and never inspected.
    _ignored: serde_json::Value,
}
impl Index {
    /// Loads the search index from the `search-index.js` file at `path`.
    ///
    /// The file is scanned line by line for the line `var searchIndex = JSON.parse('{\` that
    /// opens the index and the line `}');` that closes it. The lines in between are joined after
    /// removing their trailing backslashes, `\'` is unescaped to `'`, and the result is parsed as
    /// JSON.
    ///
    /// Returns `Ok(None)` if the opening or the closing line is not found, i.e. if the file does
    /// not use the supported search index format.
    ///
    /// # Errors
    ///
    /// Fails if `path` is not a file, if the file cannot be read, or if the extracted JSON cannot
    /// be parsed.
    pub fn load(path: impl AsRef<path::Path>) -> anyhow::Result<Option<Self>> {
        use anyhow::Context;
        use std::io::BufRead;

        let path = path.as_ref();
        anyhow::ensure!(
            path.is_file(),
            "Search index '{}' must be a file",
            path.display()
        );

        // `json` is `None` until the opening line has been seen; `finished` records whether the
        // closing line has been seen as well.
        let mut json: Option<String> = None;
        let mut finished = false;
        for line in io::BufReader::new(fs::File::open(path)?).lines() {
            let line = line?;
            if let Some(json) = &mut json {
                if line == "}');" {
                    json.push('}');
                    finished = true;
                    break;
                } else {
                    // Every line of the JavaScript string literal ends with a backslash that
                    // continues the literal on the next line.
                    json.push_str(line.trim_end_matches('\\'));
                }
            } else if line == "var searchIndex = JSON.parse('{\\" {
                json = Some(String::from("{"));
            }
        }

        match json {
            Some(json) if finished => {
                // The JSON is embedded in a single-quoted JavaScript string, so single quotes
                // are escaped there but must not be escaped in JSON.
                let json = json.replace("\\'", "'");
                let data: Data =
                    serde_json::from_str(&json).context("Could not parse search index")?;
                Ok(Some(Index { data }))
            }
            _ => Ok(None),
        }
    }

    /// Returns all items in the index whose name is exactly `keyword`.
    ///
    /// The result is sorted and contains no duplicates.
    pub fn find(&self, keyword: &str) -> Vec<IndexItem> {
        let mut matches: Vec<IndexItem> = Vec::new();
        for (krate, data) in &self.data.crates {
            // rustdoc writes an empty path for an item that has the same path as the item before
            // it, so we have to carry the last non-empty path forward. The crate name is only
            // the fallback for leading items without a path.
            let mut path: &str = krate.as_str();
            for item in &data.items {
                if !item.path.is_empty() {
                    path = item.path.as_str();
                }
                if item.name == keyword {
                    matches.push(IndexItem {
                        name: item.name.clone(),
                        path: path.to_owned(),
                        description: item.desc.clone(),
                    });
                }
            }
        }
        matches.sort_unstable();
        matches.dedup();
        matches
    }
}
#[cfg(test)]
mod tests {
    use super::{CrateData, Data, ItemData};

    /// Parses `json` as search index data, panicking on malformed input.
    fn parse(json: &str) -> Data {
        serde_json::from_str(json).unwrap()
    }

    /// An empty JSON object yields an index without crates.
    #[test]
    fn test_empty() {
        assert_eq!(Data::default(), parse("{}"));
    }

    /// A crate with an empty item list yields default crate data.
    #[test]
    fn test_empty_crate() {
        let mut expected = Data::default();
        expected
            .crates
            .insert("test".to_owned(), CrateData::default());
        assert_eq!(expected, parse(r#"{"test": {"i": []}}"#));
    }

    /// A six-element item array is mapped to the fields of `ItemData` in order.
    #[test]
    fn test_one_item() {
        let item = ItemData {
            name: "name".to_owned(),
            path: "path".to_owned(),
            desc: "desc".to_owned(),
            ..Default::default()
        };
        let krate = CrateData { items: vec![item] };
        let mut expected = Data::default();
        expected.crates.insert("test".to_owned(), krate);
        assert_eq!(
            expected,
            parse(r#"{"test": {"i": [[0, "name", "path", "desc", null, null]]}}"#)
        );
    }
}

@ -2,10 +2,12 @@
// SPDX-License-Identifier: MIT
mod doc;
mod index;
mod parser;
mod source;
mod viewer;
use std::io;
use std::path;
use structopt::StructOpt;
@ -37,11 +39,20 @@ struct Opt {
fn main() -> anyhow::Result<()> {
let opt = Opt::from_args();
let sources = load_sources(&opt.source_paths, !opt.no_default_sources)?;
let doc = find_doc(&sources, &opt.keyword)?;
let viewer = opt.viewer.unwrap_or_else(viewer::get_default);
viewer.open(&doc)
let doc = if let Some(doc) = find_doc(&sources, &opt.keyword)? {
Some(doc)
} else {
search_doc(&sources, &opt.keyword)?
};
if let Some(doc) = doc {
let viewer = opt.viewer.unwrap_or_else(viewer::get_default);
viewer.open(&doc)
} else {
// item selection cancelled by user
Ok(())
}
}
const DEFAULT_SOURCES: &[&str] = &[
@ -75,27 +86,98 @@ fn load_sources(
Ok(vec)
}
fn find_doc(sources: &[Box<dyn source::Source>], keyword: &str) -> anyhow::Result<doc::Doc> {
use anyhow::Context;
fn find_doc(
sources: &[Box<dyn source::Source>],
keyword: &str,
) -> anyhow::Result<Option<doc::Doc>> {
let parts: Vec<&str> = keyword.split("::").collect();
let crate_ = find_crate(sources, parts[0])?;
let item = crate_
.find_item(&parts[1..])?
.or_else(|| crate_.find_module(&parts[1..]))
.or_else(|| crate_.find_member(&parts[1..]))
.with_context(|| format!("Could not find the item {}", keyword))?;
item.load_doc()
if let Some(crate_) = find_crate(sources, parts[0]) {
crate_
.find_item(&parts[1..])?
.or_else(|| crate_.find_module(&parts[1..]))
.or_else(|| crate_.find_member(&parts[1..]))
.map(|i| i.load_doc())
.transpose()
} else {
Ok(None)
}
}
fn find_crate(sources: &[Box<dyn source::Source>], name: &str) -> anyhow::Result<doc::Crate> {
use anyhow::Context;
fn find_crate(sources: &[Box<dyn source::Source>], name: &str) -> Option<doc::Crate> {
sources.iter().filter_map(|s| s.find_crate(name)).next()
}
/// Searches the indexes of all sources for `keyword` and loads the documentation of the match.
///
/// Returns `Ok(None)` if `search_item` yields no item. If an item is found in the index but its
/// documentation cannot be located by `find_doc`, an error is returned.
fn search_doc(
    sources: &[Box<dyn source::Source>],
    keyword: &str,
) -> anyhow::Result<Option<doc::Doc>> {
    use anyhow::Context;

    let found = match search_item(sources, keyword)? {
        Some(found) => found,
        None => return Ok(None),
    };

    // Look the match up by its fully qualified name.
    let full_name = format!("{}::{}", found.path, found.name);
    find_doc(sources, &full_name)?
        .with_context(|| format!("Could not find documentation for {}", &full_name))
        .map(Some)
}
sources
fn search_item(
sources: &[Box<dyn source::Source>],
keyword: &str,
) -> anyhow::Result<Option<index::IndexItem>> {
let indexes = sources
.iter()
.filter_map(|s| s.load_index().transpose())
.collect::<anyhow::Result<Vec<_>>>()?;
let mut items = indexes
.iter()
.filter_map(|s| s.find_crate(name))
.next()
.with_context(|| format!("Could not find the crate {}", name))
.map(|i| i.find(keyword))
.collect::<Vec<_>>()
.concat();
items.sort_unstable();
items.dedup();
if items.is_empty() {
Ok(None)
} else if items.len() == 1 {
Ok(Some(items[0].clone()))
} else {
select_item(&items, keyword)
}
}
fn select_item(
items: &[index::IndexItem],
keyword: &str,
) -> anyhow::Result<Option<index::IndexItem>> {
use std::io::Write;
// If we are not on a TTY, we cant ask the user to select an item --> abort
anyhow::ensure!(
termion::is_tty(&io::stdin()),
"Found multiple matches for {}",
keyword
);
println!("Found mulitple matches for {} select one of:", keyword);
println!();
let width = (items.len() + 1).to_string().len();
for (i, item) in items.iter().enumerate() {
println!("[ {:width$} ] {}", i, &item, width = width);
}
println!();
print!("> ");
io::stdout().flush()?;
let mut input = String::new();
io::stdin().read_line(&mut input)?;
if let Ok(i) = usize::from_str_radix(input.trim(), 10) {
Ok(items.get(i).map(Clone::clone))
} else {
Ok(None)
}
}
#[cfg(test)]
@ -118,10 +200,16 @@ mod tests {
let path = ensure_docs();
let sources = vec![source::get_source(path).unwrap()];
super::find_doc(&sources, "kuchiki").unwrap();
super::find_doc(&sources, "kuchiki::NodeRef").unwrap();
super::find_doc(&sources, "kuchiki::NodeDataRef::as_node").unwrap();
super::find_doc(&sources, "kuchiki::traits").unwrap();
super::find_doc(&sources, "kachiki").unwrap_err();
assert!(super::find_doc(&sources, "kuchiki").unwrap().is_some());
assert!(super::find_doc(&sources, "kuchiki::NodeRef")
.unwrap()
.is_some());
assert!(super::find_doc(&sources, "kuchiki::NodeDataRef::as_node")
.unwrap()
.is_some());
assert!(super::find_doc(&sources, "kuchiki::traits")
.unwrap()
.is_some());
assert!(super::find_doc(&sources, "kachiki").unwrap().is_none());
}
}

@ -8,10 +8,12 @@ use std::path;
use anyhow::anyhow;
use crate::doc;
use crate::index;
/// Documentation source, for example a local directory.
pub trait Source {
    /// Looks up the crate with the given name in this source.
    ///
    /// NOTE(review): `None` presumably means that this source does not provide the crate --
    /// confirm against the implementations.
    fn find_crate(&self, name: &str) -> Option<doc::Crate>;
    /// Loads the search index of this source.
    ///
    /// `Ok(None)` means that the source has no usable search index; `Err` means that loading an
    /// existing index failed.
    fn load_index(&self) -> anyhow::Result<Option<index::Index>>;
}
/// Local directory containing documentation data.
@ -39,6 +41,15 @@ impl Source for DirSource {
None
}
}
/// Loads the search index from the `search-index.js` file in this source's directory.
///
/// Returns `Ok(None)` if the directory contains no such file.
fn load_index(&self) -> anyhow::Result<Option<index::Index>> {
    let index_path = self.path.join("search-index.js");
    if !index_path.is_file() {
        return Ok(None);
    }
    index::Index::load(&index_path)
}
}
pub fn get_source<P: AsRef<path::Path>>(path: P) -> anyhow::Result<Box<dyn Source>> {

Loading…
Cancel
Save