2
0
mirror of https://github.com/xvxx/phetch synced 2024-11-10 13:10:54 +00:00
phetch/src/gopher.rs

520 lines
16 KiB
Rust
Raw Normal View History

2020-01-12 03:34:27 +00:00
//! phetch's Gopher library contains a few phetch-specific features:
//! the ability to make requests or downloads over TLS or Tor,
//! cleaning Unicode control characters from Gopher responses, and
//! URL parsing that recognizes different protocols like telnet and
//! IPv6 addresses.
use crate::ui::{self, Key};
2019-12-26 23:53:51 +00:00
use std::{
2020-02-05 20:49:34 +00:00
fs,
2019-12-26 23:53:51 +00:00
io::{Read, Result, Write},
net::TcpStream,
net::ToSocketAddrs,
os::unix::fs::OpenOptionsExt,
time::Duration,
};
#[cfg(feature = "tor")]
2020-01-11 02:34:57 +00:00
use tor_stream::TorStream;
2019-12-19 04:23:26 +00:00
#[cfg(feature = "tls")]
2020-01-06 18:18:27 +00:00
use native_tls::TlsConnector;
2020-01-06 07:38:19 +00:00
mod r#type;
pub use self::r#type::Type;
2020-01-12 03:34:27 +00:00
/// Timeout, in seconds, applied when connecting to a Gopher server and
/// when waiting on reads from it. Some Gopher servers can be kind of
/// slow, we may want to up this or make it configurable eventually.
pub const TCP_TIMEOUT_IN_SECS: u64 = 8;

/// Same value as `TCP_TIMEOUT_IN_SECS`, but as a `Duration` so it can
/// be handed straight to the socket timeout APIs.
pub const TCP_TIMEOUT_DURATION: Duration = Duration::from_secs(TCP_TIMEOUT_IN_SECS);
2020-01-06 08:11:50 +00:00
/// Umbrella trait for anything that is both readable and writable, so
/// that different kinds of connections can share one boxed trait object.
trait ReadWrite: Read + Write {}

/// Every type that implements both `Read` and `Write` is a `ReadWrite`.
impl<T> ReadWrite for T where T: Read + Write {}
/// Wrapper for TLS and regular TCP streams.
pub struct Stream {
io: Box<dyn ReadWrite>,
2020-01-06 08:37:08 +00:00
tls: bool,
}
impl Stream {
fn is_tls(&self) -> bool {
self.tls
}
2020-01-06 08:11:50 +00:00
}
/// Reading from a `Stream` reads from whichever transport it wraps.
impl Read for Stream {
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        let transport = &mut *self.io;
        Read::read(transport, buf)
    }
}
/// Writing to a `Stream` writes to whichever transport it wraps.
impl Write for Stream {
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        let transport = &mut *self.io;
        Write::write(transport, buf)
    }

    fn flush(&mut self) -> Result<()> {
        let transport = &mut *self.io;
        Write::flush(transport)
    }
}
/// Gopher URL. Returned by `parse_url()`.
///
/// All string fields are slices borrowed from the text that was parsed,
/// so a `Url` cannot outlive that text.
pub struct Url<'a> {
    /// Gopher Type
    pub typ: Type,
    /// Hostname. Empty for non-gopher URLs (whose full text is kept in
    /// `sel`); holds an error message when `typ` is `Type::Error`.
    pub host: &'a str,
    /// Port, kept as text. Defaults to 70; empty for non-gopher URLs.
    pub port: &'a str,
    /// Selector
    pub sel: &'a str,
}
2020-01-06 08:37:08 +00:00
/// Fetches a gopher URL and returns a tuple of:
/// (did tls work?, raw Gopher response)
2020-01-11 02:34:57 +00:00
pub fn fetch_url(url: &str, tls: bool, tor: bool) -> Result<(bool, String)> {
let u = parse_url(url);
fetch(u.host, u.port, u.sel, tls, tor)
2019-12-17 05:47:33 +00:00
}
2020-01-06 08:37:08 +00:00
/// Fetches a gopher URL by its component parts and returns a tuple of:
/// (did tls work?, raw Gopher response)
2020-01-11 02:34:57 +00:00
pub fn fetch(
host: &str,
port: &str,
selector: &str,
tls: bool,
tor: bool,
) -> Result<(bool, String)> {
let mut stream = request(host, port, selector, tls, tor)?;
2020-01-09 00:55:27 +00:00
let mut body = Vec::new();
stream.read_to_end(&mut body)?;
let mut out = String::from_utf8_lossy(&body).to_string();
clean_response(&mut out);
2020-01-09 19:56:06 +00:00
Ok((stream.is_tls(), out))
}
/// Removes DEL (U+007F) and the C1 control characters (U+0080 through
/// U+009F) from a Gopher response, in place.
///
/// Everything below U+007F is left alone, including the ESC character,
/// so ANSI color sequences in a response are preserved.
///
/// <https://en.wikipedia.org/wiki/Control_character#In_Unicode>
fn clean_response(res: &mut String) {
    res.retain(|c| match c {
        // DEL and the C1 block are adjacent, so one range covers both.
        '\u{007F}'..='\u{009F}' => false,
        _ => true,
    });
}
2019-12-17 01:01:23 +00:00
/// Downloads a binary to disk. Allows canceling with Ctrl-c, but it's
/// kind of hacky - needs the UI receiver passed in.
2020-01-06 08:37:08 +00:00
/// Returns a tuple of:
/// (path it was saved to, the size in bytes)
pub fn download_url(
url: &str,
tls: bool,
tor: bool,
chan: ui::KeyReceiver,
) -> Result<(String, usize)> {
let u = parse_url(url);
let filename = u
.sel
2019-12-21 02:32:08 +00:00
.split_terminator('/')
.rev()
2020-05-02 20:49:50 +00:00
.next()
.ok_or_else(|| error!("Bad download filename: {}", u.sel))?;
2019-12-21 02:22:33 +00:00
let mut path = std::path::PathBuf::from(".");
path.push(filename);
2019-12-21 02:11:20 +00:00
let mut stream = request(u.host, u.port, u.sel, tls, tor)?;
2020-02-05 20:49:34 +00:00
let mut file = fs::OpenOptions::new()
2020-01-06 07:49:57 +00:00
.write(true)
.create(true)
.truncate(true)
.mode(0o770)
2020-02-05 20:49:34 +00:00
.open(&path)?;
2019-12-22 08:10:06 +00:00
2020-01-06 07:49:57 +00:00
let mut buf = [0; 1024];
let mut bytes = 0;
while let Ok(count) = stream.read(&mut buf) {
if count == 0 {
break;
2019-12-22 08:10:06 +00:00
}
2020-01-06 07:49:57 +00:00
bytes += count;
2020-01-14 21:08:48 +00:00
file.write_all(&buf[..count])?;
if let Ok(chan) = chan.lock() {
if let Ok(Key::Ctrl('c')) = chan.try_recv() {
if path.exists() {
fs::remove_file(path)?;
}
return Err(error!("Download cancelled"));
2020-02-05 20:49:34 +00:00
}
2020-01-06 07:49:57 +00:00
}
}
2020-05-23 17:46:47 +00:00
2020-01-06 07:49:57 +00:00
Ok((filename.to_string(), bytes))
2019-12-22 08:10:06 +00:00
}
2020-01-06 07:47:13 +00:00
/// Make a Gopher request and return a TcpStream ready to be read()'d.
2020-01-06 08:11:50 +00:00
/// Will attempt a TLS connection first, then retry a regular
/// connection if it fails.
2020-01-11 02:34:57 +00:00
pub fn request(host: &str, port: &str, selector: &str, tls: bool, tor: bool) -> Result<Stream> {
2019-12-22 08:10:06 +00:00
let selector = selector.replace('?', "\t"); // search queries
2020-01-16 09:46:47 +00:00
let addr = format!("{}:{}", host, port);
2020-01-06 08:11:50 +00:00
// attempt tls connection
2020-01-11 02:34:57 +00:00
if tls {
#[cfg(feature = "tls")]
2020-01-11 02:34:57 +00:00
{
{
if let Ok(connector) = TlsConnector::new() {
2020-01-16 09:46:47 +00:00
let sock = addr.to_socket_addrs().and_then(|mut socks| {
socks.next().ok_or_else(|| error!("Can't create socket"))
})?;
2020-01-11 02:34:57 +00:00
let stream = TcpStream::connect_timeout(&sock, TCP_TIMEOUT_DURATION)?;
stream.set_read_timeout(Some(TCP_TIMEOUT_DURATION))?;
if let Ok(mut stream) = connector.connect(host, stream) {
2020-01-16 03:51:28 +00:00
stream.write_all(selector.as_ref())?;
stream.write_all("\r\n".as_ref())?;
2020-01-11 02:34:57 +00:00
return Ok(Stream {
io: Box::new(stream),
tls: true,
});
}
2020-01-06 18:26:58 +00:00
}
2020-01-06 08:26:05 +00:00
}
2020-01-06 08:11:50 +00:00
}
}
2020-01-11 07:02:19 +00:00
// tls didn't work or wasn't selected, try Tor or default
2020-01-11 02:34:57 +00:00
if tor {
#[cfg(feature = "tor")]
{
let proxy = std::env::var("TOR_PROXY")
2020-01-14 21:08:48 +00:00
.unwrap_or_else(|_| "127.0.0.1:9050".into())
.to_socket_addrs()?
2020-05-02 20:49:50 +00:00
.next()
.unwrap();
2020-01-16 09:46:47 +00:00
let mut stream = match TorStream::connect_with_address(proxy, addr.as_ref()) {
Ok(s) => s,
Err(e) => return Err(error!("Tor error: {}", e)),
};
2020-01-16 03:51:28 +00:00
stream.write_all(selector.as_ref())?;
stream.write_all("\r\n".as_ref())?;
return Ok(Stream {
io: Box::new(stream),
tls: false,
});
}
2020-01-11 02:34:57 +00:00
}
// no tls or tor, try regular connection
2020-01-16 09:46:47 +00:00
let sock = addr
.to_socket_addrs()
.and_then(|mut socks| socks.next().ok_or_else(|| error!("Can't create socket")))?;
let mut stream = TcpStream::connect_timeout(&sock, TCP_TIMEOUT_DURATION)?;
stream.set_read_timeout(Some(TCP_TIMEOUT_DURATION))?;
2020-01-16 03:51:28 +00:00
stream.write_all(selector.as_ref())?;
stream.write_all("\r\n".as_ref())?;
Ok(Stream {
io: Box::new(stream),
tls: false,
})
2019-12-21 02:11:20 +00:00
}
impl<'a> Url<'a> {
/// Creates a new Gopher Url quickly from a tuple of Url fields.
pub fn new(typ: Type, host: &'a str, port: &'a str, sel: &'a str) -> Url<'a> {
Url {
typ,
host,
port,
sel,
}
}
}
2020-01-15 07:27:10 +00:00
/// Given a Gopher URL, returns a gopher::Type.
///
/// - URLs starting with `telnet://` are `Type::Telnet`.
/// - URLs starting with `URL:` or `/URL:` are `Type::HTML`.
/// - Otherwise the character right after the first `/` (once any
///   leading `gopher://` is removed) is looked up with `Type::from`.
///   If there is no such character, or it is not a recognized type,
///   the result is `Type::Menu`.
pub fn type_for_url(url: &str) -> Type {
    if url.starts_with("telnet://") {
        return Type::Telnet;
    }
    if url.starts_with("URL:") || url.starts_with("/URL:") {
        return Type::HTML;
    }

    let url = url.trim_start_matches("gopher://");
    // `find` returns a byte offset, so slice by bytes to reach the type
    // character. Counting chars from the start instead would pick the
    // wrong character whenever the host contains non-ASCII text.
    url.find('/')
        .and_then(|idx| url[idx + 1..].chars().next())
        .and_then(|c| Type::from(c))
        .unwrap_or(Type::Menu)
}
2020-01-06 07:47:13 +00:00
/// Parses gopher URL into parts.
2020-01-16 03:35:21 +00:00
pub fn parse_url(url: &str) -> Url {
2020-01-09 01:41:51 +00:00
let mut url = url.trim_start_matches("gopher://");
let mut typ = Type::Menu;
let mut host;
let mut port = "70";
let mut sel = "";
2019-12-17 01:01:23 +00:00
2019-12-23 22:12:20 +00:00
// simple URLs, ex: "dog.com"
2019-12-21 22:30:35 +00:00
if !url.contains(':') && !url.contains('/') {
return Url::new(Type::Menu, url, "70", "");
2019-12-21 22:30:35 +00:00
}
2020-01-09 01:41:51 +00:00
// telnet urls
if url.starts_with("telnet://") {
typ = Type::Telnet;
url = url.trim_start_matches("telnet://");
} else if url.contains("://") {
// non-gopher URLs, stick everything in selector
return Url::new(Type::HTML, "", "", url);
2019-12-23 22:12:20 +00:00
}
2019-12-21 22:30:35 +00:00
// check selector first
if let Some(idx) = url.find('/') {
host = &url[..idx];
sel = &url[idx..];
} else {
host = &url;
2019-12-17 01:01:23 +00:00
}
2019-12-21 22:47:08 +00:00
// ipv6
if let Some(idx) = host.find('[') {
if let Some(end) = host[idx + 1..].find(']') {
2019-12-25 01:22:12 +00:00
host = &host[idx + 1..=end];
2019-12-21 22:47:08 +00:00
if host.len() > end {
if let Some(idx) = host[end..].find(':') {
port = &host[idx + 1..];
}
}
} else {
return Url::new(Type::Error, "Unclosed ipv6 bracket", "", url);
2019-12-21 22:47:08 +00:00
}
} else if let Some(idx) = host.find(':') {
// two :'s == probably ipv6
if host.len() > idx + 1 && !host[idx + 1..].contains(':') {
// regular hostname w/ port -- grab port
2019-12-21 22:30:35 +00:00
port = &host[idx + 1..];
host = &host[..idx];
}
}
2019-12-17 01:01:23 +00:00
2019-12-21 22:30:35 +00:00
// ignore type prefix on selector
2020-01-09 01:41:51 +00:00
if typ != Type::Telnet {
let mut chars = sel.chars();
2020-05-02 20:49:50 +00:00
if let (Some('/'), Some(c)) = (chars.next(), chars.next()) {
2020-01-09 01:41:51 +00:00
if let Some(t) = Type::from(c) {
typ = t;
sel = &sel[2..];
}
2019-12-17 01:01:23 +00:00
}
}
Url::new(typ, host, port, sel)
2019-12-17 01:01:23 +00:00
}
2019-12-20 22:50:58 +00:00
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simple_parse() {
let urls = vec![
"gopher://gopher.club/1/phlogs/",
"gopher://sdf.org:7777/1/maps",
"gopher.floodgap.org",
"gopher.floodgap.com/0/gopher/relevance.txt",
"gopher://gopherpedia.com/7/lookup?Gopher",
2019-12-21 22:30:35 +00:00
"gopher://dead:beef:1234:5678:9012:3456:feed:deed",
2019-12-21 22:47:08 +00:00
"gopher://[1234:2345:dead:4567:7890:1234:beef:1111]:7443/1/files",
"gopher://2001:cdba:0000:0000:0000:0000:3257:9121",
"[2001:cdba::3257:9652]",
"gopher://9999:aaaa::abab:baba:aaaa:9999",
"[2001:2099:dead:beef:0000",
"::1",
2019-12-23 22:10:27 +00:00
"ssh://kiosk@bitreich.org",
2020-01-11 20:55:33 +00:00
"https://github.com/xvxx/phetch",
2020-01-09 01:41:51 +00:00
"telnet://bbs.impakt.net:6502/",
2020-05-23 17:46:47 +00:00
"gopher://some.url/9/file.mp4",
"gopher://some.url/;/file.mp4",
"mtv.com/s/best-of-britney-spears.mp3",
"gopher://microsoft.com:7070/x/developer/sitemap.xml",
"gopher://mtv.com/c/kriss-kross/tour-dates.ical",
"gopher://protonmail.com/M/mymail/inbox.eml",
2019-12-20 22:50:58 +00:00
];
2020-05-23 17:46:47 +00:00
let mut urls = urls.iter();
macro_rules! parse_next_url {
() => {
parse_url(urls.next().unwrap())
};
}
2019-12-20 22:50:58 +00:00
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "gopher.club");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/phlogs/");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "sdf.org");
assert_eq!(url.port, "7777");
assert_eq!(url.sel, "/maps");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "gopher.floodgap.org");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Text);
assert_eq!(url.host, "gopher.floodgap.com");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/gopher/relevance.txt");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Search);
assert_eq!(url.host, "gopherpedia.com");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/lookup?Gopher");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "dead:beef:1234:5678:9012:3456:feed:deed");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "1234:2345:dead:4567:7890:1234:beef:1111");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/files");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "2001:cdba:0000:0000:0000:0000:3257:9121");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "2001:cdba::3257:9652");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "9999:aaaa::abab:baba:aaaa:9999");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Error);
assert_eq!(url.host, "Unclosed ipv6 bracket");
assert_eq!(url.port, "");
assert_eq!(url.sel, "[2001:2099:dead:beef:0000");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Menu);
assert_eq!(url.host, "::1");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::HTML);
assert_eq!(url.host, "");
assert_eq!(url.port, "");
assert_eq!(url.sel, "ssh://kiosk@bitreich.org");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::HTML);
assert_eq!(url.host, "");
assert_eq!(url.port, "");
assert_eq!(url.sel, "https://github.com/xvxx/phetch");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Telnet);
assert_eq!(url.host, "bbs.impakt.net");
assert_eq!(url.port, "6502");
assert_eq!(url.sel, "/");
2020-05-23 17:46:47 +00:00
let url = parse_next_url!();
assert_eq!(url.typ, Type::Binary);
assert_eq!(url.host, "some.url");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/file.mp4");
let url = parse_next_url!();
assert_eq!(url.typ, Type::Video);
assert_eq!(url.host, "some.url");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/file.mp4");
let url = parse_next_url!();
assert_eq!(url.typ, Type::Sound);
assert_eq!(url.host, "mtv.com");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/best-of-britney-spears.mp3");
let url = parse_next_url!();
assert_eq!(url.typ, Type::Xml);
assert_eq!(url.host, "microsoft.com");
assert_eq!(url.port, "7070");
assert_eq!(url.sel, "/developer/sitemap.xml");
let url = parse_next_url!();
assert_eq!(url.typ, Type::Calendar);
assert_eq!(url.host, "mtv.com");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/kriss-kross/tour-dates.ical");
let url = parse_next_url!();
assert_eq!(url.typ, Type::Mailbox);
assert_eq!(url.host, "protonmail.com");
assert_eq!(url.port, "70");
assert_eq!(url.sel, "/mymail/inbox.eml");
// make sure we got em all
assert_eq!(urls.next(), None);
2019-12-20 22:50:58 +00:00
}
2020-01-15 07:27:10 +00:00
#[test]
fn test_type_for_url() {
assert_eq!(type_for_url("phkt.io"), Type::Menu);
assert_eq!(type_for_url("phkt.io/1"), Type::Menu);
assert_eq!(type_for_url("phkt.io/1/"), Type::Menu);
assert_eq!(type_for_url("phkt.io/0/info.txt"), Type::Text);
2020-01-17 06:29:15 +00:00
assert_eq!(
type_for_url("gopher://vernunftzentrum.de/0/tfurrows/resources/tokipona.txt"),
Type::Text
);
2020-01-15 07:27:10 +00:00
assert_eq!(type_for_url("URL:https://google.com"), Type::HTML);
assert_eq!(
type_for_url("telnet://bbs.inter.net:6502/connect"),
Type::Telnet
);
}
2020-02-24 00:01:49 +00:00
#[test]
fn test_clean_response() {
let mut test = "Hi".to_string();
test.push('\u{007F}');
test.push_str(" there!");
test.push('\u{0082}');
clean_response(&mut test);
assert_eq!(test, "Hi there!".to_string());
2020-02-24 00:01:49 +00:00
let mut test = "* \x1b[92mTitle\x1b[0m".to_string();
clean_response(&mut test);
assert_eq!(test, "* \x1b[92mTitle\x1b[0m".to_string());
2020-02-24 00:01:49 +00:00
}
2019-12-20 22:50:58 +00:00
}