mirror of https://github.com/LemmyNet/lemmy
Rewrite fetcher (#1792)
* Use new fetcher implementation for post/comment * rewrite person fetch to use new fetcher * rewrite community to use new fetcher * rename new_fetcher to dereference_object_id * make ObjectId a newtype * handle deletion in new fetcher * rewrite apub object search to be generic * move upsert() method out of ApubObject trait * simplify ObjectId::new (and fix clippy)remove_settings_and_secret_singletons_squashed
parent
21346eb786
commit
527eefbe92
@ -0,0 +1,85 @@
|
||||
use crate::fetcher::post_or_comment::PostOrComment;
|
||||
use lemmy_api_common::blocking;
|
||||
use lemmy_db_queries::source::{
|
||||
comment::Comment_,
|
||||
community::Community_,
|
||||
person::Person_,
|
||||
post::Post_,
|
||||
};
|
||||
use lemmy_db_schema::source::{comment::Comment, community::Community, person::Person, post::Post};
|
||||
use lemmy_utils::LemmyError;
|
||||
use lemmy_websocket::LemmyContext;
|
||||
|
||||
// TODO: merge this trait with ApubObject (means that db_schema needs to depend on apub_lib)
|
||||
#[async_trait::async_trait(?Send)]
|
||||
pub trait DeletableApubObject {
|
||||
// TODO: pass in tombstone with summary field, to decide between remove/delete
|
||||
async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError>;
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl DeletableApubObject for Community {
|
||||
async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError> {
|
||||
let id = self.id;
|
||||
blocking(context.pool(), move |conn| {
|
||||
Community::update_deleted(conn, id, true)
|
||||
})
|
||||
.await??;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl DeletableApubObject for Person {
|
||||
async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError> {
|
||||
let id = self.id;
|
||||
blocking(context.pool(), move |conn| Person::delete_account(conn, id)).await??;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl DeletableApubObject for Post {
|
||||
async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError> {
|
||||
let id = self.id;
|
||||
blocking(context.pool(), move |conn| {
|
||||
Post::update_deleted(conn, id, true)
|
||||
})
|
||||
.await??;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl DeletableApubObject for Comment {
|
||||
async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError> {
|
||||
let id = self.id;
|
||||
blocking(context.pool(), move |conn| {
|
||||
Comment::update_deleted(conn, id, true)
|
||||
})
|
||||
.await??;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl DeletableApubObject for PostOrComment {
|
||||
async fn delete(self, context: &LemmyContext) -> Result<(), LemmyError> {
|
||||
match self {
|
||||
PostOrComment::Comment(c) => {
|
||||
blocking(context.pool(), move |conn| {
|
||||
Comment::update_deleted(conn, c.id, true)
|
||||
})
|
||||
.await??;
|
||||
}
|
||||
PostOrComment::Post(p) => {
|
||||
blocking(context.pool(), move |conn| {
|
||||
Post::update_deleted(conn, p.id, true)
|
||||
})
|
||||
.await??;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -0,0 +1,157 @@
|
||||
use crate::{
|
||||
fetcher::{deletable_apub_object::DeletableApubObject, should_refetch_actor},
|
||||
objects::FromApub,
|
||||
APUB_JSON_CONTENT_TYPE,
|
||||
};
|
||||
use anyhow::anyhow;
|
||||
use diesel::NotFound;
|
||||
use lemmy_api_common::blocking;
|
||||
use lemmy_db_queries::{ApubObject, DbPool};
|
||||
use lemmy_db_schema::DbUrl;
|
||||
use lemmy_utils::{request::retry, settings::structs::Settings, LemmyError};
|
||||
use lemmy_websocket::LemmyContext;
|
||||
use reqwest::StatusCode;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
fmt::{Debug, Display, Formatter},
|
||||
marker::PhantomData,
|
||||
time::Duration,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
/// Maximum number of HTTP requests allowed to handle a single incoming activity (or a single object
|
||||
/// fetch through the search). This should be configurable.
|
||||
static REQUEST_LIMIT: i32 = 25;
|
||||
|
||||
#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
|
||||
pub struct ObjectId<Kind>(Url, #[serde(skip)] PhantomData<Kind>)
|
||||
where
|
||||
Kind: FromApub + ApubObject + DeletableApubObject + Send + 'static,
|
||||
for<'de2> <Kind as FromApub>::ApubType: serde::Deserialize<'de2>;
|
||||
|
||||
impl<Kind> ObjectId<Kind>
|
||||
where
|
||||
Kind: FromApub + ApubObject + DeletableApubObject + Send + 'static,
|
||||
for<'de> <Kind as FromApub>::ApubType: serde::Deserialize<'de>,
|
||||
{
|
||||
pub fn new<T>(url: T) -> Self
|
||||
where
|
||||
T: Into<Url>,
|
||||
{
|
||||
ObjectId(url.into(), PhantomData::<Kind>)
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &Url {
|
||||
&self.0
|
||||
}
|
||||
|
||||
/// Fetches an activitypub object, either from local database (if possible), or over http.
|
||||
pub(crate) async fn dereference(
|
||||
&self,
|
||||
context: &LemmyContext,
|
||||
request_counter: &mut i32,
|
||||
) -> Result<Kind, LemmyError> {
|
||||
let db_object = self.dereference_locally(context.pool()).await?;
|
||||
|
||||
// if its a local object, only fetch it from the database and not over http
|
||||
if self.0.domain() == Some(&Settings::get().get_hostname_without_port()?) {
|
||||
return match db_object {
|
||||
None => Err(NotFound {}.into()),
|
||||
Some(o) => Ok(o),
|
||||
};
|
||||
}
|
||||
|
||||
if let Some(object) = db_object {
|
||||
if let Some(last_refreshed_at) = object.last_refreshed_at() {
|
||||
// TODO: rename to should_refetch_object()
|
||||
if should_refetch_actor(last_refreshed_at) {
|
||||
return self
|
||||
.dereference_remotely(context, request_counter, Some(object))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(object)
|
||||
} else {
|
||||
self
|
||||
.dereference_remotely(context, request_counter, None)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// returning none means the object was not found in local db
|
||||
async fn dereference_locally(&self, pool: &DbPool) -> Result<Option<Kind>, LemmyError> {
|
||||
let id: DbUrl = self.0.clone().into();
|
||||
let object = blocking(pool, move |conn| ApubObject::read_from_apub_id(conn, &id)).await?;
|
||||
match object {
|
||||
Ok(o) => Ok(Some(o)),
|
||||
Err(NotFound {}) => Ok(None),
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn dereference_remotely(
|
||||
&self,
|
||||
context: &LemmyContext,
|
||||
request_counter: &mut i32,
|
||||
db_object: Option<Kind>,
|
||||
) -> Result<Kind, LemmyError> {
|
||||
// dont fetch local objects this way
|
||||
debug_assert!(self.0.domain() != Some(&Settings::get().hostname));
|
||||
|
||||
*request_counter += 1;
|
||||
if *request_counter > REQUEST_LIMIT {
|
||||
return Err(LemmyError::from(anyhow!("Request limit reached")));
|
||||
}
|
||||
|
||||
let res = retry(|| {
|
||||
context
|
||||
.client()
|
||||
.get(self.0.as_str())
|
||||
.header("Accept", APUB_JSON_CONTENT_TYPE)
|
||||
.timeout(Duration::from_secs(60))
|
||||
.send()
|
||||
})
|
||||
.await?;
|
||||
|
||||
if res.status() == StatusCode::GONE {
|
||||
if let Some(db_object) = db_object {
|
||||
db_object.delete(context).await?;
|
||||
}
|
||||
return Err(anyhow!("Fetched remote object {} which was deleted", self).into());
|
||||
}
|
||||
|
||||
let res2: Kind::ApubType = res.json().await?;
|
||||
|
||||
Ok(Kind::from_apub(&res2, context, self.inner(), request_counter).await?)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Kind> Display for ObjectId<Kind>
|
||||
where
|
||||
Kind: FromApub + ApubObject + DeletableApubObject + Send + 'static,
|
||||
for<'de> <Kind as FromApub>::ApubType: serde::Deserialize<'de>,
|
||||
{
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.0.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl<Kind> From<ObjectId<Kind>> for Url
|
||||
where
|
||||
Kind: FromApub + ApubObject + DeletableApubObject + Send + 'static,
|
||||
for<'de> <Kind as FromApub>::ApubType: serde::Deserialize<'de>,
|
||||
{
|
||||
fn from(id: ObjectId<Kind>) -> Self {
|
||||
id.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<Kind> From<ObjectId<Kind>> for DbUrl
|
||||
where
|
||||
Kind: FromApub + ApubObject + DeletableApubObject + Send + 'static,
|
||||
for<'de> <Kind as FromApub>::ApubType: serde::Deserialize<'de>,
|
||||
{
|
||||
fn from(id: ObjectId<Kind>) -> Self {
|
||||
id.0.into()
|
||||
}
|
||||
}
|
@ -1,97 +0,0 @@
|
||||
use crate::{
|
||||
fetcher::fetch::fetch_remote_object,
|
||||
objects::{comment::Note, post::Page, FromApub},
|
||||
PostOrComment,
|
||||
};
|
||||
use anyhow::anyhow;
|
||||
use diesel::result::Error::NotFound;
|
||||
use lemmy_api_common::blocking;
|
||||
use lemmy_db_queries::{ApubObject, Crud};
|
||||
use lemmy_db_schema::source::{comment::Comment, post::Post};
|
||||
use lemmy_utils::LemmyError;
|
||||
use lemmy_websocket::LemmyContext;
|
||||
use log::debug;
|
||||
use url::Url;
|
||||
|
||||
/// Gets a post by its apub ID. If it exists locally, it is returned directly. Otherwise it is
|
||||
/// pulled from its apub ID, inserted and returned.
|
||||
///
|
||||
/// The parent community is also pulled if necessary. Comments are not pulled.
|
||||
pub(crate) async fn get_or_fetch_and_insert_post(
|
||||
post_ap_id: &Url,
|
||||
context: &LemmyContext,
|
||||
recursion_counter: &mut i32,
|
||||
) -> Result<Post, LemmyError> {
|
||||
let post_ap_id_owned = post_ap_id.to_owned();
|
||||
let post = blocking(context.pool(), move |conn| {
|
||||
Post::read_from_apub_id(conn, &post_ap_id_owned.into())
|
||||
})
|
||||
.await?;
|
||||
|
||||
match post {
|
||||
Ok(p) => Ok(p),
|
||||
Err(NotFound {}) => {
|
||||
debug!("Fetching and creating remote post: {}", post_ap_id);
|
||||
let page =
|
||||
fetch_remote_object::<Page>(context.client(), post_ap_id, recursion_counter).await?;
|
||||
let post = Post::from_apub(&page, context, post_ap_id, recursion_counter).await?;
|
||||
|
||||
Ok(post)
|
||||
}
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets a comment by its apub ID. If it exists locally, it is returned directly. Otherwise it is
|
||||
/// pulled from its apub ID, inserted and returned.
|
||||
///
|
||||
/// The parent community, post and comment are also pulled if necessary.
|
||||
pub(crate) async fn get_or_fetch_and_insert_comment(
|
||||
comment_ap_id: &Url,
|
||||
context: &LemmyContext,
|
||||
recursion_counter: &mut i32,
|
||||
) -> Result<Comment, LemmyError> {
|
||||
let comment_ap_id_owned = comment_ap_id.to_owned();
|
||||
let comment = blocking(context.pool(), move |conn| {
|
||||
Comment::read_from_apub_id(conn, &comment_ap_id_owned.into())
|
||||
})
|
||||
.await?;
|
||||
|
||||
match comment {
|
||||
Ok(p) => Ok(p),
|
||||
Err(NotFound {}) => {
|
||||
debug!(
|
||||
"Fetching and creating remote comment and its parents: {}",
|
||||
comment_ap_id
|
||||
);
|
||||
let comment =
|
||||
fetch_remote_object::<Note>(context.client(), comment_ap_id, recursion_counter).await?;
|
||||
let comment = Comment::from_apub(&comment, context, comment_ap_id, recursion_counter).await?;
|
||||
|
||||
let post_id = comment.post_id;
|
||||
let post = blocking(context.pool(), move |conn| Post::read(conn, post_id)).await??;
|
||||
if post.locked {
|
||||
return Err(anyhow!("Post is locked").into());
|
||||
}
|
||||
|
||||
Ok(comment)
|
||||
}
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_or_fetch_and_insert_post_or_comment(
|
||||
ap_id: &Url,
|
||||
context: &LemmyContext,
|
||||
recursion_counter: &mut i32,
|
||||
) -> Result<PostOrComment, LemmyError> {
|
||||
Ok(
|
||||
match get_or_fetch_and_insert_post(ap_id, context, recursion_counter).await {
|
||||
Ok(p) => PostOrComment::Post(Box::new(p)),
|
||||
Err(_) => {
|
||||
let c = get_or_fetch_and_insert_comment(ap_id, context, recursion_counter).await?;
|
||||
PostOrComment::Comment(Box::new(c))
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
use crate::{
|
||||
fetcher::{fetch::fetch_remote_object, is_deleted, should_refetch_actor},
|
||||
objects::{person::Person as ApubPerson, FromApub},
|
||||
};
|
||||
use anyhow::anyhow;
|
||||
use diesel::result::Error::NotFound;
|
||||
use lemmy_api_common::blocking;
|
||||
use lemmy_db_queries::{source::person::Person_, ApubObject};
|
||||
use lemmy_db_schema::source::person::Person;
|
||||
use lemmy_utils::LemmyError;
|
||||
use lemmy_websocket::LemmyContext;
|
||||
use log::debug;
|
||||
use url::Url;
|
||||
|
||||
/// Get a person from its apub ID.
|
||||
///
|
||||
/// If it exists locally and `!should_refetch_actor()`, it is returned directly from the database.
|
||||
/// Otherwise it is fetched from the remote instance, stored and returned.
|
||||
pub(crate) async fn get_or_fetch_and_upsert_person(
|
||||
apub_id: &Url,
|
||||
context: &LemmyContext,
|
||||
recursion_counter: &mut i32,
|
||||
) -> Result<Person, LemmyError> {
|
||||
let apub_id_owned = apub_id.to_owned();
|
||||
let person = blocking(context.pool(), move |conn| {
|
||||
Person::read_from_apub_id(conn, &apub_id_owned.into())
|
||||
})
|
||||
.await?;
|
||||
|
||||
match person {
|
||||
// If its older than a day, re-fetch it
|
||||
Ok(u) if !u.local && should_refetch_actor(u.last_refreshed_at) => {
|
||||
debug!("Fetching and updating from remote person: {}", apub_id);
|
||||
let person =
|
||||
fetch_remote_object::<ApubPerson>(context.client(), apub_id, recursion_counter).await;
|
||||
|
||||
if is_deleted(&person) {
|
||||
// TODO: use Person::update_deleted() once implemented
|
||||
blocking(context.pool(), move |conn| {
|
||||
Person::delete_account(conn, u.id)
|
||||
})
|
||||
.await??;
|
||||
return Err(anyhow!("Person was deleted by remote instance").into());
|
||||
} else if person.is_err() {
|
||||
return Ok(u);
|
||||
}
|
||||
|
||||
let person = Person::from_apub(&person?, context, apub_id, recursion_counter).await?;
|
||||
|
||||
let person_id = person.id;
|
||||
blocking(context.pool(), move |conn| {
|
||||
Person::mark_as_updated(conn, person_id)
|
||||
})
|
||||
.await??;
|
||||
|
||||
Ok(person)
|
||||
}
|
||||
Ok(u) => Ok(u),
|
||||
Err(NotFound {}) => {
|
||||
debug!("Fetching and creating remote person: {}", apub_id);
|
||||
let person =
|
||||
fetch_remote_object::<ApubPerson>(context.client(), apub_id, recursion_counter).await?;
|
||||
|
||||
let person = Person::from_apub(&person, context, apub_id, recursion_counter).await?;
|
||||
|
||||
Ok(person)
|
||||
}
|
||||
Err(e) => Err(e.into()),
|
||||
}
|
||||
}
|
@ -0,0 +1,85 @@
|
||||
use crate::objects::{comment::Note, post::Page, FromApub};
|
||||
use activitystreams::chrono::NaiveDateTime;
|
||||
use diesel::{result::Error, PgConnection};
|
||||
use lemmy_db_queries::ApubObject;
|
||||
use lemmy_db_schema::{
|
||||
source::{
|
||||
comment::{Comment, CommentForm},
|
||||
post::{Post, PostForm},
|
||||
},
|
||||
DbUrl,
|
||||
};
|
||||
use lemmy_utils::LemmyError;
|
||||
use lemmy_websocket::LemmyContext;
|
||||
use serde::Deserialize;
|
||||
use url::Url;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum PostOrComment {
|
||||
Comment(Box<Comment>),
|
||||
Post(Box<Post>),
|
||||
}
|
||||
|
||||
pub enum PostOrCommentForm {
|
||||
PostForm(PostForm),
|
||||
CommentForm(CommentForm),
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub enum PageOrNote {
|
||||
Page(Page),
|
||||
Note(Note),
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl ApubObject for PostOrComment {
|
||||
fn last_refreshed_at(&self) -> Option<NaiveDateTime> {
|
||||
None
|
||||
}
|
||||
|
||||
// TODO: this can probably be implemented using a single sql query
|
||||
fn read_from_apub_id(conn: &PgConnection, object_id: &DbUrl) -> Result<Self, Error>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
let post = Post::read_from_apub_id(conn, object_id);
|
||||
Ok(match post {
|
||||
Ok(o) => PostOrComment::Post(Box::new(o)),
|
||||
Err(_) => PostOrComment::Comment(Box::new(Comment::read_from_apub_id(conn, object_id)?)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait(?Send)]
|
||||
impl FromApub for PostOrComment {
|
||||
type ApubType = PageOrNote;
|
||||
|
||||
async fn from_apub(
|
||||
apub: &PageOrNote,
|
||||
context: &LemmyContext,
|
||||
expected_domain: &Url,
|
||||
request_counter: &mut i32,
|
||||
) -> Result<Self, LemmyError>
|
||||
where
|
||||
Self: Sized,
|
||||
{
|
||||
Ok(match apub {
|
||||
PageOrNote::Page(p) => PostOrComment::Post(Box::new(
|
||||
Post::from_apub(p, context, expected_domain, request_counter).await?,
|
||||
)),
|
||||
PageOrNote::Note(n) => PostOrComment::Comment(Box::new(
|
||||
Comment::from_apub(n, context, expected_domain, request_counter).await?,
|
||||
)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PostOrComment {
|
||||
pub(crate) fn ap_id(&self) -> Url {
|
||||
match self {
|
||||
PostOrComment::Post(p) => p.ap_id.clone(),
|
||||
PostOrComment::Comment(c) => c.ap_id.clone(),
|
||||
}
|
||||
.into()
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue