mirror of https://github.com/LemmyNet/lemmy
* Split activity table into sent and received parts (fixes #3103) The received activities are only stored in order to avoid processing the same incoming activity multiple times. For this purpose it is completely unnecessary to store the data. So we can split the table into sent_activity and received_activity parts, where only sent_activity table needs to store activity data. This should reduce storage use significantly. Also reduces activity storage duration to three months, we can reduce this further if necessary. Additionally the id columns of activity tables are removed because they are completely unused and risk overflowing (fixes #3560). * address review * move insert_received_activity() methods to verify handlers * remove unnecessary conflict line * clippy * use on conflict, add testsfix_hot_ranks_published
parent
2938b50908
commit
e9e76549a8
@ -1,143 +1,111 @@
|
|||||||
use crate::{
|
use crate::{
|
||||||
|
diesel::OptionalExtension,
|
||||||
newtypes::DbUrl,
|
newtypes::DbUrl,
|
||||||
schema::activity::dsl::{activity, ap_id},
|
source::activity::{ReceivedActivity, SentActivity, SentActivityForm},
|
||||||
source::activity::{Activity, ActivityInsertForm, ActivityUpdateForm},
|
|
||||||
traits::Crud,
|
|
||||||
utils::{get_conn, DbPool},
|
utils::{get_conn, DbPool},
|
||||||
};
|
};
|
||||||
use diesel::{dsl::insert_into, result::Error, ExpressionMethods, QueryDsl};
|
use diesel::{
|
||||||
|
dsl::insert_into,
|
||||||
|
result::{DatabaseErrorKind, Error, Error::DatabaseError},
|
||||||
|
ExpressionMethods,
|
||||||
|
QueryDsl,
|
||||||
|
};
|
||||||
use diesel_async::RunQueryDsl;
|
use diesel_async::RunQueryDsl;
|
||||||
|
|
||||||
#[async_trait]
|
impl SentActivity {
|
||||||
impl Crud for Activity {
|
pub async fn create(pool: &mut DbPool<'_>, form: SentActivityForm) -> Result<Self, Error> {
|
||||||
type InsertForm = ActivityInsertForm;
|
use crate::schema::sent_activity::dsl::sent_activity;
|
||||||
type UpdateForm = ActivityUpdateForm;
|
|
||||||
type IdType = i32;
|
|
||||||
async fn read(pool: &mut DbPool<'_>, activity_id: i32) -> Result<Self, Error> {
|
|
||||||
let conn = &mut get_conn(pool).await?;
|
|
||||||
activity.find(activity_id).first::<Self>(conn).await
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn create(pool: &mut DbPool<'_>, new_activity: &Self::InsertForm) -> Result<Self, Error> {
|
|
||||||
let conn = &mut get_conn(pool).await?;
|
let conn = &mut get_conn(pool).await?;
|
||||||
insert_into(activity)
|
insert_into(sent_activity)
|
||||||
.values(new_activity)
|
.values(form)
|
||||||
.get_result::<Self>(conn)
|
.get_result::<Self>(conn)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn update(
|
pub async fn read_from_apub_id(pool: &mut DbPool<'_>, object_id: &DbUrl) -> Result<Self, Error> {
|
||||||
pool: &mut DbPool<'_>,
|
use crate::schema::sent_activity::dsl::{ap_id, sent_activity};
|
||||||
activity_id: i32,
|
|
||||||
new_activity: &Self::UpdateForm,
|
|
||||||
) -> Result<Self, Error> {
|
|
||||||
let conn = &mut get_conn(pool).await?;
|
let conn = &mut get_conn(pool).await?;
|
||||||
diesel::update(activity.find(activity_id))
|
sent_activity
|
||||||
.set(new_activity)
|
.filter(ap_id.eq(object_id))
|
||||||
.get_result::<Self>(conn)
|
.first::<Self>(conn)
|
||||||
.await
|
|
||||||
}
|
|
||||||
async fn delete(pool: &mut DbPool<'_>, activity_id: i32) -> Result<usize, Error> {
|
|
||||||
let conn = &mut get_conn(pool).await?;
|
|
||||||
diesel::delete(activity.find(activity_id))
|
|
||||||
.execute(conn)
|
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Activity {
|
impl ReceivedActivity {
|
||||||
pub async fn read_from_apub_id(
|
pub async fn create(pool: &mut DbPool<'_>, ap_id_: &DbUrl) -> Result<(), Error> {
|
||||||
pool: &mut DbPool<'_>,
|
use crate::schema::received_activity::dsl::{ap_id, id, received_activity};
|
||||||
object_id: &DbUrl,
|
|
||||||
) -> Result<Activity, Error> {
|
|
||||||
let conn = &mut get_conn(pool).await?;
|
let conn = &mut get_conn(pool).await?;
|
||||||
activity
|
let res = insert_into(received_activity)
|
||||||
.filter(ap_id.eq(object_id))
|
.values(ap_id.eq(ap_id_))
|
||||||
.first::<Self>(conn)
|
.on_conflict_do_nothing()
|
||||||
|
.returning(id)
|
||||||
|
.get_result::<i64>(conn)
|
||||||
.await
|
.await
|
||||||
|
.optional()?;
|
||||||
|
if res.is_some() {
|
||||||
|
// new activity inserted successfully
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
// duplicate activity
|
||||||
|
Err(DatabaseError(
|
||||||
|
DatabaseErrorKind::UniqueViolation,
|
||||||
|
Box::<String>::default(),
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::{
|
use crate::utils::build_db_pool_for_tests;
|
||||||
newtypes::DbUrl,
|
use serde_json::json;
|
||||||
source::{
|
|
||||||
activity::{Activity, ActivityInsertForm},
|
|
||||||
instance::Instance,
|
|
||||||
person::{Person, PersonInsertForm},
|
|
||||||
},
|
|
||||||
utils::build_db_pool_for_tests,
|
|
||||||
};
|
|
||||||
use serde_json::Value;
|
|
||||||
use serial_test::serial;
|
use serial_test::serial;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
#[serial]
|
#[serial]
|
||||||
async fn test_crud() {
|
async fn receive_activity_duplicate() {
|
||||||
let pool = &build_db_pool_for_tests().await;
|
let pool = &build_db_pool_for_tests().await;
|
||||||
let pool = &mut pool.into();
|
let pool = &mut pool.into();
|
||||||
|
let ap_id: DbUrl = Url::parse("http://example.com/activity/531")
|
||||||
|
.unwrap()
|
||||||
|
.into();
|
||||||
|
|
||||||
let inserted_instance = Instance::read_or_create(pool, "my_domain.tld".to_string())
|
// inserting activity for first time
|
||||||
.await
|
let res = ReceivedActivity::create(pool, &ap_id).await;
|
||||||
.unwrap();
|
assert!(res.is_ok());
|
||||||
|
|
||||||
let creator_form = PersonInsertForm::builder()
|
|
||||||
.name("activity_creator_ pm".into())
|
|
||||||
.public_key("pubkey".to_string())
|
|
||||||
.instance_id(inserted_instance.id)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
let inserted_creator = Person::create(pool, &creator_form).await.unwrap();
|
let res = ReceivedActivity::create(pool, &ap_id).await;
|
||||||
|
assert!(res.is_err());
|
||||||
|
}
|
||||||
|
|
||||||
let ap_id_: DbUrl = Url::parse(
|
#[tokio::test]
|
||||||
"https://enterprise.lemmy.ml/activities/delete/f1b5d57c-80f8-4e03-a615-688d552e946c",
|
#[serial]
|
||||||
)
|
async fn sent_activity_write_read() {
|
||||||
|
let pool = &build_db_pool_for_tests().await;
|
||||||
|
let pool = &mut pool.into();
|
||||||
|
let ap_id: DbUrl = Url::parse("http://example.com/activity/412")
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.into();
|
.into();
|
||||||
let test_json: Value = serde_json::from_str(
|
let data = json!({
|
||||||
r#"{
|
"key1": "0xF9BA143B95FF6D82",
|
||||||
"@context": "https://www.w3.org/ns/activitystreams",
|
"key2": "42",
|
||||||
"id": "https://enterprise.lemmy.ml/activities/delete/f1b5d57c-80f8-4e03-a615-688d552e946c",
|
});
|
||||||
"type": "Delete",
|
let sensitive = false;
|
||||||
"actor": "https://enterprise.lemmy.ml/u/riker",
|
|
||||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
|
||||||
"cc": [
|
|
||||||
"https://enterprise.lemmy.ml/c/main/"
|
|
||||||
],
|
|
||||||
"object": "https://enterprise.lemmy.ml/post/32"
|
|
||||||
}"#,
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
let activity_form = ActivityInsertForm {
|
|
||||||
ap_id: ap_id_.clone(),
|
|
||||||
data: test_json.clone(),
|
|
||||||
local: Some(true),
|
|
||||||
sensitive: Some(false),
|
|
||||||
updated: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let inserted_activity = Activity::create(pool, &activity_form).await.unwrap();
|
|
||||||
|
|
||||||
let expected_activity = Activity {
|
let form = SentActivityForm {
|
||||||
ap_id: ap_id_.clone(),
|
ap_id: ap_id.clone(),
|
||||||
id: inserted_activity.id,
|
data: data.clone(),
|
||||||
data: test_json,
|
sensitive,
|
||||||
local: true,
|
|
||||||
sensitive: false,
|
|
||||||
published: inserted_activity.published,
|
|
||||||
updated: None,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let read_activity = Activity::read(pool, inserted_activity.id).await.unwrap();
|
SentActivity::create(pool, form).await.unwrap();
|
||||||
let read_activity_by_apub_id = Activity::read_from_apub_id(pool, &ap_id_).await.unwrap();
|
|
||||||
Person::delete(pool, inserted_creator.id).await.unwrap();
|
|
||||||
Activity::delete(pool, inserted_activity.id).await.unwrap();
|
|
||||||
|
|
||||||
assert_eq!(expected_activity, read_activity);
|
let res = SentActivity::read_from_apub_id(pool, &ap_id).await.unwrap();
|
||||||
assert_eq!(expected_activity, read_activity_by_apub_id);
|
assert_eq!(res.ap_id, ap_id);
|
||||||
assert_eq!(expected_activity, inserted_activity);
|
assert_eq!(res.data, data);
|
||||||
|
assert_eq!(res.sensitive, sensitive);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,34 +1,28 @@
|
|||||||
use crate::{newtypes::DbUrl, schema::activity};
|
use crate::{newtypes::DbUrl, schema::sent_activity};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Queryable, Identifiable)]
|
#[derive(PartialEq, Eq, Debug, Queryable)]
|
||||||
#[diesel(table_name = activity)]
|
#[diesel(table_name = sent_activity)]
|
||||||
pub struct Activity {
|
pub struct SentActivity {
|
||||||
pub id: i32,
|
pub id: i64,
|
||||||
pub data: Value,
|
|
||||||
pub local: bool,
|
|
||||||
pub published: chrono::NaiveDateTime,
|
|
||||||
pub updated: Option<chrono::NaiveDateTime>,
|
|
||||||
pub ap_id: DbUrl,
|
pub ap_id: DbUrl,
|
||||||
|
pub data: Value,
|
||||||
pub sensitive: bool,
|
pub sensitive: bool,
|
||||||
|
pub published: chrono::NaiveDateTime,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Insertable)]
|
#[derive(Insertable)]
|
||||||
#[diesel(table_name = activity)]
|
#[diesel(table_name = sent_activity)]
|
||||||
pub struct ActivityInsertForm {
|
pub struct SentActivityForm {
|
||||||
pub data: Value,
|
|
||||||
pub local: Option<bool>,
|
|
||||||
pub updated: Option<chrono::NaiveDateTime>,
|
|
||||||
pub ap_id: DbUrl,
|
pub ap_id: DbUrl,
|
||||||
pub sensitive: Option<bool>,
|
pub data: Value,
|
||||||
|
pub sensitive: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(AsChangeset)]
|
#[derive(PartialEq, Eq, Debug, Queryable)]
|
||||||
#[diesel(table_name = activity)]
|
#[diesel(table_name = received_activity)]
|
||||||
pub struct ActivityUpdateForm {
|
pub struct ReceivedActivity {
|
||||||
pub data: Option<Value>,
|
pub id: i64,
|
||||||
pub local: Option<bool>,
|
pub ap_id: DbUrl,
|
||||||
pub updated: Option<Option<chrono::NaiveDateTime>>,
|
pub published: chrono::NaiveDateTime,
|
||||||
pub sensitive: Option<bool>,
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,21 @@
|
|||||||
|
create table activity (
|
||||||
|
id serial primary key,
|
||||||
|
data jsonb not null,
|
||||||
|
local boolean not null default true,
|
||||||
|
published timestamp not null default now(),
|
||||||
|
updated timestamp,
|
||||||
|
ap_id text not null,
|
||||||
|
sensitive boolean not null default true
|
||||||
|
);
|
||||||
|
|
||||||
|
insert into activity(ap_id, data, sensitive, published)
|
||||||
|
select ap_id, data, sensitive, published
|
||||||
|
from sent_activity
|
||||||
|
order by id desc
|
||||||
|
limit 100000;
|
||||||
|
|
||||||
|
-- We cant copy received_activity entries back into activities table because we dont have data
|
||||||
|
-- which is mandatory.
|
||||||
|
|
||||||
|
drop table sent_activity;
|
||||||
|
drop table received_activity;
|
@ -0,0 +1,35 @@
|
|||||||
|
-- outgoing activities, need to be stored to be later server over http
|
||||||
|
-- we change data column from jsonb to json for decreased size
|
||||||
|
-- https://stackoverflow.com/a/22910602
|
||||||
|
create table sent_activity (
|
||||||
|
id bigserial primary key,
|
||||||
|
ap_id text unique not null,
|
||||||
|
data json not null,
|
||||||
|
sensitive boolean not null,
|
||||||
|
published timestamp not null default now()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- incoming activities, we only need the id to avoid processing the same activity multiple times
|
||||||
|
create table received_activity (
|
||||||
|
id bigserial primary key,
|
||||||
|
ap_id text unique not null,
|
||||||
|
published timestamp not null default now()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- copy sent activities to new table. only copy last 100k for faster migration
|
||||||
|
insert into sent_activity(ap_id, data, sensitive, published)
|
||||||
|
select ap_id, data, sensitive, published
|
||||||
|
from activity
|
||||||
|
where local = true
|
||||||
|
order by id desc
|
||||||
|
limit 100000;
|
||||||
|
|
||||||
|
-- copy received activities to new table. only last 1m for faster migration
|
||||||
|
insert into received_activity(ap_id, published)
|
||||||
|
select ap_id, published
|
||||||
|
from activity
|
||||||
|
where local = false
|
||||||
|
order by id desc
|
||||||
|
limit 1000000;
|
||||||
|
|
||||||
|
drop table activity;
|
Loading…
Reference in New Issue