mirror of https://github.com/LemmyNet/lemmy
* Split activity table into sent and received parts (fixes #3103) The received activities are only stored in order to avoid processing the same incoming activity multiple times. For this purpose it is completely unnecessary to store the data. So we can split the table into sent_activity and received_activity parts, where only the sent_activity table needs to store activity data. This should reduce storage use significantly. Also reduces activity storage duration to three months; we can reduce this further if necessary. Additionally, the id columns of activity tables are removed because they are completely unused and risk overflowing (fixes #3560). * address review * move insert_received_activity() methods to verify handlers * remove unnecessary conflict line * clippy * use on conflict, add tests
fix_hot_ranks_published
parent
2938b50908
commit
e9e76549a8
@ -1,143 +1,111 @@
|
||||
use crate::{
|
||||
diesel::OptionalExtension,
|
||||
newtypes::DbUrl,
|
||||
schema::activity::dsl::{activity, ap_id},
|
||||
source::activity::{Activity, ActivityInsertForm, ActivityUpdateForm},
|
||||
traits::Crud,
|
||||
source::activity::{ReceivedActivity, SentActivity, SentActivityForm},
|
||||
utils::{get_conn, DbPool},
|
||||
};
|
||||
use diesel::{dsl::insert_into, result::Error, ExpressionMethods, QueryDsl};
|
||||
use diesel::{
|
||||
dsl::insert_into,
|
||||
result::{DatabaseErrorKind, Error, Error::DatabaseError},
|
||||
ExpressionMethods,
|
||||
QueryDsl,
|
||||
};
|
||||
use diesel_async::RunQueryDsl;
|
||||
|
||||
#[async_trait]
|
||||
impl Crud for Activity {
|
||||
type InsertForm = ActivityInsertForm;
|
||||
type UpdateForm = ActivityUpdateForm;
|
||||
type IdType = i32;
|
||||
async fn read(pool: &mut DbPool<'_>, activity_id: i32) -> Result<Self, Error> {
|
||||
let conn = &mut get_conn(pool).await?;
|
||||
activity.find(activity_id).first::<Self>(conn).await
|
||||
}
|
||||
|
||||
async fn create(pool: &mut DbPool<'_>, new_activity: &Self::InsertForm) -> Result<Self, Error> {
|
||||
impl SentActivity {
|
||||
pub async fn create(pool: &mut DbPool<'_>, form: SentActivityForm) -> Result<Self, Error> {
|
||||
use crate::schema::sent_activity::dsl::sent_activity;
|
||||
let conn = &mut get_conn(pool).await?;
|
||||
insert_into(activity)
|
||||
.values(new_activity)
|
||||
insert_into(sent_activity)
|
||||
.values(form)
|
||||
.get_result::<Self>(conn)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn update(
|
||||
pool: &mut DbPool<'_>,
|
||||
activity_id: i32,
|
||||
new_activity: &Self::UpdateForm,
|
||||
) -> Result<Self, Error> {
|
||||
pub async fn read_from_apub_id(pool: &mut DbPool<'_>, object_id: &DbUrl) -> Result<Self, Error> {
|
||||
use crate::schema::sent_activity::dsl::{ap_id, sent_activity};
|
||||
let conn = &mut get_conn(pool).await?;
|
||||
diesel::update(activity.find(activity_id))
|
||||
.set(new_activity)
|
||||
.get_result::<Self>(conn)
|
||||
.await
|
||||
}
|
||||
async fn delete(pool: &mut DbPool<'_>, activity_id: i32) -> Result<usize, Error> {
|
||||
let conn = &mut get_conn(pool).await?;
|
||||
diesel::delete(activity.find(activity_id))
|
||||
.execute(conn)
|
||||
sent_activity
|
||||
.filter(ap_id.eq(object_id))
|
||||
.first::<Self>(conn)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl Activity {
|
||||
pub async fn read_from_apub_id(
|
||||
pool: &mut DbPool<'_>,
|
||||
object_id: &DbUrl,
|
||||
) -> Result<Activity, Error> {
|
||||
impl ReceivedActivity {
|
||||
pub async fn create(pool: &mut DbPool<'_>, ap_id_: &DbUrl) -> Result<(), Error> {
|
||||
use crate::schema::received_activity::dsl::{ap_id, id, received_activity};
|
||||
let conn = &mut get_conn(pool).await?;
|
||||
activity
|
||||
.filter(ap_id.eq(object_id))
|
||||
.first::<Self>(conn)
|
||||
let res = insert_into(received_activity)
|
||||
.values(ap_id.eq(ap_id_))
|
||||
.on_conflict_do_nothing()
|
||||
.returning(id)
|
||||
.get_result::<i64>(conn)
|
||||
.await
|
||||
.optional()?;
|
||||
if res.is_some() {
|
||||
// new activity inserted successfully
|
||||
Ok(())
|
||||
} else {
|
||||
// duplicate activity
|
||||
Err(DatabaseError(
|
||||
DatabaseErrorKind::UniqueViolation,
|
||||
Box::<String>::default(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::{
|
||||
newtypes::DbUrl,
|
||||
source::{
|
||||
activity::{Activity, ActivityInsertForm},
|
||||
instance::Instance,
|
||||
person::{Person, PersonInsertForm},
|
||||
},
|
||||
utils::build_db_pool_for_tests,
|
||||
};
|
||||
use serde_json::Value;
|
||||
use crate::utils::build_db_pool_for_tests;
|
||||
use serde_json::json;
|
||||
use serial_test::serial;
|
||||
use url::Url;
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn test_crud() {
|
||||
async fn receive_activity_duplicate() {
|
||||
let pool = &build_db_pool_for_tests().await;
|
||||
let pool = &mut pool.into();
|
||||
let ap_id: DbUrl = Url::parse("http://example.com/activity/531")
|
||||
.unwrap()
|
||||
.into();
|
||||
|
||||
let inserted_instance = Instance::read_or_create(pool, "my_domain.tld".to_string())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let creator_form = PersonInsertForm::builder()
|
||||
.name("activity_creator_ pm".into())
|
||||
.public_key("pubkey".to_string())
|
||||
.instance_id(inserted_instance.id)
|
||||
.build();
|
||||
// inserting activity for first time
|
||||
let res = ReceivedActivity::create(pool, &ap_id).await;
|
||||
assert!(res.is_ok());
|
||||
|
||||
let inserted_creator = Person::create(pool, &creator_form).await.unwrap();
|
||||
let res = ReceivedActivity::create(pool, &ap_id).await;
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
let ap_id_: DbUrl = Url::parse(
|
||||
"https://enterprise.lemmy.ml/activities/delete/f1b5d57c-80f8-4e03-a615-688d552e946c",
|
||||
)
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn sent_activity_write_read() {
|
||||
let pool = &build_db_pool_for_tests().await;
|
||||
let pool = &mut pool.into();
|
||||
let ap_id: DbUrl = Url::parse("http://example.com/activity/412")
|
||||
.unwrap()
|
||||
.into();
|
||||
let test_json: Value = serde_json::from_str(
|
||||
r#"{
|
||||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"id": "https://enterprise.lemmy.ml/activities/delete/f1b5d57c-80f8-4e03-a615-688d552e946c",
|
||||
"type": "Delete",
|
||||
"actor": "https://enterprise.lemmy.ml/u/riker",
|
||||
"to": "https://www.w3.org/ns/activitystreams#Public",
|
||||
"cc": [
|
||||
"https://enterprise.lemmy.ml/c/main/"
|
||||
],
|
||||
"object": "https://enterprise.lemmy.ml/post/32"
|
||||
}"#,
|
||||
)
|
||||
.unwrap();
|
||||
let activity_form = ActivityInsertForm {
|
||||
ap_id: ap_id_.clone(),
|
||||
data: test_json.clone(),
|
||||
local: Some(true),
|
||||
sensitive: Some(false),
|
||||
updated: None,
|
||||
};
|
||||
|
||||
let inserted_activity = Activity::create(pool, &activity_form).await.unwrap();
|
||||
let data = json!({
|
||||
"key1": "0xF9BA143B95FF6D82",
|
||||
"key2": "42",
|
||||
});
|
||||
let sensitive = false;
|
||||
|
||||
let expected_activity = Activity {
|
||||
ap_id: ap_id_.clone(),
|
||||
id: inserted_activity.id,
|
||||
data: test_json,
|
||||
local: true,
|
||||
sensitive: false,
|
||||
published: inserted_activity.published,
|
||||
updated: None,
|
||||
let form = SentActivityForm {
|
||||
ap_id: ap_id.clone(),
|
||||
data: data.clone(),
|
||||
sensitive,
|
||||
};
|
||||
|
||||
let read_activity = Activity::read(pool, inserted_activity.id).await.unwrap();
|
||||
let read_activity_by_apub_id = Activity::read_from_apub_id(pool, &ap_id_).await.unwrap();
|
||||
Person::delete(pool, inserted_creator.id).await.unwrap();
|
||||
Activity::delete(pool, inserted_activity.id).await.unwrap();
|
||||
SentActivity::create(pool, form).await.unwrap();
|
||||
|
||||
assert_eq!(expected_activity, read_activity);
|
||||
assert_eq!(expected_activity, read_activity_by_apub_id);
|
||||
assert_eq!(expected_activity, inserted_activity);
|
||||
let res = SentActivity::read_from_apub_id(pool, &ap_id).await.unwrap();
|
||||
assert_eq!(res.ap_id, ap_id);
|
||||
assert_eq!(res.data, data);
|
||||
assert_eq!(res.sensitive, sensitive);
|
||||
}
|
||||
}
|
||||
|
@ -1,34 +1,28 @@
|
||||
use crate::{newtypes::DbUrl, schema::activity};
|
||||
use crate::{newtypes::DbUrl, schema::sent_activity};
|
||||
use serde_json::Value;
|
||||
use std::fmt::Debug;
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Queryable, Identifiable)]
|
||||
#[diesel(table_name = activity)]
|
||||
pub struct Activity {
|
||||
pub id: i32,
|
||||
pub data: Value,
|
||||
pub local: bool,
|
||||
pub published: chrono::NaiveDateTime,
|
||||
pub updated: Option<chrono::NaiveDateTime>,
|
||||
#[derive(PartialEq, Eq, Debug, Queryable)]
|
||||
#[diesel(table_name = sent_activity)]
|
||||
pub struct SentActivity {
|
||||
pub id: i64,
|
||||
pub ap_id: DbUrl,
|
||||
pub data: Value,
|
||||
pub sensitive: bool,
|
||||
pub published: chrono::NaiveDateTime,
|
||||
}
|
||||
|
||||
#[derive(Insertable)]
|
||||
#[diesel(table_name = activity)]
|
||||
pub struct ActivityInsertForm {
|
||||
pub data: Value,
|
||||
pub local: Option<bool>,
|
||||
pub updated: Option<chrono::NaiveDateTime>,
|
||||
#[diesel(table_name = sent_activity)]
|
||||
pub struct SentActivityForm {
|
||||
pub ap_id: DbUrl,
|
||||
pub sensitive: Option<bool>,
|
||||
pub data: Value,
|
||||
pub sensitive: bool,
|
||||
}
|
||||
|
||||
#[derive(AsChangeset)]
|
||||
#[diesel(table_name = activity)]
|
||||
pub struct ActivityUpdateForm {
|
||||
pub data: Option<Value>,
|
||||
pub local: Option<bool>,
|
||||
pub updated: Option<Option<chrono::NaiveDateTime>>,
|
||||
pub sensitive: Option<bool>,
|
||||
#[derive(PartialEq, Eq, Debug, Queryable)]
|
||||
#[diesel(table_name = received_activity)]
|
||||
pub struct ReceivedActivity {
|
||||
pub id: i64,
|
||||
pub ap_id: DbUrl,
|
||||
pub published: chrono::NaiveDateTime,
|
||||
}
|
||||
|
@ -0,0 +1,21 @@
|
||||
create table activity (
|
||||
id serial primary key,
|
||||
data jsonb not null,
|
||||
local boolean not null default true,
|
||||
published timestamp not null default now(),
|
||||
updated timestamp,
|
||||
ap_id text not null,
|
||||
sensitive boolean not null default true
|
||||
);
|
||||
|
||||
insert into activity(ap_id, data, sensitive, published)
|
||||
select ap_id, data, sensitive, published
|
||||
from sent_activity
|
||||
order by id desc
|
||||
limit 100000;
|
||||
|
||||
-- We cant copy received_activity entries back into activities table because we dont have data
|
||||
-- which is mandatory.
|
||||
|
||||
drop table sent_activity;
|
||||
drop table received_activity;
|
@ -0,0 +1,35 @@
|
||||
-- outgoing activities, need to be stored to be later server over http
|
||||
-- we change data column from jsonb to json for decreased size
|
||||
-- https://stackoverflow.com/a/22910602
|
||||
create table sent_activity (
|
||||
id bigserial primary key,
|
||||
ap_id text unique not null,
|
||||
data json not null,
|
||||
sensitive boolean not null,
|
||||
published timestamp not null default now()
|
||||
);
|
||||
|
||||
-- incoming activities, we only need the id to avoid processing the same activity multiple times
|
||||
create table received_activity (
|
||||
id bigserial primary key,
|
||||
ap_id text unique not null,
|
||||
published timestamp not null default now()
|
||||
);
|
||||
|
||||
-- copy sent activities to new table. only copy last 100k for faster migration
|
||||
insert into sent_activity(ap_id, data, sensitive, published)
|
||||
select ap_id, data, sensitive, published
|
||||
from activity
|
||||
where local = true
|
||||
order by id desc
|
||||
limit 100000;
|
||||
|
||||
-- copy received activities to new table. only last 1m for faster migration
|
||||
insert into received_activity(ap_id, published)
|
||||
select ap_id, published
|
||||
from activity
|
||||
where local = false
|
||||
order by id desc
|
||||
limit 1000000;
|
||||
|
||||
drop table activity;
|
Loading…
Reference in New Issue