Merge branch 'main' into lemmynsfw-changes

lemmynsfw-changes
Felix Ableitner 5 months ago
commit 634024a5e4

@ -135,6 +135,18 @@ steps:
- diesel migration redo
when: *slow_check_paths
# CI step: builds and runs the lemmy_db_perf tool with a tiny workload
# (10 posts, 1 page of reads) against the `database` host named in LEMMY_DATABASE_URL.
check_db_perf_tool:
image: *rust_image
environment:
LEMMY_DATABASE_URL: postgres://lemmy:password@database:5432/lemmy
RUST_BACKTRACE: "1"
CARGO_HOME: .cargo_home
commands:
# same as scripts/db_perf.sh but without creating a new database server
- export LEMMY_CONFIG_LOCATION=config/config.hjson
- cargo run --package lemmy_db_perf -- --posts 10 --read-post-pages 1
# Only runs when the paths referenced by the `slow_check_paths` anchor changed.
when: *slow_check_paths
cargo_clippy:
image: *rust_image
environment:

560
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -1,5 +1,5 @@
[workspace.package]
version = "0.19.3-rc.1"
version = "0.19.3"
edition = "2021"
description = "A link aggregator for the fediverse"
license = "AGPL-3.0"
@ -54,6 +54,7 @@ members = [
"crates/api_common",
"crates/apub",
"crates/utils",
"crates/db_perf",
"crates/db_schema",
"crates/db_views",
"crates/db_views_actor",
@ -85,25 +86,25 @@ unused_self = "deny"
unwrap_used = "deny"
[workspace.dependencies]
lemmy_api = { version = "=0.19.3-rc.1", path = "./crates/api" }
lemmy_api_crud = { version = "=0.19.3-rc.1", path = "./crates/api_crud" }
lemmy_apub = { version = "=0.19.3-rc.1", path = "./crates/apub" }
lemmy_utils = { version = "=0.19.3-rc.1", path = "./crates/utils" }
lemmy_db_schema = { version = "=0.19.3-rc.1", path = "./crates/db_schema" }
lemmy_api_common = { version = "=0.19.3-rc.1", path = "./crates/api_common" }
lemmy_routes = { version = "=0.19.3-rc.1", path = "./crates/routes" }
lemmy_db_views = { version = "=0.19.3-rc.1", path = "./crates/db_views" }
lemmy_db_views_actor = { version = "=0.19.3-rc.1", path = "./crates/db_views_actor" }
lemmy_db_views_moderator = { version = "=0.19.3-rc.1", path = "./crates/db_views_moderator" }
lemmy_api = { version = "=0.19.3", path = "./crates/api" }
lemmy_api_crud = { version = "=0.19.3", path = "./crates/api_crud" }
lemmy_apub = { version = "=0.19.3", path = "./crates/apub" }
lemmy_utils = { version = "=0.19.3", path = "./crates/utils" }
lemmy_db_schema = { version = "=0.19.3", path = "./crates/db_schema" }
lemmy_api_common = { version = "=0.19.3", path = "./crates/api_common" }
lemmy_routes = { version = "=0.19.3", path = "./crates/routes" }
lemmy_db_views = { version = "=0.19.3", path = "./crates/db_views" }
lemmy_db_views_actor = { version = "=0.19.3", path = "./crates/db_views_actor" }
lemmy_db_views_moderator = { version = "=0.19.3", path = "./crates/db_views_moderator" }
activitypub_federation = { version = "0.5.1-beta.1", default-features = false, features = [
"actix-web",
] }
diesel = "2.1.4"
diesel_migrations = "2.1.0"
diesel-async = "0.4.1"
serde = { version = "1.0.193", features = ["derive"] }
serde_with = "3.4.0"
actix-web = { version = "4.4.0", default-features = false, features = [
serde = { version = "1.0.195", features = ["derive"] }
serde_with = "3.5.1"
actix-web = { version = "4.4.1", default-features = false, features = [
"macros",
"rustls",
"compress-brotli",
@ -117,45 +118,47 @@ tracing-error = "0.2.0"
tracing-log = "0.2.0"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
url = { version = "2.5.0", features = ["serde"] }
reqwest = { version = "0.11.22", features = ["json", "blocking", "gzip"] }
reqwest = { version = "0.11.23", features = ["json", "blocking", "gzip"] }
reqwest-middleware = "0.2.4"
reqwest-tracing = "0.4.6"
reqwest-tracing = "0.4.7"
clokwerk = "0.4.0"
doku = { version = "0.21.1", features = ["url-2"] }
bcrypt = "0.15.0"
chrono = { version = "0.4.31", features = ["serde"], default-features = false }
serde_json = { version = "1.0.108", features = ["preserve_order"] }
base64 = "0.21.5"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
async-trait = "0.1.74"
chrono = { version = "0.4.32", features = ["serde"], default-features = false }
serde_json = { version = "1.0.111", features = ["preserve_order"] }
base64 = "0.21.7"
uuid = { version = "1.7.0", features = ["serde", "v4"] }
async-trait = "0.1.77"
captcha = "0.0.9"
anyhow = { version = "1.0.75", features = [
anyhow = { version = "1.0.79", features = [
"backtrace",
] } # backtrace is on by default on nightly, but not stable rust
diesel_ltree = "0.3.0"
typed-builder = "0.18.0"
diesel_ltree = "0.3.1"
typed-builder = "0.18.1"
serial_test = "2.0.0"
tokio = { version = "1.35.0", features = ["full"] }
regex = "1.10.2"
tokio = { version = "1.35.1", features = ["full"] }
regex = "1.10.3"
once_cell = "1.19.0"
diesel-derive-newtype = "2.1.0"
diesel-derive-enum = { version = "2.1.0", features = ["postgres"] }
strum = "0.25.0"
strum_macros = "0.25.3"
itertools = "0.12.0"
futures = "0.3.29"
futures = "0.3.30"
http = "0.2.11"
percent-encoding = "2.3.1"
rosetta-i18n = "0.1.3"
opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
tracing-opentelemetry = { version = "0.19.0" }
ts-rs = { version = "7.0.0", features = ["serde-compat", "chrono-impl"] }
ts-rs = { version = "7.1.1", features = ["serde-compat", "chrono-impl"] }
rustls = { version = "0.21.10", features = ["dangerous_configuration"] }
futures-util = "0.3.29"
futures-util = "0.3.30"
tokio-postgres = "0.7.10"
tokio-postgres-rustls = "0.10.0"
enum-map = "2.7"
moka = { version = "0.12.1", features = ["future"] }
moka = { version = "0.12.4", features = ["future"] }
i-love-jesus = { version = "0.1.0" }
clap = { version = "4.4.18", features = ["derive"] }
pretty_assertions = "1.4.0"
[dependencies]
@ -166,7 +169,7 @@ lemmy_utils = { workspace = true }
lemmy_db_schema = { workspace = true }
lemmy_api_common = { workspace = true }
lemmy_routes = { workspace = true }
lemmy_federate = { version = "0.19.3-rc.1", path = "crates/federate" }
lemmy_federate = { version = "0.19.3", path = "crates/federate" }
activitypub_federation = { workspace = true }
diesel = { workspace = true }
diesel-async = { workspace = true }
@ -186,14 +189,14 @@ tracing-opentelemetry = { workspace = true, optional = true }
opentelemetry = { workspace = true, optional = true }
console-subscriber = { version = "0.1.10", optional = true }
opentelemetry-otlp = { version = "0.12.0", optional = true }
pict-rs = { version = "0.5.0-rc.2", optional = true }
pict-rs = { version = "0.5.1", optional = true }
tokio.workspace = true
actix-cors = "0.6.5"
futures-util = { workspace = true }
chrono = { workspace = true }
prometheus = { version = "0.13.3", features = ["process"] }
serial_test = { workspace = true }
clap = { version = "4.4.11", features = ["derive"] }
clap = { workspace = true }
actix-web-prom = "0.7.0"
[dev-dependencies]

@ -27,7 +27,7 @@
"eslint": "^8.55.0",
"eslint-plugin-prettier": "^5.0.1",
"jest": "^29.5.0",
"lemmy-js-client": "0.19.0",
"lemmy-js-client": "0.19.2-alpha.2",
"prettier": "^3.1.1",
"ts-jest": "^29.1.0",
"typescript": "^5.3.3"

@ -24,21 +24,32 @@ test("Follow local community", async () => {
let community = (await resolveBetaCommunity(user)).community!;
expect(community.counts.subscribers).toBe(1);
expect(community.counts.subscribers_local).toBe(1);
let follow = await followCommunity(user, true, community.community.id);
// Make sure the follow response went through
expect(follow.community_view.community.local).toBe(true);
expect(follow.community_view.subscribed).toBe("Subscribed");
expect(follow.community_view.counts.subscribers).toBe(2);
expect(follow.community_view.counts.subscribers_local).toBe(2);
// Test an unfollow
let unfollow = await followCommunity(user, false, community.community.id);
expect(unfollow.community_view.subscribed).toBe("NotSubscribed");
expect(unfollow.community_view.counts.subscribers).toBe(1);
expect(unfollow.community_view.counts.subscribers_local).toBe(1);
});
test("Follow federated community", async () => {
let betaCommunity = (await resolveBetaCommunity(alpha)).community;
// It takes about 1 second for the community aggregates to federate
let betaCommunity = (
await waitUntil(
() => resolveBetaCommunity(alpha),
c =>
c.community?.counts.subscribers === 1 &&
c.community.counts.subscribers_local === 0,
)
).community;
if (!betaCommunity) {
throw "Missing beta community";
}
@ -55,10 +66,12 @@ test("Follow federated community", async () => {
expect(betaCommunity?.community.local).toBe(false);
expect(betaCommunity?.community.name).toBe("main");
expect(betaCommunity?.subscribed).toBe("Subscribed");
expect(betaCommunity?.counts.subscribers_local).toBe(1);
// check that follow was federated
let communityOnBeta1 = await resolveBetaCommunity(beta);
expect(communityOnBeta1.community?.counts.subscribers).toBe(2);
expect(communityOnBeta1.community?.counts.subscribers_local).toBe(1);
// Check it from local
let site = await getSite(alpha);
@ -83,4 +96,5 @@ test("Follow federated community", async () => {
// check that unfollow was federated
let communityOnBeta2 = await resolveBetaCommunity(beta);
expect(communityOnBeta2.community?.counts.subscribers).toBe(1);
expect(communityOnBeta2.community?.counts.subscribers_local).toBe(1);
});

@ -2286,10 +2286,10 @@ kleur@^3.0.3:
resolved "https://registry.yarnpkg.com/kleur/-/kleur-3.0.3.tgz#a79c9ecc86ee1ce3fa6206d1216c501f147fc07e"
integrity sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==
lemmy-js-client@0.19.0:
version "0.19.0"
resolved "https://registry.yarnpkg.com/lemmy-js-client/-/lemmy-js-client-0.19.0.tgz#50098183264fa176784857f45665b06994b31e18"
integrity sha512-h+E8wC9RKjlToWw9+kuGFAzk4Fiaf61KqAwzvoCDAfj2L1r+YNt5EDMOggGCoRx5PlqLuIVr7BNEU46KxJfmHA==
lemmy-js-client@0.19.2-alpha.2:
version "0.19.2-alpha.2"
resolved "https://registry.yarnpkg.com/lemmy-js-client/-/lemmy-js-client-0.19.2-alpha.2.tgz#09956df6392fa7df437343d1f1576b6297537113"
integrity sha512-/RztLo4EIDQeEN51awYJfx8JcNCHecOPrM14sSJ6/qLOOxQTPFsDrd7a2WplHpj7Wf8xci2UNfW26PmnVMOPaQ==
dependencies:
cross-fetch "^3.1.5"
form-data "^4.0.0"

@ -35,7 +35,7 @@ chrono = { workspace = true }
url = { workspace = true }
wav = "1.0.0"
sitemap-rs = "0.2.0"
totp-rs = { version = "5.4.0", features = ["gen_secret", "otpauth"] }
totp-rs = { version = "5.5.1", features = ["gen_secret", "otpauth"] }
actix-web-httpauth = "0.8.1"
[dev-dependencies]

@ -2,9 +2,9 @@ use actix_web::web::{Data, Json, Query};
use lemmy_api_common::{
comment::{ListCommentLikes, ListCommentLikesResponse},
context::LemmyContext,
utils::is_admin,
utils::is_mod_or_admin,
};
use lemmy_db_views::structs::{LocalUserView, VoteView};
use lemmy_db_views::structs::{CommentView, LocalUserView, VoteView};
use lemmy_utils::error::LemmyError;
/// Lists likes for a comment
@ -14,8 +14,18 @@ pub async fn list_comment_likes(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> Result<Json<ListCommentLikesResponse>, LemmyError> {
// Make sure user is an admin
is_admin(&local_user_view)?;
let comment_view = CommentView::read(
&mut context.pool(),
data.comment_id,
Some(local_user_view.person.id),
)
.await?;
is_mod_or_admin(
&mut context.pool(),
&local_user_view.person,
comment_view.community.id,
)
.await?;
let comment_likes =
VoteView::list_for_comment(&mut context.pool(), data.comment_id, data.page, data.limit).await?;

@ -2,8 +2,9 @@ use actix_web::web::{Data, Json, Query};
use lemmy_api_common::{
context::LemmyContext,
post::{ListPostLikes, ListPostLikesResponse},
utils::is_admin,
utils::is_mod_or_admin,
};
use lemmy_db_schema::{source::post::Post, traits::Crud};
use lemmy_db_views::structs::{LocalUserView, VoteView};
use lemmy_utils::error::LemmyError;
@ -14,8 +15,13 @@ pub async fn list_post_likes(
context: Data<LemmyContext>,
local_user_view: LocalUserView,
) -> Result<Json<ListPostLikesResponse>, LemmyError> {
// Make sure user is an admin
is_admin(&local_user_view)?;
let post = Post::read(&mut context.pool(), data.post_id).await?;
is_mod_or_admin(
&mut context.pool(),
&local_user_view.person,
post.community_id,
)
.await?;
let post_likes =
VoteView::list_for_post(&mut context.pool(), data.post_id, data.page, data.limit).await?;

@ -68,7 +68,7 @@ once_cell = { workspace = true, optional = true }
actix-web = { workspace = true, optional = true }
jsonwebtoken = { version = "8.3.0", optional = true }
# necessary for wasm compilation
getrandom = { version = "0.2.11", features = ["js"] }
getrandom = { version = "0.2.12", features = ["js"] }
enum-map = { workspace = true }
[package.metadata.cargo-machete]

@ -1,6 +1,7 @@
use crate::sensitive::Sensitive;
use lemmy_db_schema::{
newtypes::{CommentReplyId, CommunityId, LanguageId, PersonId, PersonMentionId},
source::site::Site,
CommentSortType,
ListingType,
PostListingMode,
@ -172,12 +173,14 @@ pub struct GetPersonDetails {
pub saved_only: Option<bool>,
}
#[skip_serializing_none]
#[derive(Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// A person's details response.
pub struct GetPersonDetailsResponse {
pub person_view: PersonView,
pub site: Option<Site>,
pub comments: Vec<CommentView>,
pub posts: Vec<PostView>,
pub moderates: Vec<CommunityModeratorView>,

@ -20,6 +20,7 @@ use lemmy_db_schema::{
person::{Person, PersonUpdateForm},
person_block::PersonBlock,
post::{Post, PostRead},
site::Site,
},
traits::Crud,
utils::DbPool,
@ -547,6 +548,18 @@ pub fn check_private_instance_and_federation_enabled(
}
}
/// Read the site for an actor_id.
///
/// Used for GetCommunityResponse and GetPersonDetails.
///
/// The instance actor id is derived from `actor_id` and the matching `Site` is looked up by
/// that apub id. The `Option<Site>` produced by the lookup is returned unchanged; lookup errors
/// are propagated to the caller.
pub async fn read_site_for_actor(
  actor_id: DbUrl,
  context: &LemmyContext,
) -> Result<Option<Site>, LemmyError> {
  // `actor_id` is owned by this function, so it can be converted directly without a clone.
  let site_id = Site::instance_actor_id_from_url(actor_id.into());
  let site = Site::read_from_apub_id(&mut context.pool(), &site_id.into()).await?;
  Ok(site)
}
pub async fn purge_image_posts_for_person(
banned_person_id: PersonId,
context: &LemmyContext,

@ -4,13 +4,12 @@ use actix_web::web::{Json, Query};
use lemmy_api_common::{
community::{GetCommunity, GetCommunityResponse},
context::LemmyContext,
utils::{check_private_instance, is_mod_or_admin_opt},
utils::{check_private_instance, is_mod_or_admin_opt, read_site_for_actor},
};
use lemmy_db_schema::source::{
actor_language::CommunityLanguage,
community::Community,
local_site::LocalSite,
site::Site,
};
use lemmy_db_views::structs::LocalUserView;
use lemmy_db_views_actor::structs::{CommunityModeratorView, CommunityView};
@ -64,15 +63,7 @@ pub async fn get_community(
.await
.with_lemmy_type(LemmyErrorType::CouldntFindCommunity)?;
let site_id = Site::instance_actor_id_from_url(community_view.community.actor_id.clone().into());
let mut site = Site::read_from_apub_id(&mut context.pool(), &site_id.into()).await?;
// no need to include metadata for local site (its already available through other endpoints).
// this also prevents us from leaking the federation private key.
if let Some(s) = &site {
if s.actor_id.domain() == Some(context.settings().hostname.as_ref()) {
site = None;
}
}
let site = read_site_for_actor(community_view.community.actor_id.clone(), &context).await?;
let community_id = community_view.community.id;
let discussion_languages = CommunityLanguage::read(&mut context.pool(), community_id).await?;

@ -4,7 +4,7 @@ use actix_web::web::{Json, Query};
use lemmy_api_common::{
context::LemmyContext,
person::{GetPersonDetails, GetPersonDetailsResponse},
utils::check_private_instance,
utils::{check_private_instance, read_site_for_actor},
};
use lemmy_db_schema::{source::person::Person, utils::post_to_comment_sort_type};
use lemmy_db_views::{
@ -90,9 +90,12 @@ pub async fn read_person(
let moderates =
CommunityModeratorView::for_person(&mut context.pool(), person_details_id).await?;
let site = read_site_for_actor(person_view.person.actor_id.clone(), &context).await?;
// Return the jwt
Ok(Json(GetPersonDetailsResponse {
person_view,
site,
moderates,
comments,
posts,

@ -106,10 +106,10 @@ pub async fn import_settings(
let local_user_form = LocalUserUpdateForm {
show_nsfw: data.settings.as_ref().map(|s| s.show_nsfw),
theme: data.settings.as_ref().map(|s| s.theme.clone()),
theme: data.settings.clone().map(|s| s.theme.clone()),
default_sort_type: data.settings.as_ref().map(|s| s.default_sort_type),
default_listing_type: data.settings.as_ref().map(|s| s.default_listing_type),
interface_language: data.settings.as_ref().map(|s| s.interface_language.clone()),
interface_language: data.settings.clone().map(|s| s.interface_language),
show_avatars: data.settings.as_ref().map(|s| s.show_avatars),
send_notifications_to_email: data
.settings

@ -15,7 +15,7 @@ use lemmy_utils::error::LemmyError;
use url::Url;
#[derive(Clone, Debug)]
pub(crate) struct ApubCommunityFeatured(Vec<ApubPost>);
pub(crate) struct ApubCommunityFeatured(());
#[async_trait::async_trait]
impl Collection for ApubCommunityFeatured {
@ -86,6 +86,6 @@ impl Collection for ApubCommunityFeatured {
.await;
// This return value is unused, so just set an empty vec
Ok(ApubCommunityFeatured(Vec::new()))
Ok(ApubCommunityFeatured(()))
}
}

@ -15,7 +15,7 @@ use lemmy_utils::error::LemmyError;
use url::Url;
#[derive(Clone, Debug)]
pub(crate) struct ApubCommunityFollower(Vec<()>);
pub(crate) struct ApubCommunityFollower(());
#[async_trait::async_trait]
impl Collection for ApubCommunityFollower {
@ -61,6 +61,6 @@ impl Collection for ApubCommunityFollower {
)
.await?;
Ok(ApubCommunityFollower(Vec::new()))
Ok(ApubCommunityFollower(()))
}
}

@ -19,7 +19,7 @@ use lemmy_utils::error::LemmyError;
use url::Url;
#[derive(Clone, Debug)]
pub(crate) struct ApubCommunityModerators(pub(crate) Vec<CommunityModeratorView>);
pub(crate) struct ApubCommunityModerators(());
#[async_trait::async_trait]
impl Collection for ApubCommunityModerators {
@ -96,7 +96,7 @@ impl Collection for ApubCommunityModerators {
}
// This return value is unused, so just set an empty vec
Ok(ApubCommunityModerators(Vec::new()))
Ok(ApubCommunityModerators(()))
}
}

@ -27,7 +27,7 @@ use lemmy_utils::error::LemmyError;
use url::Url;
#[derive(Clone, Debug)]
pub(crate) struct ApubCommunityOutbox(Vec<ApubPost>);
pub(crate) struct ApubCommunityOutbox(());
#[async_trait::async_trait]
impl Collection for ApubCommunityOutbox {
@ -111,6 +111,6 @@ impl Collection for ApubCommunityOutbox {
.await;
// This return value is unused, so just set an empty vec
Ok(ApubCommunityOutbox(Vec::new()))
Ok(ApubCommunityOutbox(()))
}
}

@ -0,0 +1,23 @@
# Manifest for the `lemmy_db_perf` crate.
# All package metadata below is inherited from the workspace manifest.
[package]
name = "lemmy_db_perf"
version.workspace = true
edition.workspace = true
description.workspace = true
license.workspace = true
homepage.workspace = true
documentation.workspace = true
repository.workspace = true
# Lint configuration is shared with the rest of the workspace.
[lints]
workspace = true
# Every dependency version is taken from `[workspace.dependencies]`.
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true }
diesel = { workspace = true }
diesel-async = { workspace = true }
lemmy_db_schema = { workspace = true }
lemmy_db_views = { workspace = true, features = ["full"] }
lemmy_utils = { workspace = true }
tokio = { workspace = true }

@ -0,0 +1,179 @@
mod series;
use crate::series::ValuesFromSeries;
use anyhow::Context;
use clap::Parser;
use diesel::{
dsl::{self, sql},
sql_types,
ExpressionMethods,
IntoSql,
};
use diesel_async::{RunQueryDsl, SimpleAsyncConnection};
use lemmy_db_schema::{
schema::post,
source::{
community::{Community, CommunityInsertForm},
instance::Instance,
person::{Person, PersonInsertForm},
},
traits::Crud,
utils::{build_db_pool, get_conn, now},
SortType,
};
use lemmy_db_views::{post_view::PostQuery, structs::PaginationCursor};
use lemmy_utils::error::{LemmyErrorExt2, LemmyResult};
use std::num::NonZeroU32;
// Command-line options of the lemmy_db_perf tool, parsed by clap.
// NOTE: plain `//` comments are used on purpose here. clap's derive turns `///` doc comments
// into `--help` text, so adding them would change the program's output.
#[derive(Parser, Debug)]
struct CmdArgs {
// Number of communities to create (at least 1).
#[arg(long, default_value_t = 3.try_into().unwrap())]
communities: NonZeroU32,
// Number of people to create (at least 1).
#[arg(long, default_value_t = 3.try_into().unwrap())]
people: NonZeroU32,
// Requested number of posts. The amount actually inserted is rounded down to a multiple of
// `people * communities`, because posts are inserted in equally sized batches.
#[arg(long, default_value_t = 100000.try_into().unwrap())]
posts: NonZeroU32,
// Maximum number of post-list pages to read after the insertions (0 reads nothing).
#[arg(long, default_value_t = 0)]
read_post_pages: u32,
// When set, the auto_explain settings are applied before the test data is inserted.
#[arg(long)]
explain_insertions: bool,
}
/// Entry point: runs `try_main` and converts its error into an `anyhow` error.
///
/// When the `PGDATA` environment variable is set, the error is additionally annotated with a
/// hint pointing at the log directory below that path.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
  let outcome = try_main().await.into_anyhow();
  match std::env::var("PGDATA") {
    Ok(path) => outcome.with_context(|| {
      format!("Failed to run lemmy_db_perf (more details might be available in {path}/log)")
    }),
    // No PGDATA available: return the result without extra context.
    Err(_) => outcome,
  }
}
/// Runs the whole tool: parses the command line, connects to the database, creates people,
/// communities and posts, optionally reads pages of posts, and finally deletes the created
/// instance again.
///
/// Any database error aborts the run early; in that case the cleanup at the end is not reached.
async fn try_main() -> LemmyResult<()> {
let args = CmdArgs::parse();
let pool = &build_db_pool().await?;
let pool = &mut pool.into();
let conn = &mut get_conn(pool).await?;
if args.explain_insertions {
// log_nested_statements is enabled to log trigger execution
conn
.batch_execute(
"SET auto_explain.log_min_duration = 0; SET auto_explain.log_nested_statements = on;",
)
.await?;
}
// All test data hangs off this instance, which is deleted again at the end of the run.
let instance = Instance::read_or_create(&mut conn.into(), "reddit.com".to_owned()).await?;
println!("🫃 creating {} people", args.people);
let mut person_ids = vec![];
for i in 0..args.people.get() {
let form = PersonInsertForm::builder()
.name(format!("p{i}"))
.public_key("pubkey".to_owned())
.instance_id(instance.id)
.build();
person_ids.push(Person::create(&mut conn.into(), &form).await?.id);
}
println!("🌍 creating {} communities", args.communities);
let mut community_ids = vec![];
for i in 0..args.communities.get() {
let form = CommunityInsertForm::builder()
.name(format!("c{i}"))
.title(i.to_string())
.instance_id(instance.id)
.build();
community_ids.push(Community::create(&mut conn.into(), &form).await?.id);
}
// One batch of posts is inserted per (person, community) pair. The integer division rounds
// the requested total down to a multiple of the number of batches.
let post_batches = args.people.get() * args.communities.get();
let posts_per_batch = args.posts.get() / post_batches;
let num_posts = post_batches * posts_per_batch;
println!(
"📜 creating {} posts ({} featured in community)",
num_posts, post_batches
);
let mut num_inserted_posts = 0;
// TODO: progress bar
for person_id in &person_ids {
for community_id in &community_ids {
// A single INSERT creates the whole batch: one row per value of the series
// 1..=posts_per_batch. The tuple below lines up with the columns in `into_columns`:
// name, creator, community, featured flag (true only for series value 1) and a publish
// time of now() minus the series value in seconds.
let n = dsl::insert_into(post::table)
.values(ValuesFromSeries {
start: 1,
stop: posts_per_batch.into(),
selection: (
"AAAAAAAAAAA".into_sql::<sql_types::Text>(),
person_id.into_sql::<sql_types::Integer>(),
community_id.into_sql::<sql_types::Integer>(),
series::current_value.eq(1),
now()
- sql::<sql_types::Interval>("make_interval(secs => ")
.bind::<sql_types::BigInt, _>(series::current_value)
.sql(")"),
),
})
.into_columns((
post::name,
post::creator_id,
post::community_id,
post::featured_community,
post::published,
))
.execute(conn)
.await?;
num_inserted_posts += n;
}
}
// Make sure the println above shows the correct amount
assert_eq!(num_inserted_posts, num_posts as usize);
// Enable auto_explain
conn
.batch_execute(
"SET auto_explain.log_min_duration = 0; SET auto_explain.log_nested_statements = off;",
)
.await?;
// TODO: show execution duration stats
// Cursor for the next page; `None` means the first page is requested.
let mut page_after = None;
for page_num in 1..=args.read_post_pages {
println!(
"👀 getting page {page_num} of posts (pagination cursor used: {})",
page_after.is_some()
);
// TODO: include local_user
// Lists the newest posts of the first created community, 20 per page.
let post_views = PostQuery {
community_id: community_ids.as_slice().first().cloned(),
sort: Some(SortType::New),
limit: Some(20),
page_after,
..Default::default()
}
.list(&mut conn.into())
.await?;
// The last post of the page becomes the cursor for the next page; an empty page ends the loop.
if let Some(post_view) = post_views.into_iter().last() {
println!("👀 getting pagination cursor data for next page");
let cursor_data = PaginationCursor::after_post(&post_view)
.read(&mut conn.into())
.await?;
page_after = Some(cursor_data);
} else {
println!("👀 reached empty page");
break;
}
}
// Delete everything, which might prevent problems if this is not run using scripts/db_perf.sh
Instance::delete(&mut conn.into(), instance.id).await?;
if let Ok(path) = std::env::var("PGDATA") {
println!("🪵 query plans written in {path}/log");
}
Ok(())
}

@ -0,0 +1,98 @@
use diesel::{
dsl,
expression::{is_aggregate, ValidGrouping},
pg::Pg,
query_builder::{AsQuery, AstPass, QueryFragment},
result::Error,
sql_types,
AppearsOnTable,
Expression,
Insertable,
QueryId,
SelectableExpression,
};
/// Generates a series of rows for insertion.
///
/// An inclusive range is created from `start` and `stop`. A row for each number is generated using `selection`, which can be a tuple.
/// [`current_value`] is an expression that gets the current value.
///
/// For example, if there's a `numbers` table with a `number` column, this inserts all numbers from 1 to 10 in a single statement:
///
/// ```
/// dsl::insert_into(numbers::table)
/// .values(ValuesFromSeries {
/// start: 1,
/// stop: 10,
/// selection: series::current_value,
/// })
/// .into_columns(numbers::number)
/// ```
#[derive(QueryId)]
pub struct ValuesFromSeries<S> {
/// First number of the series (inclusive).
pub start: i64,
/// Last number of the series (inclusive).
pub stop: i64,
/// Expression (or tuple of expressions) evaluated once per number to build a row.
pub selection: S,
}
impl<S: QueryFragment<Pg>> QueryFragment<Pg> for ValuesFromSeries<S> {
fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> Result<(), Error> {
self.selection.walk_ast(out.reborrow())?;
out.push_sql(" FROM generate_series(");
out.push_bind_param::<sql_types::BigInt, _>(&self.start)?;
out.push_sql(", ");
out.push_bind_param::<sql_types::BigInt, _>(&self.stop)?;
out.push_sql(")");
Ok(())
}
}
// As an expression, the fragment has the same SQL type as its selection.
impl<S: Expression> Expression for ValuesFromSeries<S> {
type SqlType = S::SqlType;
}
// The selection only has to be valid against `current_value`; the fragment itself is then
// accepted for any table `T`.
impl<T, S: AppearsOnTable<current_value>> AppearsOnTable<T> for ValuesFromSeries<S> {}
impl<T, S: SelectableExpression<current_value>> SelectableExpression<T> for ValuesFromSeries<S> {}
// Insertion is delegated to `dsl::select(self)`: the fragment is wrapped in a bare SELECT and
// that statement's `Insertable` implementation provides the values.
impl<T, S: SelectableExpression<current_value>> Insertable<T> for ValuesFromSeries<S>
where
dsl::BareSelect<Self>: AsQuery + Insertable<T>,
{
type Values = <dsl::BareSelect<Self> as Insertable<T>>::Values;
fn values(self) -> Self::Values {
dsl::select(self).values()
}
}
// Non-aggregate as long as the selection is non-aggregate.
impl<S: ValidGrouping<(), IsAggregate = is_aggregate::No>> ValidGrouping<()>
for ValuesFromSeries<S>
{
type IsAggregate = is_aggregate::No;
}
/// Expression for the current number of the series inside a [`ValuesFromSeries`] selection.
///
/// It is rendered as the SQL identifier `generate_series` and has the SQL type `BigInt`.
/// The type doubles as the "table" marker that selections are checked against (see the
/// `AppearsOnTable` / `SelectableExpression` impls below).
#[allow(non_camel_case_types)]
#[derive(QueryId, Clone, Copy, Debug)]
pub struct current_value;
impl QueryFragment<Pg> for current_value {
fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> Result<(), Error> {
// Emitted as an identifier, not as raw SQL text.
out.push_identifier("generate_series")?;
Ok(())
}
}
impl Expression for current_value {
type SqlType = sql_types::BigInt;
}
impl AppearsOnTable<current_value> for current_value {}
impl SelectableExpression<current_value> for current_value {}
impl ValidGrouping<()> for current_value {
type IsAggregate = is_aggregate::No;
}

@ -36,6 +36,7 @@ full = [
"tokio-postgres",
"tokio-postgres-rustls",
"rustls",
"i-love-jesus",
]
[dependencies]
@ -76,6 +77,8 @@ tokio-postgres = { workspace = true, optional = true }
tokio-postgres-rustls = { workspace = true, optional = true }
rustls = { workspace = true, optional = true }
uuid = { workspace = true, features = ["v4"] }
i-love-jesus = { workspace = true, optional = true }
anyhow = { workspace = true }
[dev-dependencies]
serial_test = { workspace = true }

@ -156,6 +156,7 @@ mod tests {
.unwrap();
assert_eq!(2, community_aggregates_before_delete.subscribers);
assert_eq!(2, community_aggregates_before_delete.subscribers_local);
assert_eq!(1, community_aggregates_before_delete.posts);
assert_eq!(2, community_aggregates_before_delete.comments);
@ -164,6 +165,7 @@ mod tests {
.await
.unwrap();
assert_eq!(1, another_community_aggs.subscribers);
assert_eq!(1, another_community_aggs.subscribers_local);
assert_eq!(0, another_community_aggs.posts);
assert_eq!(0, another_community_aggs.comments);
@ -175,6 +177,7 @@ mod tests {
.await
.unwrap();
assert_eq!(1, after_unfollow.subscribers);
assert_eq!(1, after_unfollow.subscribers_local);
// Follow again just for the later tests
CommunityFollower::follow(pool, &second_person_follow)
@ -184,6 +187,7 @@ mod tests {
.await
.unwrap();
assert_eq!(2, after_follow_again.subscribers);
assert_eq!(2, after_follow_again.subscribers_local);
// Remove a parent post (the comment count should also be 0)
Post::delete(pool, inserted_post.id).await.unwrap();
@ -201,6 +205,7 @@ mod tests {
.await
.unwrap();
assert_eq!(1, after_person_delete.subscribers);
assert_eq!(1, after_person_delete.subscribers_local);
// This should delete all the associated rows, and fire triggers
let person_num_deleted = Person::delete(pool, inserted_person.id).await.unwrap();

@ -9,6 +9,8 @@ use crate::schema::{
site_aggregates,
};
use chrono::{DateTime, Utc};
#[cfg(feature = "full")]
use i_love_jesus::CursorKeysModule;
use serde::{Deserialize, Serialize};
#[cfg(feature = "full")]
use ts_rs::TS;
@ -66,6 +68,7 @@ pub struct CommunityAggregates {
pub users_active_half_year: i64,
#[serde(skip)]
pub hot_rank: f64,
pub subscribers_local: i64,
}
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone, Default)]
@ -92,13 +95,21 @@ pub struct PersonAggregates {
#[derive(PartialEq, Debug, Serialize, Deserialize, Clone)]
#[cfg_attr(
feature = "full",
derive(Queryable, Selectable, Associations, Identifiable, TS)
derive(
Queryable,
Selectable,
Associations,
Identifiable,
TS,
CursorKeysModule
)
)]
#[cfg_attr(feature = "full", diesel(table_name = post_aggregates))]
#[cfg_attr(feature = "full", diesel(belongs_to(crate::source::post::Post)))]
#[cfg_attr(feature = "full", diesel(primary_key(post_id)))]
#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
#[cfg_attr(feature = "full", ts(export))]
#[cfg_attr(feature = "full", cursor_keys_module(name = post_aggregates_keys))]
/// Aggregate data for a post.
pub struct PostAggregates {
pub post_id: PostId,

@ -96,16 +96,18 @@ impl LocalUserLanguage {
.execute(conn)
.await?;
for l in lang_ids {
let form = LocalUserLanguageForm {
let forms = lang_ids
.into_iter()
.map(|l| LocalUserLanguageForm {
local_user_id: for_local_user_id,
language_id: l,
};
insert_into(local_user_language)
.values(form)
.get_result::<Self>(conn)
.await?;
}
})
.collect::<Vec<_>>();
insert_into(local_user_language)
.values(forms)
.execute(conn)
.await?;
Ok(())
}) as _
})
@ -164,16 +166,18 @@ impl SiteLanguage {
.execute(conn)
.await?;
for l in lang_ids {
let form = SiteLanguageForm {
let forms = lang_ids
.into_iter()
.map(|l| SiteLanguageForm {
site_id: for_site_id,
language_id: l,
};
insert_into(site_language)
.values(form)
.get_result::<Self>(conn)
.await?;
}
})
.collect::<Vec<_>>();
// Batch insert of all forms. The inserted rows are not used afterwards, so `execute` is
// enough: it does not require the statement to return a row and matches how the
// `local_user_language` batch insert in this file is run.
insert_into(site_language)
  .values(forms)
  .execute(conn)
  .await?;
CommunityLanguage::limit_languages(conn, instance_id).await?;

@ -199,6 +199,7 @@ diesel::table! {
users_active_month -> Int8,
users_active_half_year -> Int8,
hot_rank -> Float8,
subscribers_local -> Int8,
}
}

@ -6,6 +6,7 @@ use crate::{
SortType,
};
use activitypub_federation::{fetch::object_id::ObjectId, traits::Object};
use anyhow::Context;
use chrono::{DateTime, Utc};
use deadpool::Runtime;
use diesel::{
@ -13,9 +14,11 @@ use diesel::{
deserialize::FromSql,
helper_types::AsExprOf,
pg::Pg,
query_builder::{Query, QueryFragment},
query_dsl::methods::LimitDsl,
result::{ConnectionError, ConnectionResult, Error as DieselError, Error::QueryBuilderError},
serialize::{Output, ToSql},
sql_types::{Text, Timestamptz},
sql_types::{self, Text, Timestamptz},
IntoSql,
PgConnection,
};
@ -29,6 +32,7 @@ use diesel_async::{
};
use diesel_migrations::EmbeddedMigrations;
use futures_util::{future::BoxFuture, Future, FutureExt};
use i_love_jesus::CursorKey;
use lemmy_utils::{
error::{LemmyError, LemmyErrorExt, LemmyErrorType},
settings::SETTINGS,
@ -150,6 +154,86 @@ macro_rules! try_join_with_pool {
}};
}
/// Cursor key adapter that wraps a `Timestamptz` cursor key `K` and applies the
/// `reverse_timestamp_sort` SQL function to it, producing a `BigInt` key.
pub struct ReverseTimestampKey<K>(pub K);
impl<K, C> CursorKey<C> for ReverseTimestampKey<K>
where
K: CursorKey<C, SqlType = Timestamptz>,
{
type SqlType = sql_types::BigInt;
type CursorValue = functions::reverse_timestamp_sort::HelperType<K::CursorValue>;
type SqlValue = functions::reverse_timestamp_sort::HelperType<K::SqlValue>;
// Both the value taken from the cursor and the SQL-side value are passed through the same
// function.
fn get_cursor_value(cursor: &C) -> Self::CursorValue {
functions::reverse_timestamp_sort(K::get_cursor_value(cursor))
}
fn get_sql_value() -> Self::SqlValue {
functions::reverse_timestamp_sort(K::get_sql_value())
}
}
/// Wraps `T` and places an SQL comment in front of it, which can be used to label
/// auto_explain output
#[derive(QueryId)]
pub struct Commented<T> {
  comment: String,
  inner: T,
}

impl<T> Commented<T> {
  /// Wraps `inner` with an initially empty comment
  pub fn new(inner: T) -> Self {
    Self {
      inner,
      comment: String::new(),
    }
  }

  /// Appends `text` to the comment when `condition` holds; pieces are separated by `", "`
  pub fn text_if(mut self, text: &str, condition: bool) -> Self {
    if !condition {
      return self;
    }
    let needs_separator = !self.comment.is_empty();
    if needs_separator {
      self.comment.push_str(", ");
    }
    self.comment.push_str(text);
    self
  }

  /// Unconditionally appends `text` to the comment
  pub fn text(self, text: &str) -> Self {
    self.text_if(text, true)
  }
}
/// A commented query produces the same row type as the query it wraps.
impl<T: Query> Query for Commented<T> {
  type SqlType = T::SqlType;
}

impl<T: QueryFragment<Pg>> QueryFragment<Pg> for Commented<T> {
  /// Emits every non-empty line of the comment as its own `-- ` line comment,
  /// then a newline, then the wrapped query.
  fn walk_ast<'b>(
    &'b self,
    mut out: diesel::query_builder::AstPass<'_, 'b, Pg>,
  ) -> Result<(), DieselError> {
    // PostgreSQL ends a `--` comment at either `\n` or `\r`. `str::lines()` only
    // splits on `\n` / `\r\n`, so a lone `\r` inside the text would terminate the
    // comment early and the remainder would be sent as SQL. Split on both
    // characters; empty segments (e.g. between `\r` and `\n`) are skipped.
    let comment_lines = self
      .comment
      .split(|c: char| matches!(c, '\n' | '\r'))
      .filter(|line| !line.is_empty());
    for line in comment_lines {
      out.push_sql("\n-- ");
      out.push_sql(line);
    }
    // Always terminate the (possibly absent) comment before the query starts.
    out.push_sql("\n");
    self.inner.walk_ast(out.reborrow())
  }
}
/// Forwards `limit` to the wrapped query while keeping the comment attached.
impl<T: LimitDsl> LimitDsl for Commented<T> {
  type Output = Commented<T::Output>;

  fn limit(self, limit: i64) -> Self::Output {
    let Commented { comment, inner } = self;
    Commented {
      comment,
      inner: inner.limit(limit),
    }
  }
}
pub fn fuzzy_search(q: &str) -> String {
let replaced = q.replace('%', "\\%").replace('_', "\\_").replace(' ', "%");
format!("%{replaced}%")
@ -275,15 +359,18 @@ impl ServerCertVerifier for NoCertVerifier {
pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!();
fn run_migrations(db_url: &str) {
fn run_migrations(db_url: &str) -> Result<(), LemmyError> {
// Needs to be a sync connection
let mut conn =
PgConnection::establish(db_url).unwrap_or_else(|e| panic!("Error connecting to {db_url}: {e}"));
PgConnection::establish(db_url).with_context(|| format!("Error connecting to {db_url}"))?;
info!("Running Database migrations (This may take a long time)...");
let _ = &mut conn
conn
.run_pending_migrations(MIGRATIONS)
.unwrap_or_else(|e| panic!("Couldn't run DB Migrations: {e}"));
.map_err(|e| anyhow::anyhow!("Couldn't run DB Migrations: {e}"))?;
info!("Database migrations complete.");
Ok(())
}
pub async fn build_db_pool() -> Result<ActualDbPool, LemmyError> {
@ -304,7 +391,7 @@ pub async fn build_db_pool() -> Result<ActualDbPool, LemmyError> {
.runtime(Runtime::Tokio1)
.build()?;
run_migrations(&db_url);
run_migrations(&db_url)?;
Ok(pool)
}
@ -357,6 +444,8 @@ pub mod functions {
fn controversy_rank(upvotes: BigInt, downvotes: BigInt, score: BigInt) -> Double;
}
sql_function!(fn reverse_timestamp_sort(time: Timestamptz) -> BigInt);
sql_function!(fn lower(x: Text) -> Text);
// really this function is variadic, this just adds the two-argument version

@ -23,6 +23,7 @@ full = [
"tracing",
"ts-rs",
"actix-web",
"i-love-jesus",
"lemmy_db_schema/full",
]
@ -37,7 +38,7 @@ serde_with = { workspace = true }
tracing = { workspace = true, optional = true }
ts-rs = { workspace = true, optional = true }
actix-web = { workspace = true, optional = true }
url = { workspace = true }
i-love-jesus = { workspace = true, optional = true }
[dev-dependencies]
serial_test = { workspace = true }

@ -3,6 +3,7 @@ use diesel::{
debug_query,
dsl::{exists, not, IntervalDsl},
pg::Pg,
query_builder::AsQuery,
result::Error,
sql_types,
BoolExpressionMethods,
@ -16,8 +17,9 @@ use diesel::{
QueryDsl,
};
use diesel_async::RunQueryDsl;
use i_love_jesus::PaginatedQueryBuilder;
use lemmy_db_schema::{
aggregates::structs::PostAggregates,
aggregates::structs::{post_aggregates_keys as key, PostAggregates},
newtypes::{CommunityId, LocalUserId, PersonId, PostId},
schema::{
community,
@ -44,45 +46,19 @@ use lemmy_db_schema::{
get_conn,
limit_and_offset,
now,
Commented,
DbConn,
DbPool,
ListFn,
Queries,
ReadFn,
ReverseTimestampKey,
},
ListingType,
SortType,
};
use tracing::debug;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Ord {
Desc,
Asc,
}
struct PaginationCursorField<Q, QS> {
then_order_by_desc: fn(Q) -> Q,
then_order_by_asc: fn(Q) -> Q,
le: fn(&PostAggregates) -> Box<dyn BoxableExpression<QS, Pg, SqlType = sql_types::Bool>>,
ge: fn(&PostAggregates) -> Box<dyn BoxableExpression<QS, Pg, SqlType = sql_types::Bool>>,
ne: fn(&PostAggregates) -> Box<dyn BoxableExpression<QS, Pg, SqlType = sql_types::Bool>>,
}
/// Returns `PaginationCursorField<_, _>` for the given name
macro_rules! field {
($name:ident) => {
// Type inference doesn't work if normal method call syntax is used
PaginationCursorField {
then_order_by_desc: |query| QueryDsl::then_order_by(query, post_aggregates::$name.desc()),
then_order_by_asc: |query| QueryDsl::then_order_by(query, post_aggregates::$name.asc()),
le: |e| Box::new(post_aggregates::$name.le(e.$name)),
ge: |e| Box::new(post_aggregates::$name.ge(e.$name)),
ne: |e| Box::new(post_aggregates::$name.ne(e.$name)),
}
};
}
fn queries<'a>() -> Queries<
impl ReadFn<'a, PostView, (PostId, Option<PersonId>, bool)>,
impl ListFn<'a, PostView, (PostQuery<'a>, &'a Site)>,
@ -283,7 +259,10 @@ fn queries<'a>() -> Queries<
);
}
query.first::<PostView>(&mut conn).await
Commented::new(query)
.text("PostView::read")
.first::<PostView>(&mut conn)
.await
};
let list = move |mut conn: DbConn<'a>, (options, site): (PostQuery<'a>, &'a Site)| async move {
@ -461,107 +440,81 @@ fn queries<'a>() -> Queries<
query = query.filter(not(is_creator_blocked(person_id)));
}
let featured_field = if options.community_id.is_none() || options.community_id_just_for_prefetch
{
field!(featured_local)
} else {
field!(featured_community)
};
let (limit, offset) = limit_and_offset(options.page, options.limit)?;
query = query.limit(limit).offset(offset);
let (main_sort, top_sort_interval) = match options.sort.unwrap_or(SortType::Hot) {
SortType::Active => ((Ord::Desc, field!(hot_rank_active)), None),
SortType::Hot => ((Ord::Desc, field!(hot_rank)), None),
SortType::Scaled => ((Ord::Desc, field!(scaled_rank)), None),
SortType::Controversial => ((Ord::Desc, field!(controversy_rank)), None),
SortType::New => ((Ord::Desc, field!(published)), None),
SortType::Old => ((Ord::Asc, field!(published)), None),
SortType::NewComments => ((Ord::Desc, field!(newest_comment_time)), None),
SortType::MostComments => ((Ord::Desc, field!(comments)), None),
SortType::TopAll => ((Ord::Desc, field!(score)), None),
SortType::TopYear => ((Ord::Desc, field!(score)), Some(1.years())),
SortType::TopMonth => ((Ord::Desc, field!(score)), Some(1.months())),
SortType::TopWeek => ((Ord::Desc, field!(score)), Some(1.weeks())),
SortType::TopDay => ((Ord::Desc, field!(score)), Some(1.days())),
SortType::TopHour => ((Ord::Desc, field!(score)), Some(1.hours())),
SortType::TopSixHour => ((Ord::Desc, field!(score)), Some(6.hours())),
SortType::TopTwelveHour => ((Ord::Desc, field!(score)), Some(12.hours())),
SortType::TopThreeMonths => ((Ord::Desc, field!(score)), Some(3.months())),
SortType::TopSixMonths => ((Ord::Desc, field!(score)), Some(6.months())),
SortType::TopNineMonths => ((Ord::Desc, field!(score)), Some(9.months())),
};
let mut query = PaginatedQueryBuilder::new(query);
let page_after = options.page_after.map(|c| c.0);
let page_before_or_equal = options.page_before_or_equal.map(|c| c.0);
if let Some(interval) = top_sort_interval {
query = query.filter(post_aggregates::published.gt(now() - interval));
if options.page_back {
query = query
.before(page_after)
.after_or_equal(page_before_or_equal)
.limit_and_offset_from_end();
} else {
query = query
.after(page_after)
.before_or_equal(page_before_or_equal);
}
let sorts = [
Some((Ord::Desc, featured_field)),
Some(main_sort),
Some((Ord::Desc, field!(post_id))),
];
let sorts_iter = sorts.iter().flatten();
// featured posts first
query = if options.community_id.is_none() || options.community_id_just_for_prefetch {
query.then_desc(key::featured_local)
} else {
query.then_desc(key::featured_community)
};
// This loop does almost the same thing as sorting by and comparing tuples. If the rows were
// only sorted by 1 field called `foo` in descending order, then it would be like this:
//
// ```
// query = query.then_order_by(foo.desc());
// if let Some(first) = &options.page_after {
// query = query.filter(foo.le(first.foo));
// }
// if let Some(last) = &page_before_or_equal {
// query = query.filter(foo.ge(last.foo));
// }
// ```
//
// If multiple rows have the same value for a sorted field, then they are
// grouped together, and the rows in that group are sorted by the next fields.
// When checking if a row is within the range determined by the cursors, a field
// that's sorted after other fields is only compared if the row and the cursor
// are in the same group created by the previous sort, which is checked by using
// `or` to skip the comparison if any previously sorted field is not equal.
for (i, (order, field)) in sorts_iter.clone().enumerate() {
// Both cursors are treated as inclusive here. `page_after` is made exclusive
// by adding `1` to the offset.
let (then_order_by_field, compare_first, compare_last) = match order {
Ord::Desc => (field.then_order_by_desc, field.le, field.ge),
Ord::Asc => (field.then_order_by_asc, field.ge, field.le),
};
let time = |interval| post_aggregates::published.gt(now() - interval);
// then use the main sort
query = match options.sort.unwrap_or(SortType::Hot) {
SortType::Active => query.then_desc(key::hot_rank_active),
SortType::Hot => query.then_desc(key::hot_rank),
SortType::Scaled => query.then_desc(key::scaled_rank),
SortType::Controversial => query.then_desc(key::controversy_rank),
SortType::New => query.then_desc(key::published),
SortType::Old => query.then_desc(ReverseTimestampKey(key::published)),
SortType::NewComments => query.then_desc(key::newest_comment_time),
SortType::MostComments => query.then_desc(key::comments),
SortType::TopAll => query.then_desc(key::score),
SortType::TopYear => query.then_desc(key::score).filter(time(1.years())),
SortType::TopMonth => query.then_desc(key::score).filter(time(1.months())),
SortType::TopWeek => query.then_desc(key::score).filter(time(1.weeks())),
SortType::TopDay => query.then_desc(key::score).filter(time(1.days())),
SortType::TopHour => query.then_desc(key::score).filter(time(1.hours())),
SortType::TopSixHour => query.then_desc(key::score).filter(time(6.hours())),
SortType::TopTwelveHour => query.then_desc(key::score).filter(time(12.hours())),
SortType::TopThreeMonths => query.then_desc(key::score).filter(time(3.months())),
SortType::TopSixMonths => query.then_desc(key::score).filter(time(6.months())),
SortType::TopNineMonths => query.then_desc(key::score).filter(time(9.months())),
};
query = then_order_by_field(query);
for (cursor_data, compare) in [
(&options.page_after, compare_first),
(&options.page_before_or_equal, compare_last),
] {
let Some(cursor_data) = cursor_data else {
continue;
};
let mut condition: Box<dyn BoxableExpression<_, Pg, SqlType = sql_types::Bool>> =
Box::new(compare(&cursor_data.0));
// For each field that was sorted before the current one, skip the filter by changing
// `condition` to `true` if the row's value doesn't equal the cursor's value.
for (_, other_field) in sorts_iter.clone().take(i) {
condition = Box::new(condition.or((other_field.ne)(&cursor_data.0)));
}
// use publish as fallback. especially useful for hot rank which reaches zero after some days.
// necessary because old posts can be fetched over federation and inserted with high post id
query = match options.sort.unwrap_or(SortType::Hot) {
// A second time-based sort would not be very useful
SortType::New | SortType::Old | SortType::NewComments => query,
_ => query.then_desc(key::published),
};
query = query.filter(condition);
}
}
// finally use unique post id as tie breaker
query = query.then_desc(key::post_id);
let (limit, mut offset) = limit_and_offset(options.page, options.limit)?;
if options.page_after.is_some() {
// always skip exactly one post because that's the last post of the previous page
// fixing the where clause is more difficult because we'd have to change only the last order-by-where clause
// e.g. WHERE (featured_local<=, hot_rank<=, published<=) to WHERE (<=, <=, <)
offset = 1;
}
query = query.limit(limit).offset(offset);
// Not done by debug_query
let query = query.as_query();
debug!("Post View Query: {:?}", debug_query::<Pg, _>(&query));
query.load::<PostView>(&mut conn).await
Commented::new(query)
.text("PostQuery::list")
.text_if(
"getting upper bound for next query",
options.community_id_just_for_prefetch,
)
.load::<PostView>(&mut conn)
.await
};
Queries::new(read, list)
@ -628,6 +581,7 @@ pub struct PostQuery<'a> {
pub limit: Option<i64>,
pub page_after: Option<PaginationCursorData>,
pub page_before_or_equal: Option<PaginationCursorData>,
pub page_back: bool,
}
impl<'a> PostQuery<'a> {
@ -699,9 +653,15 @@ impl<'a> PostQuery<'a> {
if (v.len() as i64) < limit {
Ok(Some(self.clone()))
} else {
let page_before_or_equal = Some(PaginationCursorData(v.pop().expect("else case").counts));
let item = if self.page_back {
// for backward pagination, get first element instead
v.into_iter().next()
} else {
v.pop()
};
let limit_cursor = Some(PaginationCursorData(item.expect("else case").counts));
Ok(Some(PostQuery {
page_before_or_equal,
page_before_or_equal: limit_cursor,
..self.clone()
}))
}
@ -1403,15 +1363,19 @@ mod tests {
}
}
let options = PostQuery {
community_id: Some(inserted_community.id),
sort: Some(SortType::MostComments),
limit: Some(10),
..Default::default()
};
let mut listed_post_ids = vec![];
let mut page_after = None;
loop {
let post_listings = PostQuery {
community_id: Some(inserted_community.id),
sort: Some(SortType::MostComments),
limit: Some(10),
page_after,
..Default::default()
..options.clone()
}
.list(&data.site, pool)
.await?;
@ -1425,6 +1389,34 @@ mod tests {
}
}
// Check that backward pagination matches forward pagination
let mut listed_post_ids_forward = listed_post_ids.clone();
let mut page_before = None;
loop {
  // `list` takes the site as well as the pool, exactly like the
  // forward-pagination loop earlier in this test.
  let post_listings = PostQuery {
    page_after: page_before,
    page_back: true,
    ..options.clone()
  }
  .list(&data.site, pool)
  .await?;

  let listed_post_ids = post_listings.iter().map(|p| p.post.id).collect::<Vec<_>>();

  // Each backward page must equal the current tail of the forward listing.
  let index = listed_post_ids_forward.len() - listed_post_ids.len();
  assert_eq!(
    listed_post_ids_forward.get(index..),
    listed_post_ids.get(..)
  );
  listed_post_ids_forward.truncate(index);

  // The first post of this page is the cursor for the previous page; an empty
  // page means the start of the listing has been reached.
  if let Some(p) = post_listings.into_iter().next() {
    page_before = Some(PaginationCursorData(p.counts));
  } else {
    break;
  }
}
inserted_post_ids.sort_unstable_by_key(|id| id.0);
listed_post_ids.sort_unstable_by_key(|id| id.0);

@ -101,7 +101,7 @@ pub struct PostReportView {
#[derive(Serialize, Deserialize, Debug, Clone)]
#[cfg_attr(feature = "full", derive(ts_rs::TS))]
#[cfg_attr(feature = "full", ts(export))]
pub struct PaginationCursor(pub(crate) String);
pub struct PaginationCursor(pub String);
#[skip_serializing_none]
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]

@ -296,7 +296,7 @@ impl InstanceWorker {
}
if let Some(t) = &activity.send_community_followers_of {
if let Some(urls) = self.followed_communities.get(t) {
inbox_urls.extend(urls.iter().map(std::clone::Clone::clone));
inbox_urls.extend(urls.iter().cloned());
}
}
inbox_urls.extend(

@ -33,4 +33,4 @@ url = { workspace = true }
once_cell = { workspace = true }
tracing = { workspace = true }
tokio = { workspace = true }
rss = "2.0.6"
rss = "2.0.7"

@ -41,11 +41,11 @@ uuid = { workspace = true, features = ["serde", "v4"] }
rosetta-i18n = { workspace = true }
percent-encoding = { workspace = true }
tokio = { workspace = true }
openssl = "0.10.61"
openssl = "0.10.63"
html2text = "0.6.0"
deser-hjson = "2.2.4"
smart-default = "0.7.1"
lettre = { version = "0.11.2", features = ["tokio1", "tokio1-native-tls"] }
lettre = { version = "0.11.3", features = ["tokio1", "tokio1-native-tls"] }
markdown-it = "0.6.0"
ts-rs = { workspace = true, optional = true }
enum-map = { workspace = true }

@ -25,7 +25,7 @@ services:
lemmy:
# use "image" to pull down an already compiled lemmy. make sure to comment out "build".
# image: dessalines/lemmy:0.19.2
# image: dessalines/lemmy:0.19.3
# platform: linux/x86_64 # no arm64 support. uncomment platform if using m1.
# use "build" to build your local lemmy server image for development. make sure to comment out "image".
# run: docker compose up --build
@ -55,7 +55,7 @@ services:
lemmy-ui:
# use "image" to pull down an already compiled lemmy-ui. make sure to comment out "build".
image: dessalines/lemmy-ui:0.19.2
image: dessalines/lemmy-ui:0.19.3
# platform: linux/x86_64 # no arm64 support. uncomment platform if using m1.
# use "build" to build your local lemmy ui image for development. make sure to comment out "image".
# run: docker compose up --build

@ -2,7 +2,7 @@ version: "3.7"
x-ui-default: &ui-default
init: true
image: dessalines/lemmy-ui:0.19.2
image: dessalines/lemmy-ui:0.19.3
# assuming lemmy-ui is cloned besides lemmy directory
# build:
# context: ../../../lemmy-ui

@ -0,0 +1,88 @@
-- Row-level trigger function for `post`:
--   * INSERT: creates the matching post_aggregates row, taking instance_id from
--     the post's community.
--   * DELETE: removes the post_aggregates row of the deleted post.
CREATE OR REPLACE FUNCTION post_aggregates_post ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    IF (TG_OP = 'INSERT') THEN
        INSERT INTO post_aggregates (post_id, published, newest_comment_time, newest_comment_time_necro, community_id, creator_id, instance_id)
        SELECT
            NEW.id,
            NEW.published,
            -- both comment timestamps start out as the post's publish time
            NEW.published,
            NEW.published,
            NEW.community_id,
            NEW.creator_id,
            c.instance_id
        FROM
            community AS c
        WHERE
            c.id = NEW.community_id;
    ELSIF (TG_OP = 'DELETE') THEN
        DELETE FROM post_aggregates AS pa
        WHERE pa.post_id = OLD.id;
    END IF;
    RETURN NULL;
END
$$;
-- Re-create post_aggregates_post as a row-level trigger that fires on both
-- INSERT and DELETE.
CREATE OR REPLACE TRIGGER post_aggregates_post
AFTER INSERT OR DELETE ON post
FOR EACH ROW
EXECUTE PROCEDURE post_aggregates_post ();
-- Let the row-level community post counter handle INSERT again, in addition to
-- DELETE and changes of removed/deleted.
CREATE OR REPLACE TRIGGER community_aggregates_post_count
AFTER INSERT OR DELETE OR UPDATE OF removed,
deleted ON post
FOR EACH ROW
EXECUTE PROCEDURE community_aggregates_post_count ();
-- Remove the statement-level helper functions; CASCADE also drops the triggers
-- that execute them.
DROP FUNCTION IF EXISTS community_aggregates_post_count_insert CASCADE;
-- NOTE(review): no function with this name is created in the up migration shown
-- alongside this file; thanks to IF EXISTS this is a no-op in that case - confirm.
DROP FUNCTION IF EXISTS community_aggregates_post_update CASCADE;
DROP FUNCTION IF EXISTS site_aggregates_post_update CASCADE;
DROP FUNCTION IF EXISTS person_aggregates_post_insert CASCADE;
-- Row-level trigger function: adds one post to site_aggregates whenever
-- was_restored_or_created() reports true for the affected row.
CREATE OR REPLACE FUNCTION site_aggregates_post_insert ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    IF (was_restored_or_created (TG_OP, OLD, NEW)) THEN
        UPDATE
            site_aggregates AS sa
        SET
            posts = sa.posts + 1
        FROM
            site AS s
        WHERE
            s.id = sa.site_id;
    END IF;
    RETURN NULL;
END
$$;

-- Fires for inserts and for changes of removed/deleted, but only for local posts.
CREATE OR REPLACE TRIGGER site_aggregates_post_insert
    AFTER INSERT OR UPDATE OF removed, deleted ON post
    FOR EACH ROW
    WHEN (NEW.local = TRUE)
    EXECUTE PROCEDURE site_aggregates_post_insert ();
-- Returns a placeholder URL: the fixed prefix followed by a slice of the md5
-- hash of a random number. The slice starts at position 0 with length 25, which
-- in PostgreSQL yields the first 24 characters of the hash.
CREATE OR REPLACE FUNCTION generate_unique_changeme ()
    RETURNS text
    LANGUAGE sql
    AS $$
    SELECT
        'http://changeme.invalid/' || substring(md5(random()::text) FROM 0 FOR 25);
$$;
-- Let the row-level person post counter handle INSERT again, in addition to
-- DELETE and changes of removed/deleted.
CREATE OR REPLACE TRIGGER person_aggregates_post_count
AFTER INSERT OR DELETE OR UPDATE OF removed,
deleted ON post
FOR EACH ROW
EXECUTE PROCEDURE person_aggregates_post_count ();
-- generate_unique_changeme() as defined in this file no longer calls nextval(),
-- so the sequence can be removed.
DROP SEQUENCE IF EXISTS changeme_seq;

@ -0,0 +1,166 @@
-- Change triggers to run once per statement instead of once per row
-- post_aggregates_post trigger doesn't need to handle deletion because the post_id column has ON DELETE CASCADE
-- Statement-level trigger function: creates one post_aggregates row for every
-- row of the `new_post` transition table.
CREATE OR REPLACE FUNCTION post_aggregates_post ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    INSERT INTO post_aggregates (post_id, published, newest_comment_time, newest_comment_time_necro, community_id, creator_id, instance_id)
    SELECT
        np.id,
        np.published,
        -- both comment timestamps start out as the post's publish time
        np.published,
        np.published,
        np.community_id,
        np.creator_id,
        -- instance of the community the post belongs to
        (
            SELECT
                c.instance_id
            FROM
                community AS c
            WHERE
                c.id = np.community_id
            LIMIT 1)
FROM
    new_post AS np;
    RETURN NULL;
END
$$;
-- Statement-level trigger function: adds the number of newly inserted posts to
-- community_aggregates.posts, grouped per community.
CREATE OR REPLACE FUNCTION community_aggregates_post_count_insert ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    WITH per_community AS (
        SELECT
            np.community_id,
            count(*) AS new_posts
        FROM
            new_post AS np
        GROUP BY
            np.community_id)
    UPDATE
        community_aggregates
    SET
        posts = posts + per_community.new_posts
    FROM
        per_community
    WHERE
        community_aggregates.community_id = per_community.community_id;
    RETURN NULL;
END
$$;
-- Statement-level trigger function: adds the number of newly inserted posts to
-- person_aggregates.post_count, grouped per creator.
CREATE OR REPLACE FUNCTION person_aggregates_post_insert ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    WITH per_creator AS (
        SELECT
            np.creator_id,
            count(*) AS new_posts
        FROM
            new_post AS np
        GROUP BY
            np.creator_id)
    UPDATE
        person_aggregates
    SET
        post_count = post_count + per_creator.new_posts
    FROM
        per_creator
    WHERE
        person_aggregates.person_id = per_creator.creator_id;
    RETURN NULL;
END
$$;
-- Run post_aggregates_post once per INSERT statement; the inserted rows are
-- exposed to the function as the transition table `new_post`.
CREATE OR REPLACE TRIGGER post_aggregates_post
AFTER INSERT ON post REFERENCING NEW TABLE AS new_post
FOR EACH STATEMENT
EXECUTE PROCEDURE post_aggregates_post ();
-- Don't run old trigger for insert
CREATE OR REPLACE TRIGGER community_aggregates_post_count
AFTER DELETE OR UPDATE OF removed,
deleted ON post
FOR EACH ROW
EXECUTE PROCEDURE community_aggregates_post_count ();
-- Inserts are counted by the statement-level trigger instead.
CREATE OR REPLACE TRIGGER community_aggregates_post_count_insert
AFTER INSERT ON post REFERENCING NEW TABLE AS new_post
FOR EACH STATEMENT
EXECUTE PROCEDURE community_aggregates_post_count_insert ();
-- Row-level trigger function: adds one post to site_aggregates whenever
-- was_restored_or_created() reports true for the affected row.
CREATE OR REPLACE FUNCTION site_aggregates_post_update ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    IF (was_restored_or_created (TG_OP, OLD, NEW)) THEN
        UPDATE
            site_aggregates AS sa
        SET
            posts = sa.posts + 1
        FROM
            site AS s
        WHERE
            s.id = sa.site_id;
    END IF;
    RETURN NULL;
END
$$;
-- Statement-level trigger function: adds the number of newly inserted LOCAL
-- posts to site_aggregates.posts.
--
-- A statement-level trigger cannot carry a WHEN (NEW.local = TRUE) condition the
-- way a row-level trigger on `post` can, so the locality filter has to be applied
-- here when counting the rows of the `new_post` transition table. Without it,
-- posts received from other instances would be counted as well.
CREATE OR REPLACE FUNCTION site_aggregates_post_insert ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    UPDATE
        site_aggregates sa
    SET
        posts = posts + (
            SELECT
                count(*)
            FROM
                new_post
            WHERE
                local)
    FROM
        site s
    WHERE
        sa.site_id = s.id;
    RETURN NULL;
END
$$;
-- Changes of removed/deleted on local posts stay on a row-level trigger.
CREATE OR REPLACE TRIGGER site_aggregates_post_update
AFTER UPDATE OF removed,
deleted ON post
FOR EACH ROW
WHEN (NEW.local = TRUE)
EXECUTE PROCEDURE site_aggregates_post_update ();
-- Inserts are handled once per statement through the `new_post` transition table.
CREATE OR REPLACE TRIGGER site_aggregates_post_insert
AFTER INSERT ON post REFERENCING NEW TABLE AS new_post
FOR EACH STATEMENT
EXECUTE PROCEDURE site_aggregates_post_insert ();
-- The row-level person post counter no longer fires on INSERT.
CREATE OR REPLACE TRIGGER person_aggregates_post_count
AFTER DELETE OR UPDATE OF removed,
deleted ON post
FOR EACH ROW
EXECUTE PROCEDURE person_aggregates_post_count ();
-- Inserts are counted by the statement-level trigger instead.
CREATE OR REPLACE TRIGGER person_aggregates_post_insert
AFTER INSERT ON post REFERENCING NEW TABLE AS new_post
FOR EACH STATEMENT
EXECUTE PROCEDURE person_aggregates_post_insert ();
-- Avoid running hash function and random number generation for default ap_id
-- CYCLE makes the sequence wrap around instead of failing once it is exhausted.
CREATE SEQUENCE IF NOT EXISTS changeme_seq AS bigint CYCLE;

-- Returns a placeholder URL: the fixed prefix followed by the next value of
-- changeme_seq.
CREATE OR REPLACE FUNCTION generate_unique_changeme ()
    RETURNS text
    LANGUAGE sql
    AS $$
    SELECT
        'http://changeme.invalid/seq/' || CAST(nextval('changeme_seq') AS text);
$$;

@ -0,0 +1,4 @@
-- Drop the four expression indexes built on reverse_timestamp_sort(published) ...
DROP INDEX idx_post_aggregates_community_published_asc, idx_post_aggregates_featured_community_published_asc, idx_post_aggregates_featured_local_published_asc, idx_post_aggregates_published_asc;
-- ... and then the function itself, which the indexes depended on.
DROP FUNCTION reverse_timestamp_sort (t timestamp with time zone);

@ -0,0 +1,18 @@
-- Maps a timestamp to the negated number of microseconds since the Unix epoch,
-- so that later timestamps produce smaller values.
CREATE FUNCTION reverse_timestamp_sort (t timestamp with time zone)
    RETURNS bigint
    LANGUAGE plpgsql
    IMMUTABLE PARALLEL SAFE
    AS $$
BEGIN
    RETURN CAST(EXTRACT(EPOCH FROM t) * (-1000000) AS bigint);
END;
$$;
-- Expression indexes on reverse_timestamp_sort (published) DESC, one per
-- combination of community filter and featured column.
-- NOTE(review): presumably these back the "Old" post sort, which orders by this
-- expression in descending order - confirm against the query planner output.
CREATE INDEX idx_post_aggregates_community_published_asc ON public.post_aggregates USING btree (community_id, featured_local DESC, reverse_timestamp_sort (published) DESC);
CREATE INDEX idx_post_aggregates_featured_community_published_asc ON public.post_aggregates USING btree (community_id, featured_community DESC, reverse_timestamp_sort (published) DESC);
CREATE INDEX idx_post_aggregates_featured_local_published_asc ON public.post_aggregates USING btree (featured_local DESC, reverse_timestamp_sort (published) DESC);
CREATE INDEX idx_post_aggregates_published_asc ON public.post_aggregates USING btree (reverse_timestamp_sort (published) DESC);

@ -0,0 +1,42 @@
-- Remove the per-community count of local subscribers.
ALTER TABLE community_aggregates
DROP COLUMN subscribers_local;
-- old function from migrations/2023-10-02-145002_community_followers_count_federated/up.sql
-- The subscriber count should only be updated for local communities. For remote
-- communities it is read over federation from the origin instance.
-- Row-level trigger function: increments (INSERT) or decrements (DELETE) the
-- subscriber count of the followed community, but only when that community is
-- local.
CREATE OR REPLACE FUNCTION community_aggregates_subscriber_count ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    IF (TG_OP = 'INSERT') THEN
        UPDATE
            community_aggregates AS ca
        SET
            subscribers = ca.subscribers + 1
        FROM
            community AS c
        WHERE
            ca.community_id = NEW.community_id
            AND c.id = ca.community_id
            AND c.local;
    ELSIF (TG_OP = 'DELETE') THEN
        UPDATE
            community_aggregates AS ca
        SET
            subscribers = ca.subscribers - 1
        FROM
            community AS c
        WHERE
            ca.community_id = OLD.community_id
            AND c.id = ca.community_id
            AND c.local;
    END IF;
    RETURN NULL;
END
$$;
-- The trigger has to go before the function it executes.
DROP TRIGGER IF EXISTS delete_follow_before_person ON person;
DROP FUNCTION IF EXISTS delete_follow_before_person;

@ -0,0 +1,81 @@
-- Couldn't find a way to put subscribers_local right after subscribers except recreating the table.
ALTER TABLE community_aggregates
    ADD COLUMN subscribers_local bigint NOT NULL DEFAULT 0;

-- Backfill the initial value: for every community, count the followers whose
-- person row is local. Communities without such followers keep the default 0.
UPDATE
    community_aggregates AS ca
SET
    subscribers_local = fc.local_sub_count
FROM (
    SELECT
        cf.community_id,
        count(*) AS local_sub_count
    FROM
        community_follower AS cf
        INNER JOIN person AS p ON p.id = cf.person_id
    WHERE
        p.local
    GROUP BY
        cf.community_id) AS fc
WHERE
    ca.community_id = fc.community_id;
-- subscribers should be updated only when a local community is followed by a local or remote person
-- subscribers_local should be updated only when a local person follows a local or remote community
-- Row-level trigger function for follow rows. On INSERT the counters go up, on
-- DELETE they go down:
--   * subscribers changes by 1 only when the community is local
--   * subscribers_local changes by 1 only when the person is local
-- Nothing is updated when the person row cannot be found.
CREATE OR REPLACE FUNCTION community_aggregates_subscriber_count ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    IF (TG_OP = 'INSERT') THEN
        UPDATE
            community_aggregates AS ca
        SET
            -- boolean cast to int: TRUE adds 1, FALSE adds 0
            subscribers = ca.subscribers + c.local::int,
            subscribers_local = ca.subscribers_local + p.local::int
        FROM
            community AS c
            INNER JOIN person AS p ON p.id = NEW.person_id
        WHERE
            c.id = NEW.community_id
            AND ca.community_id = c.id
            AND p.local IS NOT NULL;
    ELSIF (TG_OP = 'DELETE') THEN
        UPDATE
            community_aggregates AS ca
        SET
            subscribers = ca.subscribers - c.local::int,
            subscribers_local = ca.subscribers_local - p.local::int
        FROM
            community AS c
            INNER JOIN person AS p ON p.id = OLD.person_id
        WHERE
            c.id = OLD.community_id
            AND ca.community_id = c.id
            AND p.local IS NOT NULL;
    END IF;
    RETURN NULL;
END
$$;
-- to be able to join person on the trigger above, we need to run it before the person is deleted: https://github.com/LemmyNet/lemmy/pull/4166#issuecomment-1874095856
-- Deletes every follow row of the person that is about to be deleted.
CREATE FUNCTION delete_follow_before_person ()
    RETURNS TRIGGER
    LANGUAGE plpgsql
    AS $$
BEGIN
    DELETE FROM community_follower AS cf
    WHERE cf.person_id = OLD.id;
    -- returning OLD from a BEFORE DELETE trigger lets the deletion proceed
    RETURN OLD;
END;
$$;

CREATE TRIGGER delete_follow_before_person
    BEFORE DELETE ON person
    FOR EACH ROW
    EXECUTE FUNCTION delete_follow_before_person ();

@ -0,0 +1,20 @@
#!/usr/bin/env bash
# This script runs the lemmy_db_perf package (crates/db_perf), which lets you see
# info related to database query performance, such as query plans.
# All arguments are forwarded to the lemmy_db_perf binary.
set -e

CWD="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"

# Quoted so a checkout path containing spaces still works
cd "$CWD/../"

# Creates and starts a fresh dev database; exports PGDATA and the database URL
source scripts/start_dev_db.sh

# Stop the dev database on every exit path. Because of `set -e`, a failing
# `cargo run` would otherwise end the script with the server still running.
trap 'pg_ctl stop --silent' EXIT

export LEMMY_CONFIG_LOCATION=config/config.hjson
export RUST_BACKTRACE=1

cargo run --package lemmy_db_perf -- "$@"

# $PGDATA directory is kept so log can be seen

@ -2,23 +2,47 @@
export PGDATA="$PWD/dev_pgdata"
export PGHOST=$PWD
export LEMMY_DATABASE_URL="postgresql://lemmy:password@/lemmy?host=$PWD"
export DATABASE_URL="postgresql://lemmy:password@/lemmy?host=$PWD"
export LEMMY_DATABASE_URL=$DATABASE_URL
# If cluster exists, stop the server and delete the cluster
if [ -d $PGDATA ]
if [[ -d $PGDATA ]]
then
# Prevent `stop` from failing if server already stopped
pg_ctl restart > /dev/null
pg_ctl stop
# Only stop server if it is running
pg_status_exit_code=0
(pg_ctl status > /dev/null) || pg_status_exit_code=$?
if [[ ${pg_status_exit_code} -ne 3 ]]
then
pg_ctl stop --silent
fi
rm -rf $PGDATA
fi
config_args=(
# Only listen to socket in current directory
-c listen_addresses=
-c unix_socket_directories=$PWD
# Write logs to a file in $PGDATA/log
-c logging_collector=on
# Allow auto_explain to be turned on
-c session_preload_libraries=auto_explain
# Include actual row amounts and run times for query plan nodes
-c auto_explain.log_analyze=on
# Don't log parameter values
-c auto_explain.log_parameter_max_length=0
)
# Create cluster
initdb --username=postgres --auth=trust --no-instructions
pg_ctl init --silent --options="--username=postgres --auth=trust --no-instructions"
# Start server that only listens to socket in current directory
pg_ctl start --options="-c listen_addresses= -c unix_socket_directories=$PWD" > /dev/null
# Start server
pg_ctl start --silent --options="${config_args[*]}"
# Setup database
psql -c "CREATE USER lemmy WITH PASSWORD 'password' SUPERUSER;" -U postgres
psql -c "CREATE DATABASE lemmy WITH OWNER lemmy;" -U postgres
psql --quiet -c "CREATE USER lemmy WITH PASSWORD 'password' SUPERUSER;" -U postgres
psql --quiet -c "CREATE DATABASE lemmy WITH OWNER lemmy;" -U postgres

@ -27,5 +27,5 @@ cargo test -p lemmy_utils --all-features --no-fail-fast
# Add this to do printlns: -- --nocapture
pg_ctl stop
pg_ctl stop --silent
rm -rf $PGDATA

Loading…
Cancel
Save