Halfway done with hot rank indexes.

pull/1344/head
Dessalines 3 years ago
parent 61c6f33103
commit d227000de3

.gitignore

@@ -15,8 +15,7 @@ volumes
 # local build files
 target
 env_setup.sh
-query_testing/*.json
-query_testing/*.json.old

 # API tests
 api_tests/node_modules

@@ -10,6 +10,7 @@ pub struct CommentAggregates {
   pub score: i64,
   pub upvotes: i64,
   pub downvotes: i64,
+  pub published: chrono::NaiveDateTime,
 }

 impl CommentAggregates {

@@ -10,6 +10,7 @@ pub struct CommunityAggregates {
   pub subscribers: i64,
   pub posts: i64,
   pub comments: i64,
+  pub published: chrono::NaiveDateTime,
 }

 impl CommunityAggregates {

@@ -11,6 +11,7 @@ pub struct PostAggregates {
   pub score: i64,
   pub upvotes: i64,
   pub downvotes: i64,
+  pub published: chrono::NaiveDateTime,
   pub newest_comment_time: chrono::NaiveDateTime,
 }

@@ -41,6 +41,7 @@ table! {
     score -> Int8,
     upvotes -> Int8,
     downvotes -> Int8,
+    published -> Timestamp,
   }
 }
@@ -108,6 +109,7 @@ table! {
     subscribers -> Int8,
     posts -> Int8,
     comments -> Int8,
+    published -> Timestamp,
   }
 }
@@ -280,6 +282,7 @@ table! {
     score -> Int8,
     upvotes -> Int8,
     downvotes -> Int8,
+    published -> Timestamp,
     newest_comment_time -> Timestamp,
   }
 }

@@ -365,8 +365,8 @@ impl<'a> CommentQueryBuilder<'a> {
     query = match self.sort {
       SortType::Hot | SortType::Active => query
-        .order_by(hot_rank(comment_aggregates::score, comment::published).desc())
-        .then_order_by(comment::published.desc()),
+        .order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
+        .then_order_by(comment_aggregates::published.desc()),
       SortType::New => query.order_by(comment::published.desc()),
       SortType::TopAll => query.order_by(comment_aggregates::score.desc()),
       SortType::TopYear => query
@@ -614,6 +614,7 @@ mod tests {
       score: 1,
       upvotes: 1,
       downvotes: 0,
+      published: agg.published,
     },
   };

@@ -349,10 +349,10 @@ impl<'a> PostQueryBuilder<'a> {
         .then_order_by(
           hot_rank(post_aggregates::score, post_aggregates::newest_comment_time).desc(),
         )
-        .then_order_by(post::published.desc()),
+        .then_order_by(post_aggregates::newest_comment_time.desc()),
       SortType::Hot => query
-        .then_order_by(hot_rank(post_aggregates::score, post::published).desc())
-        .then_order_by(post::published.desc()),
+        .then_order_by(hot_rank(post_aggregates::score, post_aggregates::published).desc())
+        .then_order_by(post_aggregates::published.desc()),
       SortType::New => query.then_order_by(post::published.desc()),
       SortType::TopAll => query.then_order_by(post_aggregates::score.desc()),
       SortType::TopYear => query
@@ -601,6 +601,7 @@ mod tests {
       score: 1,
       upvotes: 1,
       downvotes: 0,
+      published: agg.published,
       newest_comment_time: inserted_post.published,
     },
     subscribed: false,
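
For reference, both sorts above now order exclusively on post_aggregates columns, which is what lets the expression indexes added in this commit's up.sql (further down) apply. A rough hand-written equivalent of the ordering, as an illustrative sketch only (the real Diesel query joins several more tables):

-- Hot: rank the post's score against the aggregate row's published time
select p.*, pa.*
from post p
join post_aggregates pa on pa.post_id = p.id
order by hot_rank(pa.score, pa.published) desc, pa.published desc
limit 20;

-- Active: same idea, but against the newest comment time
select p.*, pa.*
from post p
join post_aggregates pa on pa.post_id = p.id
order by hot_rank(pa.score, pa.newest_comment_time) desc, pa.newest_comment_time desc
limit 20;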

@@ -186,9 +186,14 @@ impl<'a> CommunityQueryBuilder<'a> {
       // Covers all other sorts, including hot
       _ => {
         query = query
-          // TODO do custom sql function for hot_rank, make sure this works
-          .order_by(hot_rank(community_aggregates::subscribers, community::published).desc())
-          .then_order_by(community_aggregates::subscribers.desc())
+          .order_by(
+            hot_rank(
+              community_aggregates::subscribers,
+              community_aggregates::published,
+            )
+            .desc(),
+          )
+          .then_order_by(community_aggregates::published.desc())
       }
     };

@@ -268,8 +268,8 @@ impl<'a> UserMentionQueryBuilder<'a> {
     query = match self.sort {
       SortType::Hot | SortType::Active => query
-        .order_by(hot_rank(comment_aggregates::score, comment::published).desc())
-        .then_order_by(comment::published.desc()),
+        .order_by(hot_rank(comment_aggregates::score, comment_aggregates::published).desc())
+        .then_order_by(comment_aggregates::published.desc()),
       SortType::New => query.order_by(comment::published.desc()),
       SortType::TopAll => query.order_by(comment_aggregates::score.desc()),
       SortType::TopYear => query

@@ -5,15 +5,17 @@ create table community_aggregates (
   subscribers bigint not null default 0,
   posts bigint not null default 0,
   comments bigint not null default 0,
+  published timestamp not null default now(),
   unique (community_id)
 );

-insert into community_aggregates (community_id, subscribers, posts, comments)
+insert into community_aggregates (community_id, subscribers, posts, comments, published)
 select
   c.id,
   coalesce(cf.subs, 0) as subscribers,
   coalesce(cd.posts, 0) as posts,
-  coalesce(cd.comments, 0) as comments
+  coalesce(cd.comments, 0) as comments,
+  c.published
 from community c
 left join (
   select

@@ -6,17 +6,19 @@ create table post_aggregates (
   score bigint not null default 0,
   upvotes bigint not null default 0,
   downvotes bigint not null default 0,
+  published timestamp not null default now(),
   newest_comment_time timestamp not null default now(),
   unique (post_id)
 );

-insert into post_aggregates (post_id, comments, score, upvotes, downvotes, newest_comment_time)
+insert into post_aggregates (post_id, comments, score, upvotes, downvotes, published, newest_comment_time)
 select
   p.id,
   coalesce(ct.comments, 0::bigint) as comments,
   coalesce(pl.score, 0::bigint) as score,
   coalesce(pl.upvotes, 0::bigint) as upvotes,
   coalesce(pl.downvotes, 0::bigint) as downvotes,
+  p.published,
   greatest(ct.recent_comment_time, p.published) as newest_activity_time
 from post p
 left join (
left join ( left join (
@@ -64,7 +66,9 @@ begin
     update post_aggregates pa
     set comments = comments + 1,
     newest_comment_time = NEW.published
-    where pa.post_id = NEW.post_id;
+    where pa.post_id = NEW.post_id
+    -- A 2 day necro-bump limit
+    and published > ('now'::timestamp - '2 days'::interval);
   ELSIF (TG_OP = 'DELETE') THEN
     -- Join to post because that post may not exist anymore
     update post_aggregates pa
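
With the added predicate, the trigger's update only matches aggregate rows whose published timestamp falls within the last two days, so comments on older posts no longer bump them in the Active sort. An illustrative way to see which rows would still be bumped (hypothetical query, not part of the migration):

-- 'now'::timestamp resolves at statement time, matching the trigger above
select post_id,
  published > ('now'::timestamp - '2 days'::interval) as within_bump_window
from post_aggregates
limit 10;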

@@ -5,15 +5,17 @@ create table comment_aggregates (
   score bigint not null default 0,
   upvotes bigint not null default 0,
   downvotes bigint not null default 0,
+  published timestamp not null default now(),
   unique (comment_id)
 );

-insert into comment_aggregates (comment_id, score, upvotes, downvotes)
+insert into comment_aggregates (comment_id, score, upvotes, downvotes, published)
 select
   c.id,
   COALESCE(cl.total, 0::bigint) AS score,
   COALESCE(cl.up, 0::bigint) AS upvotes,
-  COALESCE(cl.down, 0::bigint) AS downvotes
+  COALESCE(cl.down, 0::bigint) AS downvotes,
+  c.published
 from comment c
 left join ( select l.comment_id as id,
   sum(l.score) as total,

@@ -0,0 +1,25 @@
-- Rank = ScaleFactor * sign(Score) * log(1 + abs(Score)) / (Time + 2)^Gravity
create or replace function hot_rank(
score numeric,
published timestamp without time zone)
returns integer as $$
begin
-- hours_diff:=EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600
return floor(10000*log(greatest(1,score+3)) / power(((EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600) + 2), 1.8))::integer;
end; $$
LANGUAGE plpgsql;
drop index
idx_post_published,
idx_post_stickied,
idx_post_aggregates_hot,
idx_post_aggregates_active,
idx_post_aggregates_score,
idx_comment_published,
idx_comment_aggregates_hot,
idx_comment_aggregates_score,
idx_user_published,
idx_user_aggregates_comment_score,
idx_community_published,
idx_community_aggregates_hot,
idx_community_aggregates_subscribers;

@@ -0,0 +1,45 @@
-- Need to add immutable to the hot_rank function in order to index by it
-- Rank = ScaleFactor * sign(Score) * log(1 + abs(Score)) / (Time + 2)^Gravity
create or replace function hot_rank(
score numeric,
published timestamp without time zone)
returns integer as $$
begin
-- hours_diff:=EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600
return floor(10000*log(greatest(1,score+3)) / power(((EXTRACT(EPOCH FROM (timezone('utc',now()) - published))/3600) + 2), 1.8))::integer;
end; $$
LANGUAGE plpgsql
IMMUTABLE;
-- Post
create index idx_post_published on post (published desc);
create index idx_post_stickied on post (stickied desc);
-- Post_aggregates
create index idx_post_aggregates_hot on post_aggregates (hot_rank(score, published) desc, published desc);
create index idx_post_aggregates_active on post_aggregates (hot_rank(score, newest_comment_time) desc, newest_comment_time desc);
create index idx_post_aggregates_score on post_aggregates (score desc);
-- Comment
create index idx_comment_published on comment (published desc);
-- Comment_aggregates
create index idx_comment_aggregates_hot on comment_aggregates (hot_rank(score, published) desc, published desc);
create index idx_comment_aggregates_score on comment_aggregates (score desc);
-- User
create index idx_user_published on user_ (published desc);
-- User_aggregates
create index idx_user_aggregates_comment_score on user_aggregates (comment_score desc);
-- Community
create index idx_community_published on community (published desc);
-- Community_aggregates
create index idx_community_aggregates_hot on community_aggregates (hot_rank(subscribers, published) desc, published desc);
create index idx_community_aggregates_subscribers on community_aggregates (subscribers desc);
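
As a quick sanity check of the ranking formula (illustrative queries with made-up values, not part of the migration): at equal score a newer item should out-rank an older one, and the score only enters through log(greatest(1, score + 3)), so large scores have diminishing returns.

select
  hot_rank(10, timezone('utc', now()) - '4 hours'::interval) as fresh_rank,
  hot_rank(10, timezone('utc', now()) - '4 days'::interval) as stale_rank,
  hot_rank(1000, timezone('utc', now()) - '4 days'::interval) as stale_but_popular_rank;

Postgres only allows expression indexes over functions declared IMMUTABLE, which is why the definition above adds it even though the result actually drifts with now(); the indexed value is the rank as of each row's last write, which seems to be the accepted trade-off here.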

@@ -1,13 +0,0 @@
comment_fast_view.json: "Execution Time": 400.841
comment_view.json: "Execution Time": 2312.899
community_fast_view.json: "Execution Time": 0.272
community_view.json: "Execution Time": 36.572
post_fast_view.json: "Execution Time": 128.839
post_view.json: "Execution Time": 970.671
private_message_view.json: "Execution Time": 1.426
reply_fast_view.json: "Execution Time": 426.179
site_view.json: "Execution Time": 2.453
user_fast.json: "Execution Time": 0.400
user_mention_fast_view.json: "Execution Time": 0.179
user_mention_view.json: "Execution Time": 95.815
user_view.json: "Execution Time": 44.692

@@ -1,13 +0,0 @@
comment_fast_view.json: "Execution Time": 3.501
comment_view.json: "Execution Time": 2312.899
community_fast_view.json: "Execution Time": 0.372
community_view.json: "Execution Time": 36.572
post_fast_view.json: "Execution Time": 78.920
post_view.json: "Execution Time": 970.671
private_message_view.json: "Execution Time": 1.426
reply_fast_view.json: "Execution Time": 32.875
site_view.json: "Execution Time": 2.593
user_fast.json: "Execution Time": 0.155
user_mention_fast_view.json: "Execution Time": 0.171
user_mention_view.json: "Execution Time": 1468.291
user_view.json: "Execution Time": 44.692

@@ -1,43 +0,0 @@
#!/bin/bash
set -e
# You can import these to http://tatiyants.com/pev/#/plans/new
# Do the views first
echo "explain (analyze, format json) select * from user_fast" > explain.sql
psql -qAt -U lemmy -f explain.sql > user_fast.json
echo "explain (analyze, format json) select * from post_view where user_id is null order by hot_rank desc, published desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > post_view.json
echo "explain (analyze, format json) select * from post_fast_view where user_id is null order by hot_rank desc, published desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > post_fast_view.json
echo "explain (analyze, format json) select * from comment_view where user_id is null" > explain.sql
psql -qAt -U lemmy -f explain.sql > comment_view.json
echo "explain (analyze, format json) select * from comment_fast_view where user_id is null" > explain.sql
psql -qAt -U lemmy -f explain.sql > comment_fast_view.json
echo "explain (analyze, format json) select * from community_view where user_id is null order by hot_rank desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > community_view.json
echo "explain (analyze, format json) select * from community_fast_view where user_id is null order by hot_rank desc" > explain.sql
psql -qAt -U lemmy -f explain.sql > community_fast_view.json
echo "explain (analyze, format json) select * from site_view limit 1" > explain.sql
psql -qAt -U lemmy -f explain.sql > site_view.json
echo "explain (analyze, format json) select * from reply_fast_view where user_id = 34 and recipient_id = 34" > explain.sql
psql -qAt -U lemmy -f explain.sql > reply_fast_view.json
echo "explain (analyze, format json) select * from user_mention_view where user_id = 34 and recipient_id = 34" > explain.sql
psql -qAt -U lemmy -f explain.sql > user_mention_view.json
echo "explain (analyze, format json) select * from user_mention_fast_view where user_id = 34 and recipient_id = 34" > explain.sql
psql -qAt -U lemmy -f explain.sql > user_mention_fast_view.json
grep "Execution Time" *.json
rm explain.sql

@@ -0,0 +1,49 @@
#!/bin/bash
set -e
# You can import these to http://tatiyants.com/pev/#/plans/new
pushd reports
# Do the views first
PSQL_CMD="docker exec -i dev_postgres_1 psql -qAt -U lemmy"
echo "explain (analyze, format json) select * from user_fast limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_fast.json
echo "explain (analyze, format json) select * from post_view where user_id is null order by hot_rank desc, published desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post_view.json
echo "explain (analyze, format json) select * from post_fast_view where user_id is null order by hot_rank desc, published desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post_fast_view.json
echo "explain (analyze, format json) select * from comment_view where user_id is null limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > comment_view.json
echo "explain (analyze, format json) select * from comment_fast_view where user_id is null limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > comment_fast_view.json
echo "explain (analyze, format json) select * from community_view where user_id is null order by hot_rank desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community_view.json
echo "explain (analyze, format json) select * from community_fast_view where user_id is null order by hot_rank desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community_fast_view.json
echo "explain (analyze, format json) select * from site_view limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > site_view.json
echo "explain (analyze, format json) select * from reply_fast_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > reply_fast_view.json
echo "explain (analyze, format json) select * from user_mention_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_mention_view.json
echo "explain (analyze, format json) select * from user_mention_fast_view where user_id = 34 and recipient_id = 34 limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_mention_fast_view.json
grep "Execution Time" *.json > ../timings-`date +%Y-%m-%d_%H-%M-%S`.out
rm explain.sql
popd

@@ -0,0 +1,11 @@
comment_fast_view.json: "Execution Time": 93.165
comment_view.json: "Execution Time": 4513.485
community_fast_view.json: "Execution Time": 3.998
community_view.json: "Execution Time": 561.814
post_fast_view.json: "Execution Time": 1604.543
post_view.json: "Execution Time": 11630.471
reply_fast_view.json: "Execution Time": 85.708
site_view.json: "Execution Time": 27.264
user_fast.json: "Execution Time": 0.135
user_mention_fast_view.json: "Execution Time": 6.665
user_mention_view.json: "Execution Time": 4996.688

@@ -0,0 +1,41 @@
#!/bin/bash
set -e
# You can import these to http://tatiyants.com/pev/#/plans/new
pushd reports
PSQL_CMD="docker exec -i dev_postgres_1 psql -qAt -U lemmy"
echo "explain (analyze, format json) select * from user_ limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_.json
echo "explain (analyze, format json) select * from post p limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post.json
echo "explain (analyze, format json) select * from post p, post_aggregates pa where p.id = pa.post_id order by hot_rank(pa.score, pa.published) desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > post_ordered_by_rank.json
echo "explain (analyze, format json) select * from comment limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > comment.json
echo "explain (analyze, format json) select * from community limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community.json
echo "explain (analyze, format json) select * from community c, community_aggregates ca where c.id = ca.community_id order by hot_rank(ca.subscribers, ca.published) desc limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > community_ordered_by_subscribers.json
echo "explain (analyze, format json) select * from site s" > explain.sql
cat explain.sql | $PSQL_CMD > site.json
echo "explain (analyze, format json) select * from user_mention limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > user_mention.json
echo "explain (analyze, format json) select * from private_message limit 100" > explain.sql
cat explain.sql | $PSQL_CMD > private_message.json
grep "Execution Time" *.json > ../timings-`date +%Y-%m-%d_%H-%M-%S`.out
rm explain.sql
popd

@@ -0,0 +1,9 @@
comment.json: "Execution Time": 12.263
community.json: "Execution Time": 1.225
community_ordered_by_subscribers.json: "Execution Time": 170.255
post.json: "Execution Time": 5.373
post_ordered_by_rank.json: "Execution Time": 1458.801
private_message.json: "Execution Time": 0.306
site.json: "Execution Time": 0.064
user_.json: "Execution Time": 2.606
user_mention.json: "Execution Time": 0.135

@@ -0,0 +1,9 @@
comment.json: "Execution Time": 0.136
community.json: "Execution Time": 0.157
community_ordered_by_subscribers.json: "Execution Time": 16.036
post.json: "Execution Time": 0.129
post_ordered_by_rank.json: "Execution Time": 15.969
private_message.json: "Execution Time": 0.133
site.json: "Execution Time": 0.056
user_.json: "Execution Time": 0.300
user_mention.json: "Execution Time": 0.122