From 5ca8a0bda9157ee34c12cb4360f46493dba69979 Mon Sep 17 00:00:00 2001 From: dP Date: Sun, 26 Jan 2020 00:53:16 +0300 Subject: [PATCH 1/4] Feature #7962: Significantly improve sprite sorter performance --- src/viewport.cpp | 152 ++++++++++++++++++-------- src/viewport_sprite_sorter.h | 2 +- src/viewport_sprite_sorter_sse4.cpp | 162 ++++++++++++++++++++-------- 3 files changed, 227 insertions(+), 89 deletions(-) diff --git a/src/viewport.cpp b/src/viewport.cpp index bf1f34acd6..88a7d40464 100644 --- a/src/viewport.cpp +++ b/src/viewport.cpp @@ -89,7 +89,9 @@ #include "network/network_func.h" #include "framerate_type.h" +#include #include +#include #include "table/strings.h" #include "table/string_colours.h" @@ -726,7 +728,6 @@ void AddSortableSpriteToDraw(SpriteID image, PaletteID pal, int x, int y, int w, ps.zmin = z + bb_offset_z; ps.zmax = z + max(bb_offset_z, dz) - 1; - ps.comparison_done = false; ps.first_child = -1; _vd.last_child = &ps.first_child; @@ -1498,64 +1499,127 @@ static bool ViewportSortParentSpritesChecker() return true; } -/** Sort parent sprites pointer array */ +/** Sort parent sprites pointer array replicating the way original sorter did it. */ static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv) { - auto psdvend = psdv->end(); - auto psd = psdv->begin(); - while (psd != psdvend) { - ParentSpriteToDraw *ps = *psd; + if (psdv->size() < 2) return; - if (ps->comparison_done) { - psd++; + /* We rely on sprites being, for the most part, already ordered. + * So we don't need to move many of them and can keep track of their + * order efficiently by using stack. We always move sprites to the front + * of the current position, i.e. to the top of the stack. + * Also use special constants to indicate sorting state without + * adding extra fields to ParentSpriteToDraw structure. + */ + const uint32 ORDER_COMPARED = UINT32_MAX; // Sprite was compared but we still need to compare the ones preceding it + const uint32 ORDER_RETURNED = UINT32_MAX - 1; // Makr sorted sprite in case there are other occurrences of it in the stack + std::stack sprite_order; + uint32 next_order = 0; + + std::forward_list> sprite_list; // We store sprites in a list sorted by xmin+ymin + + /* Initialize sprite list and order. */ + for (auto p = psdv->rbegin(); p != psdv->rend(); p++) { + sprite_list.push_front(std::make_pair((*p)->xmin + (*p)->ymin, *p)); + sprite_order.push(*p); + (*p)->order = next_order++; + } + + sprite_list.sort(); + + std::vector preceding; // Temporarily stores sprites that precede current and their position in the list + auto preceding_prev = sprite_list.begin(); // Store iterator in case we need to delete a single preciding sprite + auto out = psdv->begin(); // Iterator to output sorted sprites + + while (!sprite_order.empty()) { + + auto s = sprite_order.top(); + sprite_order.pop(); + + /* Sprite is already sorted, ignore it. */ + if (s->order == ORDER_RETURNED) continue; + + /* Sprite was already compared, just need to output it. */ + if (s->order == ORDER_COMPARED) { + *(out++) = s; + s->order = ORDER_RETURNED; continue; } - ps->comparison_done = true; - - for (auto psd2 = psd + 1; psd2 != psdvend; psd2++) { - ParentSpriteToDraw *ps2 = *psd2; - - if (ps2->comparison_done) continue; - - /* Decide which comparator to use, based on whether the bounding - * boxes overlap - */ - if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X? - ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y? - ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z? - /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of - * the screen and with higher Z elevation, are drawn in front. - * Here X,Y,Z are the coordinates of the "center of mass" of the sprite, - * i.e. X=(left+right)/2, etc. - * However, since we only care about order, don't actually divide / 2 - */ - if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <= - ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) { - continue; - } - } else { - /* We only change the order, if it is definite. - * I.e. every single order of X, Y, Z says ps2 is behind ps or they overlap. - * That is: If one partial order says ps behind ps2, do not change the order. - */ - if (ps->xmax < ps2->xmin || - ps->ymax < ps2->ymin || - ps->zmax < ps2->zmin) { + preceding.clear(); + + /* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax + * So by iterating sprites with xmin + ymin <= s->xmax + s->ymax + * we get all we need and some more that we filter out later. + * We don't include zmin into the sum as there are usually more neighbors on x and y than z + * so including it will actually increase the amount of false positives. + * Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax) + * to ensure that we iterate the current sprite as we need to remove it from the list. + */ + auto ssum = max(s->xmax, s->xmin) + max(s->ymax, s->ymin); + auto prev = sprite_list.before_begin(); + auto x = sprite_list.begin(); + while (x != sprite_list.end() && ((*x).first <= ssum)) { + auto p = (*x).second; + if (p == s) { + /* We found the current sprite, remove it and move on. */ + x = sprite_list.erase_after(prev); + continue; + } + + auto p_prev = prev; + prev = x++; + + if (s->xmax < p->xmin || s->ymax < p->ymin || s->zmax < p->zmin) continue; + if (s->xmin <= p->xmax && // overlap in X? + s->ymin <= p->ymax && // overlap in Y? + s->zmin <= p->zmax) { // overlap in Z? + if (s->xmin + s->xmax + s->ymin + s->ymax + s->zmin + s->zmax <= + p->xmin + p->xmax + p->ymin + p->ymax + p->zmin + p->zmax) { continue; } } + preceding.push_back(p); + preceding_prev = p_prev; + } + + if (preceding.empty()) { + /* No preceding sprites, add current one to the output */ + *(out++) = s; + s->order = ORDER_RETURNED; + continue; + } - /* Move ps2 in front of ps */ - ParentSpriteToDraw *temp = ps2; - for (auto psd3 = psd2; psd3 > psd; psd3--) { - *psd3 = *(psd3 - 1); + /* Optimization for the case when we only have 1 sprite to move. */ + if (preceding.size() == 1) { + auto p = preceding[0]; + /* We can only output the preceding sprite if there can't be any other sprites preceding it. */ + if (p->xmax <= s->xmax && p->ymax <= s->ymax && p->zmax <= s->zmax) { + p->order = ORDER_RETURNED; + s->order = ORDER_RETURNED; + sprite_list.erase_after(preceding_prev); + *(out++) = p; + *(out++) = s; + continue; } - *psd = temp; + } + + /* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */ + std::sort(preceding.begin(), preceding.end(), [](const ParentSpriteToDraw *a, const ParentSpriteToDraw *b) { + return a->order > b->order; + }); + + s->order = ORDER_COMPARED; + sprite_order.push(s); // Still need to output so push it back for now + + for (auto p: preceding) { + p->order = next_order++; + sprite_order.push(p); } } } + static void ViewportDrawParentSprites(const ParentSpriteToSortVector *psd, const ChildScreenSpriteToDrawVector *csstdv) { for (const ParentSpriteToDraw *ps : *psd) { diff --git a/src/viewport_sprite_sorter.h b/src/viewport_sprite_sorter.h index d9948b7c22..2a91f48a4f 100644 --- a/src/viewport_sprite_sorter.h +++ b/src/viewport_sprite_sorter.h @@ -36,7 +36,7 @@ struct ParentSpriteToDraw { int32 top; ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites int32 first_child; ///< the first child to draw. - bool comparison_done; ///< Used during sprite sorting: true if sprite has been compared with all other sprites + uint32 order; ///< Used during sprite sorting }; typedef std::vector ParentSpriteToSortVector; diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp index 6d1a2ceee4..876821fdda 100644 --- a/src/viewport_sprite_sorter_sse4.cpp +++ b/src/viewport_sprite_sorter_sse4.cpp @@ -13,6 +13,9 @@ #include "cpu.h" #include "smmintrin.h" #include "viewport_sprite_sorter.h" +#include +#include +#include #include "safeguards.h" @@ -23,74 +26,145 @@ # define LOAD_128 _mm_loadu_si128 #endif -/** Sort parent sprites pointer array using SSE4.1 optimizations. */ void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv) { + if (psdv->size() < 2) return; + const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0); - auto const psdvend = psdv->end(); - auto psd = psdv->begin(); - while (psd != psdvend) { - ParentSpriteToDraw * const ps = *psd; - if (ps->comparison_done) { - psd++; + /* We rely on sprites being, for the most part, already ordered. + * So we don't need to move many of them and can keep track of their + * order efficiently by using stack. We always move sprites to the front + * of the current position, i.e. to the top of the stack. + * Also use special constants to indicate sorting state without + * adding extra fields to ParentSpriteToDraw structure. + */ + const uint32 ORDER_COMPARED = UINT32_MAX; // Sprite was compared but we still need to compare the ones preceding it + const uint32 ORDER_RETURNED = UINT32_MAX - 1; // Mark sorted sprite in case there are other occurrences of it in the stack + std::stack sprite_order; + uint32 next_order = 0; + + std::forward_list> sprite_list; // We store sprites in a list sorted by xmin+ymin + + /* Initialize sprite list and order. */ + for (auto p = psdv->rbegin(); p != psdv->rend(); p++) { + sprite_list.push_front(std::make_pair((*p)->xmin + (*p)->ymin, *p)); + sprite_order.push(*p); + (*p)->order = next_order++; + } + + sprite_list.sort(); + + std::vector preceding; // Temporarily stores sprites that precede current and their position in the list + auto preceding_prev = sprite_list.begin(); // Store iterator in case we need to delete a single preciding sprite + auto out = psdv->begin(); // Iterator to output sorted sprites + + while (!sprite_order.empty()) { + + auto s = sprite_order.top(); + sprite_order.pop(); + + /* Sprite is already sorted, ignore it. */ + if (s->order == ORDER_RETURNED) continue; + + /* Sprite was already compared, just need to output it. */ + if (s->order == ORDER_COMPARED) { + *(out++) = s; + s->order = ORDER_RETURNED; continue; } - ps->comparison_done = true; - - for (auto psd2 = psd + 1; psd2 != psdvend; psd2++) { - ParentSpriteToDraw * const ps2 = *psd2; - - if (ps2->comparison_done) continue; - - /* - * Decide which comparator to use, based on whether the bounding boxes overlap - * - * Original code: - * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X? - * ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y? - * ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z? - * - * Above conditions are equivalent to: - * 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) ) - * 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) - * 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) ) - * 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) ) - * 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 --------------------------------------> - */ - __m128i ps1_max = LOAD_128((__m128i*) &ps->xmax); - __m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin); - __m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min); - if (!_mm_testz_si128(mask_ptest, rslt1)) + preceding.clear(); + + /* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax + * So by iterating sprites with xmin + ymin <= s->xmax + s->ymax + * we get all we need and some more that we filter out later. + * We don't include zmin into the sum as there are usually more neighbors on x and y than z + * so including it will actually increase the amount of false positives. + * Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax) + * to ensure that we iterate the current sprite as we need to remove it from the list. + */ + auto ssum = max(s->xmax, s->xmin) + max(s->ymax, s->ymin); + auto prev = sprite_list.before_begin(); + auto x = sprite_list.begin(); + while (x != sprite_list.end() && ((*x).first <= ssum)) { + auto p = (*x).second; + if (p == s) { + /* We found the current sprite, remove it and move on. */ + x = sprite_list.erase_after(prev); + continue; + } + + auto p_prev = prev; + prev = x++; + + /* Check that p->xmin <= s->xmax && p->ymin <= s->ymax && p->zmin <= s->zmax */ + __m128i s_max = LOAD_128((__m128i*) &s->xmax); + __m128i p_min = LOAD_128((__m128i*) &p->xmin); + __m128i r1 = _mm_cmplt_epi32(s_max, p_min); + if (!_mm_testz_si128(mask_ptest, r1)) continue; - __m128i ps1_min = LOAD_128((__m128i*) &ps->xmin); - __m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax); - __m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min); - if (_mm_testz_si128(mask_ptest, rslt2)) { + /* Check if sprites overlap, i.e. + * s->xmin <= p->xmax && s->ymin <= p->ymax && s->zmin <= p->zmax + */ + __m128i s_min = LOAD_128((__m128i*) &s->xmin); + __m128i p_max = LOAD_128((__m128i*) &p->xmax); + __m128i r2 = _mm_cmplt_epi32(p_max, s_min); + if (_mm_testz_si128(mask_ptest, r2)) { /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of * the screen and with higher Z elevation, are drawn in front. * Here X,Y,Z are the coordinates of the "center of mass" of the sprite, * i.e. X=(left+right)/2, etc. * However, since we only care about order, don't actually divide / 2 */ - if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <= - ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) { + if (s->xmin + s->xmax + s->ymin + s->ymax + s->zmin + s->zmax <= + p->xmin + p->xmax + p->ymin + p->ymax + p->zmin + p->zmax) { continue; } } - /* Move ps2 in front of ps */ - ParentSpriteToDraw * const temp = ps2; - for (auto psd3 = psd2; psd3 > psd; psd3--) { - *psd3 = *(psd3 - 1); + preceding.push_back(p); + preceding_prev = p_prev; + } + + if (preceding.empty()) { + /* No preceding sprites, add current one to the output */ + *(out++) = s; + s->order = ORDER_RETURNED; + continue; + } + + /* Optimization for the case when we only have 1 sprite to move. */ + if (preceding.size() == 1) { + auto p = preceding[0]; + /* We can only output the preceding sprite if there can't be any other sprites preceding it. */ + if (p->xmax <= s->xmax && p->ymax <= s->ymax && p->zmax <= s->zmax) { + p->order = ORDER_RETURNED; + s->order = ORDER_RETURNED; + sprite_list.erase_after(preceding_prev); + *(out++) = p; + *(out++) = s; + continue; } - *psd = temp; + } + + /* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */ + std::sort(preceding.begin(), preceding.end(), [](const ParentSpriteToDraw *a, const ParentSpriteToDraw *b) { + return a->order > b->order; + }); + + s->order = ORDER_COMPARED; + sprite_order.push(s); // Still need to output so push it back for now + + for (auto p: preceding) { + p->order = next_order++; + sprite_order.push(p); } } } + /** * Check whether the current CPU supports SSE 4.1. * @return True iff the CPU supports SSE 4.1. From e82333cf77861fd78ef156f97865b5f17ae38e4b Mon Sep 17 00:00:00 2001 From: dP Date: Sat, 15 Feb 2020 01:10:04 +0300 Subject: [PATCH 2/4] Feature #7962: Improve rendering of large viewports --- src/viewport.cpp | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/src/viewport.cpp b/src/viewport.cpp index 88a7d40464..e9e8d34dac 100644 --- a/src/viewport.cpp +++ b/src/viewport.cpp @@ -1782,32 +1782,6 @@ void ViewportDoDraw(const Viewport *vp, int left, int top, int right, int bottom _vd.child_screen_sprites_to_draw.clear(); } -/** - * Make sure we don't draw a too big area at a time. - * If we do, the sprite memory will overflow. - */ -static void ViewportDrawChk(const Viewport *vp, int left, int top, int right, int bottom) -{ - if ((int64)ScaleByZoom(bottom - top, vp->zoom) * (int64)ScaleByZoom(right - left, vp->zoom) > (int64)(180000 * ZOOM_LVL_BASE * ZOOM_LVL_BASE)) { - if ((bottom - top) > (right - left)) { - int t = (top + bottom) >> 1; - ViewportDrawChk(vp, left, top, right, t); - ViewportDrawChk(vp, left, t, right, bottom); - } else { - int t = (left + right) >> 1; - ViewportDrawChk(vp, left, top, t, bottom); - ViewportDrawChk(vp, t, top, right, bottom); - } - } else { - ViewportDoDraw(vp, - ScaleByZoom(left - vp->left, vp->zoom) + vp->virtual_left, - ScaleByZoom(top - vp->top, vp->zoom) + vp->virtual_top, - ScaleByZoom(right - vp->left, vp->zoom) + vp->virtual_left, - ScaleByZoom(bottom - vp->top, vp->zoom) + vp->virtual_top - ); - } -} - static inline void ViewportDraw(const Viewport *vp, int left, int top, int right, int bottom) { if (right <= vp->left || bottom <= vp->top) return; @@ -1822,7 +1796,12 @@ static inline void ViewportDraw(const Viewport *vp, int left, int top, int right if (top < vp->top) top = vp->top; if (bottom > vp->top + vp->height) bottom = vp->top + vp->height; - ViewportDrawChk(vp, left, top, right, bottom); + ViewportDoDraw(vp, + ScaleByZoom(left - vp->left, vp->zoom) + vp->virtual_left, + ScaleByZoom(top - vp->top, vp->zoom) + vp->virtual_top, + ScaleByZoom(right - vp->left, vp->zoom) + vp->virtual_left, + ScaleByZoom(bottom - vp->top, vp->zoom) + vp->virtual_top + ); } /** From f7f28f6ebd83583deed7c0b5773edcd783264fb8 Mon Sep 17 00:00:00 2001 From: Jonathan G Rennison Date: Sat, 30 Jan 2021 14:01:33 +0000 Subject: [PATCH 3/4] Revert "Feature #7962: Significantly improve sprite sorter performance" This reverts commit 5ca8a0bda9157ee34c12cb4360f46493dba69979. --- src/viewport.cpp | 152 ++++++++------------------ src/viewport_sprite_sorter.h | 2 +- src/viewport_sprite_sorter_sse4.cpp | 162 ++++++++-------------------- 3 files changed, 89 insertions(+), 227 deletions(-) diff --git a/src/viewport.cpp b/src/viewport.cpp index e9e8d34dac..8be0cedc81 100644 --- a/src/viewport.cpp +++ b/src/viewport.cpp @@ -89,9 +89,7 @@ #include "network/network_func.h" #include "framerate_type.h" -#include #include -#include #include "table/strings.h" #include "table/string_colours.h" @@ -728,6 +726,7 @@ void AddSortableSpriteToDraw(SpriteID image, PaletteID pal, int x, int y, int w, ps.zmin = z + bb_offset_z; ps.zmax = z + max(bb_offset_z, dz) - 1; + ps.comparison_done = false; ps.first_child = -1; _vd.last_child = &ps.first_child; @@ -1499,127 +1498,64 @@ static bool ViewportSortParentSpritesChecker() return true; } -/** Sort parent sprites pointer array replicating the way original sorter did it. */ +/** Sort parent sprites pointer array */ static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv) { - if (psdv->size() < 2) return; + auto psdvend = psdv->end(); + auto psd = psdv->begin(); + while (psd != psdvend) { + ParentSpriteToDraw *ps = *psd; - /* We rely on sprites being, for the most part, already ordered. - * So we don't need to move many of them and can keep track of their - * order efficiently by using stack. We always move sprites to the front - * of the current position, i.e. to the top of the stack. - * Also use special constants to indicate sorting state without - * adding extra fields to ParentSpriteToDraw structure. - */ - const uint32 ORDER_COMPARED = UINT32_MAX; // Sprite was compared but we still need to compare the ones preceding it - const uint32 ORDER_RETURNED = UINT32_MAX - 1; // Makr sorted sprite in case there are other occurrences of it in the stack - std::stack sprite_order; - uint32 next_order = 0; - - std::forward_list> sprite_list; // We store sprites in a list sorted by xmin+ymin - - /* Initialize sprite list and order. */ - for (auto p = psdv->rbegin(); p != psdv->rend(); p++) { - sprite_list.push_front(std::make_pair((*p)->xmin + (*p)->ymin, *p)); - sprite_order.push(*p); - (*p)->order = next_order++; - } - - sprite_list.sort(); - - std::vector preceding; // Temporarily stores sprites that precede current and their position in the list - auto preceding_prev = sprite_list.begin(); // Store iterator in case we need to delete a single preciding sprite - auto out = psdv->begin(); // Iterator to output sorted sprites - - while (!sprite_order.empty()) { - - auto s = sprite_order.top(); - sprite_order.pop(); - - /* Sprite is already sorted, ignore it. */ - if (s->order == ORDER_RETURNED) continue; - - /* Sprite was already compared, just need to output it. */ - if (s->order == ORDER_COMPARED) { - *(out++) = s; - s->order = ORDER_RETURNED; + if (ps->comparison_done) { + psd++; continue; } - preceding.clear(); - - /* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax - * So by iterating sprites with xmin + ymin <= s->xmax + s->ymax - * we get all we need and some more that we filter out later. - * We don't include zmin into the sum as there are usually more neighbors on x and y than z - * so including it will actually increase the amount of false positives. - * Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax) - * to ensure that we iterate the current sprite as we need to remove it from the list. - */ - auto ssum = max(s->xmax, s->xmin) + max(s->ymax, s->ymin); - auto prev = sprite_list.before_begin(); - auto x = sprite_list.begin(); - while (x != sprite_list.end() && ((*x).first <= ssum)) { - auto p = (*x).second; - if (p == s) { - /* We found the current sprite, remove it and move on. */ - x = sprite_list.erase_after(prev); - continue; - } - - auto p_prev = prev; - prev = x++; - - if (s->xmax < p->xmin || s->ymax < p->ymin || s->zmax < p->zmin) continue; - if (s->xmin <= p->xmax && // overlap in X? - s->ymin <= p->ymax && // overlap in Y? - s->zmin <= p->zmax) { // overlap in Z? - if (s->xmin + s->xmax + s->ymin + s->ymax + s->zmin + s->zmax <= - p->xmin + p->xmax + p->ymin + p->ymax + p->zmin + p->zmax) { + ps->comparison_done = true; + + for (auto psd2 = psd + 1; psd2 != psdvend; psd2++) { + ParentSpriteToDraw *ps2 = *psd2; + + if (ps2->comparison_done) continue; + + /* Decide which comparator to use, based on whether the bounding + * boxes overlap + */ + if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X? + ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y? + ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z? + /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of + * the screen and with higher Z elevation, are drawn in front. + * Here X,Y,Z are the coordinates of the "center of mass" of the sprite, + * i.e. X=(left+right)/2, etc. + * However, since we only care about order, don't actually divide / 2 + */ + if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <= + ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) { + continue; + } + } else { + /* We only change the order, if it is definite. + * I.e. every single order of X, Y, Z says ps2 is behind ps or they overlap. + * That is: If one partial order says ps behind ps2, do not change the order. + */ + if (ps->xmax < ps2->xmin || + ps->ymax < ps2->ymin || + ps->zmax < ps2->zmin) { continue; } } - preceding.push_back(p); - preceding_prev = p_prev; - } - - if (preceding.empty()) { - /* No preceding sprites, add current one to the output */ - *(out++) = s; - s->order = ORDER_RETURNED; - continue; - } - /* Optimization for the case when we only have 1 sprite to move. */ - if (preceding.size() == 1) { - auto p = preceding[0]; - /* We can only output the preceding sprite if there can't be any other sprites preceding it. */ - if (p->xmax <= s->xmax && p->ymax <= s->ymax && p->zmax <= s->zmax) { - p->order = ORDER_RETURNED; - s->order = ORDER_RETURNED; - sprite_list.erase_after(preceding_prev); - *(out++) = p; - *(out++) = s; - continue; + /* Move ps2 in front of ps */ + ParentSpriteToDraw *temp = ps2; + for (auto psd3 = psd2; psd3 > psd; psd3--) { + *psd3 = *(psd3 - 1); } - } - - /* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */ - std::sort(preceding.begin(), preceding.end(), [](const ParentSpriteToDraw *a, const ParentSpriteToDraw *b) { - return a->order > b->order; - }); - - s->order = ORDER_COMPARED; - sprite_order.push(s); // Still need to output so push it back for now - - for (auto p: preceding) { - p->order = next_order++; - sprite_order.push(p); + *psd = temp; } } } - static void ViewportDrawParentSprites(const ParentSpriteToSortVector *psd, const ChildScreenSpriteToDrawVector *csstdv) { for (const ParentSpriteToDraw *ps : *psd) { diff --git a/src/viewport_sprite_sorter.h b/src/viewport_sprite_sorter.h index 2a91f48a4f..d9948b7c22 100644 --- a/src/viewport_sprite_sorter.h +++ b/src/viewport_sprite_sorter.h @@ -36,7 +36,7 @@ struct ParentSpriteToDraw { int32 top; ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites int32 first_child; ///< the first child to draw. - uint32 order; ///< Used during sprite sorting + bool comparison_done; ///< Used during sprite sorting: true if sprite has been compared with all other sprites }; typedef std::vector ParentSpriteToSortVector; diff --git a/src/viewport_sprite_sorter_sse4.cpp b/src/viewport_sprite_sorter_sse4.cpp index 876821fdda..6d1a2ceee4 100644 --- a/src/viewport_sprite_sorter_sse4.cpp +++ b/src/viewport_sprite_sorter_sse4.cpp @@ -13,9 +13,6 @@ #include "cpu.h" #include "smmintrin.h" #include "viewport_sprite_sorter.h" -#include -#include -#include #include "safeguards.h" @@ -26,145 +23,74 @@ # define LOAD_128 _mm_loadu_si128 #endif +/** Sort parent sprites pointer array using SSE4.1 optimizations. */ void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv) { - if (psdv->size() < 2) return; - const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0); + auto const psdvend = psdv->end(); + auto psd = psdv->begin(); + while (psd != psdvend) { + ParentSpriteToDraw * const ps = *psd; - /* We rely on sprites being, for the most part, already ordered. - * So we don't need to move many of them and can keep track of their - * order efficiently by using stack. We always move sprites to the front - * of the current position, i.e. to the top of the stack. - * Also use special constants to indicate sorting state without - * adding extra fields to ParentSpriteToDraw structure. - */ - const uint32 ORDER_COMPARED = UINT32_MAX; // Sprite was compared but we still need to compare the ones preceding it - const uint32 ORDER_RETURNED = UINT32_MAX - 1; // Mark sorted sprite in case there are other occurrences of it in the stack - std::stack sprite_order; - uint32 next_order = 0; - - std::forward_list> sprite_list; // We store sprites in a list sorted by xmin+ymin - - /* Initialize sprite list and order. */ - for (auto p = psdv->rbegin(); p != psdv->rend(); p++) { - sprite_list.push_front(std::make_pair((*p)->xmin + (*p)->ymin, *p)); - sprite_order.push(*p); - (*p)->order = next_order++; - } - - sprite_list.sort(); - - std::vector preceding; // Temporarily stores sprites that precede current and their position in the list - auto preceding_prev = sprite_list.begin(); // Store iterator in case we need to delete a single preciding sprite - auto out = psdv->begin(); // Iterator to output sorted sprites - - while (!sprite_order.empty()) { - - auto s = sprite_order.top(); - sprite_order.pop(); - - /* Sprite is already sorted, ignore it. */ - if (s->order == ORDER_RETURNED) continue; - - /* Sprite was already compared, just need to output it. */ - if (s->order == ORDER_COMPARED) { - *(out++) = s; - s->order = ORDER_RETURNED; + if (ps->comparison_done) { + psd++; continue; } - preceding.clear(); - - /* We only need sprites with xmin <= s->xmax && ymin <= s->ymax && zmin <= s->zmax - * So by iterating sprites with xmin + ymin <= s->xmax + s->ymax - * we get all we need and some more that we filter out later. - * We don't include zmin into the sum as there are usually more neighbors on x and y than z - * so including it will actually increase the amount of false positives. - * Also min coordinates can be > max so using max(xmin, xmax) + max(ymin, ymax) - * to ensure that we iterate the current sprite as we need to remove it from the list. - */ - auto ssum = max(s->xmax, s->xmin) + max(s->ymax, s->ymin); - auto prev = sprite_list.before_begin(); - auto x = sprite_list.begin(); - while (x != sprite_list.end() && ((*x).first <= ssum)) { - auto p = (*x).second; - if (p == s) { - /* We found the current sprite, remove it and move on. */ - x = sprite_list.erase_after(prev); - continue; - } - - auto p_prev = prev; - prev = x++; - - /* Check that p->xmin <= s->xmax && p->ymin <= s->ymax && p->zmin <= s->zmax */ - __m128i s_max = LOAD_128((__m128i*) &s->xmax); - __m128i p_min = LOAD_128((__m128i*) &p->xmin); - __m128i r1 = _mm_cmplt_epi32(s_max, p_min); - if (!_mm_testz_si128(mask_ptest, r1)) + ps->comparison_done = true; + + for (auto psd2 = psd + 1; psd2 != psdvend; psd2++) { + ParentSpriteToDraw * const ps2 = *psd2; + + if (ps2->comparison_done) continue; + + /* + * Decide which comparator to use, based on whether the bounding boxes overlap + * + * Original code: + * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X? + * ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y? + * ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z? + * + * Above conditions are equivalent to: + * 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) ) + * 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) + * 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) ) + * 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) ) + * 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 --------------------------------------> + */ + __m128i ps1_max = LOAD_128((__m128i*) &ps->xmax); + __m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin); + __m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min); + if (!_mm_testz_si128(mask_ptest, rslt1)) continue; - /* Check if sprites overlap, i.e. - * s->xmin <= p->xmax && s->ymin <= p->ymax && s->zmin <= p->zmax - */ - __m128i s_min = LOAD_128((__m128i*) &s->xmin); - __m128i p_max = LOAD_128((__m128i*) &p->xmax); - __m128i r2 = _mm_cmplt_epi32(p_max, s_min); - if (_mm_testz_si128(mask_ptest, r2)) { + __m128i ps1_min = LOAD_128((__m128i*) &ps->xmin); + __m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax); + __m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min); + if (_mm_testz_si128(mask_ptest, rslt2)) { /* Use X+Y+Z as the sorting order, so sprites closer to the bottom of * the screen and with higher Z elevation, are drawn in front. * Here X,Y,Z are the coordinates of the "center of mass" of the sprite, * i.e. X=(left+right)/2, etc. * However, since we only care about order, don't actually divide / 2 */ - if (s->xmin + s->xmax + s->ymin + s->ymax + s->zmin + s->zmax <= - p->xmin + p->xmax + p->ymin + p->ymax + p->zmin + p->zmax) { + if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <= + ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) { continue; } } - preceding.push_back(p); - preceding_prev = p_prev; - } - - if (preceding.empty()) { - /* No preceding sprites, add current one to the output */ - *(out++) = s; - s->order = ORDER_RETURNED; - continue; - } - - /* Optimization for the case when we only have 1 sprite to move. */ - if (preceding.size() == 1) { - auto p = preceding[0]; - /* We can only output the preceding sprite if there can't be any other sprites preceding it. */ - if (p->xmax <= s->xmax && p->ymax <= s->ymax && p->zmax <= s->zmax) { - p->order = ORDER_RETURNED; - s->order = ORDER_RETURNED; - sprite_list.erase_after(preceding_prev); - *(out++) = p; - *(out++) = s; - continue; + /* Move ps2 in front of ps */ + ParentSpriteToDraw * const temp = ps2; + for (auto psd3 = psd2; psd3 > psd; psd3--) { + *psd3 = *(psd3 - 1); } - } - - /* Sort all preceding sprites by order and assign new orders in reverse (as original sorter did). */ - std::sort(preceding.begin(), preceding.end(), [](const ParentSpriteToDraw *a, const ParentSpriteToDraw *b) { - return a->order > b->order; - }); - - s->order = ORDER_COMPARED; - sprite_order.push(s); // Still need to output so push it back for now - - for (auto p: preceding) { - p->order = next_order++; - sprite_order.push(p); + *psd = temp; } } } - /** * Check whether the current CPU supports SSE 4.1. * @return True iff the CPU supports SSE 4.1. From c4f63661bbf88e9598124d862508880f21715065 Mon Sep 17 00:00:00 2001 From: Jonathan G Rennison Date: Sat, 30 Jan 2021 14:01:37 +0000 Subject: [PATCH 4/4] Revert "Feature #7962: Improve rendering of large viewports" This reverts commit e82333cf77861fd78ef156f97865b5f17ae38e4b. --- src/viewport.cpp | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/viewport.cpp b/src/viewport.cpp index 8be0cedc81..bf1f34acd6 100644 --- a/src/viewport.cpp +++ b/src/viewport.cpp @@ -1718,6 +1718,32 @@ void ViewportDoDraw(const Viewport *vp, int left, int top, int right, int bottom _vd.child_screen_sprites_to_draw.clear(); } +/** + * Make sure we don't draw a too big area at a time. + * If we do, the sprite memory will overflow. + */ +static void ViewportDrawChk(const Viewport *vp, int left, int top, int right, int bottom) +{ + if ((int64)ScaleByZoom(bottom - top, vp->zoom) * (int64)ScaleByZoom(right - left, vp->zoom) > (int64)(180000 * ZOOM_LVL_BASE * ZOOM_LVL_BASE)) { + if ((bottom - top) > (right - left)) { + int t = (top + bottom) >> 1; + ViewportDrawChk(vp, left, top, right, t); + ViewportDrawChk(vp, left, t, right, bottom); + } else { + int t = (left + right) >> 1; + ViewportDrawChk(vp, left, top, t, bottom); + ViewportDrawChk(vp, t, top, right, bottom); + } + } else { + ViewportDoDraw(vp, + ScaleByZoom(left - vp->left, vp->zoom) + vp->virtual_left, + ScaleByZoom(top - vp->top, vp->zoom) + vp->virtual_top, + ScaleByZoom(right - vp->left, vp->zoom) + vp->virtual_left, + ScaleByZoom(bottom - vp->top, vp->zoom) + vp->virtual_top + ); + } +} + static inline void ViewportDraw(const Viewport *vp, int left, int top, int right, int bottom) { if (right <= vp->left || bottom <= vp->top) return; @@ -1732,12 +1758,7 @@ static inline void ViewportDraw(const Viewport *vp, int left, int top, int right if (top < vp->top) top = vp->top; if (bottom > vp->top + vp->height) bottom = vp->top + vp->height; - ViewportDoDraw(vp, - ScaleByZoom(left - vp->left, vp->zoom) + vp->virtual_left, - ScaleByZoom(top - vp->top, vp->zoom) + vp->virtual_top, - ScaleByZoom(right - vp->left, vp->zoom) + vp->virtual_left, - ScaleByZoom(bottom - vp->top, vp->zoom) + vp->virtual_top - ); + ViewportDrawChk(vp, left, top, right, bottom); } /**