mirror of
https://github.com/JGRennison/OpenTTD-patches.git
synced 2024-10-31 15:20:10 +00:00
a484a5eb77
Extend bb sprite south for comparisons, but only for vehicles underneath. This is to avoid creating sprite sorting problems for vehicles on top of the bridge. Adjust ParentSpriteToDraw struct.
111 lines
4.4 KiB
C++
111 lines
4.4 KiB
C++
/*
|
|
* This file is part of OpenTTD.
|
|
* OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
|
|
* OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
* See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/** @file viewport_sprite_sorter_sse4.cpp Sprite sorter that uses SSE4.1. */
|
|
|
|
#ifdef WITH_SSE
|
|
|
|
#include "stdafx.h"
|
|
#include "cpu.h"
|
|
#include "smmintrin.h"
|
|
#include "viewport_sprite_sorter.h"
|
|
#include "viewport_func.h"
|
|
|
|
#include "safeguards.h"
|
|
|
|
/* The sorter below reads sprite bounds with 128 bit (16 byte) loads; require
 * the sprite struct size to be a whole number of 16 byte units.
 * NOTE(review): presumably this keeps array elements 16 byte aligned relative
 * to each other -- confirm against the ParentSpriteToDraw definition. */
static_assert((sizeof(ParentSpriteToDraw) % 16) == 0);

/* LOAD_128 is the 128 bit load used by the sorter: the aligned variant when
 * POINTER_IS_64BIT is defined, the unaligned variant otherwise. */
#if defined(POINTER_IS_64BIT)
#	define LOAD_128 _mm_load_si128
#else
#	define LOAD_128 _mm_loadu_si128
#endif
|
|
|
|
/** Sort parent sprites pointer array using SSE4.1 optimizations. */
|
|
GNU_TARGET("sse4.1")
|
|
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
|
|
{
|
|
const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0);
|
|
ParentSpriteToDraw ** const psdvend = psdv->data() + psdv->size();
|
|
ParentSpriteToDraw **psd = psdv->data();
|
|
while (psd != psdvend) {
|
|
ParentSpriteToDraw * const ps = *psd;
|
|
|
|
if (ps->IsComparisonDone()) {
|
|
psd++;
|
|
continue;
|
|
}
|
|
|
|
ps->SetComparisonDone(true);
|
|
const bool is_special = (ps->special_flags & VSSSF_SORT_SPECIAL) != 0;
|
|
|
|
for (auto psd2 = psd + 1; psd2 != psdvend; psd2++) {
|
|
ParentSpriteToDraw * const ps2 = *psd2;
|
|
|
|
if (ps2->IsComparisonDone()) continue;
|
|
|
|
if (is_special && (ps2->special_flags & VSSSF_SORT_SPECIAL) != 0) {
|
|
if (ViewportSortParentSpritesSpecial(ps, ps2, psd, psd2)) continue;
|
|
}
|
|
|
|
/*
|
|
* Decide which comparator to use, based on whether the bounding boxes overlap
|
|
*
|
|
* Original code:
|
|
* if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X?
|
|
* ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y?
|
|
* ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z?
|
|
*
|
|
* Above conditions are equivalent to:
|
|
* 1/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) )
|
|
* 2/ !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) && (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) )
|
|
* 3/ !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) && ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) )
|
|
* 4/ !( !( (ps->xmax < ps2->xmin) || (ps->ymax < ps2->ymin) || (ps->zmax < ps2->zmin) ) && !( (ps2->xmax < ps->xmin) || (ps2->ymax < ps->ymin) || (ps2->zmax < ps->zmin) ) )
|
|
* 5/ PTEST <---------------------------------- rslt1 ----------------------------------> <------------------------------ rslt2 -------------------------------------->
|
|
*/
|
|
__m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
|
|
__m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
|
|
__m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
|
|
if (!_mm_testz_si128(mask_ptest, rslt1))
|
|
continue;
|
|
|
|
__m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
|
|
__m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);
|
|
__m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min);
|
|
if (_mm_testz_si128(mask_ptest, rslt2)) {
|
|
/* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
|
|
* the screen and with higher Z elevation, are drawn in front.
|
|
* Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
|
|
* i.e. X=(left+right)/2, etc.
|
|
* However, since we only care about order, don't actually divide / 2
|
|
*/
|
|
if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <=
|
|
ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* Move ps2 in front of ps */
|
|
ParentSpriteToDraw * const temp = ps2;
|
|
for (auto psd3 = psd2; psd3 > psd; psd3--) {
|
|
*psd3 = *(psd3 - 1);
|
|
}
|
|
*psd = temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check whether the current CPU supports SSE 4.1.
|
|
* @return True iff the CPU supports SSE 4.1.
|
|
*/
|
|
bool ViewportSortParentSpritesSSE41Checker()
|
|
{
|
|
return HasCPUIDFlag(1, 2, 19);
|
|
}
|
|
|
|
#endif /* WITH_SSE */
|