Add: 32bpp SSE2 blitter palette animator (#6795)

Create a new blitter mode: 32bpp-sse2-anim, which is 32bpp-anim + this.
32bpp-sse2-anim is now used by default where 32bpp-anim would have been.
Also use this with the 32bpp-sse4-anim blitter.

See issue #6469.
save_ext
Jonathan G Rennison 6 years ago committed by PeterN
parent 306b999cf4
commit 17257b9620

@ -1149,6 +1149,8 @@
<ClCompile Include="..\src\script\api\script_window.cpp" />
<ClCompile Include="..\src\blitter\32bpp_anim.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim.hpp" />
<ClCompile Include="..\src\blitter\32bpp_anim_sse2.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim_sse2.hpp" />
<ClCompile Include="..\src\blitter\32bpp_anim_sse4.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim_sse4.hpp" />
<ClCompile Include="..\src\blitter\32bpp_base.cpp" />

@ -2604,6 +2604,12 @@
<ClInclude Include="..\src\blitter\32bpp_anim.hpp">
<Filter>Blitters</Filter>
</ClInclude>
<ClCompile Include="..\src\blitter\32bpp_anim_sse2.cpp">
<Filter>Blitters</Filter>
</ClCompile>
<ClInclude Include="..\src\blitter\32bpp_anim_sse2.hpp">
<Filter>Blitters</Filter>
</ClInclude>
<ClCompile Include="..\src\blitter\32bpp_anim_sse4.cpp">
<Filter>Blitters</Filter>
</ClCompile>

@ -1170,6 +1170,8 @@
<ClCompile Include="..\src\script\api\script_window.cpp" />
<ClCompile Include="..\src\blitter\32bpp_anim.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim.hpp" />
<ClCompile Include="..\src\blitter\32bpp_anim_sse2.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim_sse2.hpp" />
<ClCompile Include="..\src\blitter\32bpp_anim_sse4.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim_sse4.hpp" />
<ClCompile Include="..\src\blitter\32bpp_base.cpp" />

@ -2604,6 +2604,12 @@
<ClInclude Include="..\src\blitter\32bpp_anim.hpp">
<Filter>Blitters</Filter>
</ClInclude>
<ClCompile Include="..\src\blitter\32bpp_anim_sse2.cpp">
<Filter>Blitters</Filter>
</ClCompile>
<ClInclude Include="..\src\blitter\32bpp_anim_sse2.hpp">
<Filter>Blitters</Filter>
</ClInclude>
<ClCompile Include="..\src\blitter\32bpp_anim_sse4.cpp">
<Filter>Blitters</Filter>
</ClCompile>

@ -1170,6 +1170,8 @@
<ClCompile Include="..\src\script\api\script_window.cpp" />
<ClCompile Include="..\src\blitter\32bpp_anim.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim.hpp" />
<ClCompile Include="..\src\blitter\32bpp_anim_sse2.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim_sse2.hpp" />
<ClCompile Include="..\src\blitter\32bpp_anim_sse4.cpp" />
<ClInclude Include="..\src\blitter\32bpp_anim_sse4.hpp" />
<ClCompile Include="..\src\blitter\32bpp_base.cpp" />

@ -2604,6 +2604,12 @@
<ClInclude Include="..\src\blitter\32bpp_anim.hpp">
<Filter>Blitters</Filter>
</ClInclude>
<ClCompile Include="..\src\blitter\32bpp_anim_sse2.cpp">
<Filter>Blitters</Filter>
</ClCompile>
<ClInclude Include="..\src\blitter\32bpp_anim_sse2.hpp">
<Filter>Blitters</Filter>
</ClInclude>
<ClCompile Include="..\src\blitter\32bpp_anim_sse4.cpp">
<Filter>Blitters</Filter>
</ClCompile>

@ -3850,6 +3850,14 @@
RelativePath=".\..\src\blitter\32bpp_anim.hpp"
>
</File>
<File
RelativePath=".\..\src\blitter\32bpp_anim_sse2.cpp"
>
</File>
<File
RelativePath=".\..\src\blitter\32bpp_anim_sse2.hpp"
>
</File>
<File
RelativePath=".\..\src\blitter\32bpp_anim_sse4.cpp"
>

@ -3847,6 +3847,14 @@
RelativePath=".\..\src\blitter\32bpp_anim.hpp"
>
</File>
<File
RelativePath=".\..\src\blitter\32bpp_anim_sse2.cpp"
>
</File>
<File
RelativePath=".\..\src\blitter\32bpp_anim_sse2.hpp"
>
</File>
<File
RelativePath=".\..\src\blitter\32bpp_anim_sse4.cpp"
>

@ -913,6 +913,8 @@ script/api/script_window.cpp
blitter/32bpp_anim.cpp
blitter/32bpp_anim.hpp
#if SSE
blitter/32bpp_anim_sse2.cpp
blitter/32bpp_anim_sse2.hpp
blitter/32bpp_anim_sse4.cpp
blitter/32bpp_anim_sse4.hpp
#end

@ -22,7 +22,7 @@ static FBlitter_32bppAnim iFBlitter_32bppAnim;
Blitter_32bppAnim::~Blitter_32bppAnim()
{
free(this->anim_buf);
free(this->anim_alloc);
}
template <BlitterMode mode>
@ -39,8 +39,7 @@ inline void Blitter_32bppAnim::Draw(const Blitter::BlitterParams *bp, ZoomLevel
}
Colour *dst = (Colour *)bp->dst + bp->top * bp->pitch + bp->left;
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'bp->dst' into an 'anim_buf' offset below.
uint16 *anim = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_pitch + bp->left;
uint16 *anim = this->anim_buf + this->ScreenToAnimOffset((uint32 *)bp->dst) + bp->top * this->anim_buf_pitch + bp->left;
const byte *remap = bp->remap; // store so we don't have to access it via bp everytime
@ -280,8 +279,7 @@ void Blitter_32bppAnim::DrawColourMappingRect(void *dst, int width, int height,
}
Colour *udst = (Colour *)dst;
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'dst' into an 'anim_buf' offset below.
uint16 *anim = this->anim_buf + ((uint32 *)dst - (uint32 *)_screen.dst_ptr);
uint16 *anim = this->anim_buf + this->ScreenToAnimOffset((uint32 *)dst);
if (pal == PALETTE_TO_TRANSPARENT) {
do {
@ -319,8 +317,8 @@ void Blitter_32bppAnim::SetPixel(void *video, int x, int y, uint8 colour)
/* Set the colour in the anim-buffer too, if we are rendering to the screen */
if (_screen_disable_anim) return;
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below.
this->anim_buf[((uint32 *)video - (uint32 *)_screen.dst_ptr) + x + y * this->anim_buf_pitch] = colour | (DEFAULT_BRIGHTNESS << 8);
this->anim_buf[this->ScreenToAnimOffset((uint32 *)video) + x + y * this->anim_buf_pitch] = colour | (DEFAULT_BRIGHTNESS << 8);
}
void Blitter_32bppAnim::DrawRect(void *video, int width, int height, uint8 colour)
@ -332,8 +330,7 @@ void Blitter_32bppAnim::DrawRect(void *video, int width, int height, uint8 colou
}
Colour colour32 = LookupColourInPalette(colour);
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below.
uint16 *anim_line = ((uint32 *)video - (uint32 *)_screen.dst_ptr) + this->anim_buf;
uint16 *anim_line = this->ScreenToAnimOffset((uint32 *)video) + this->anim_buf;
do {
Colour *dst = (Colour *)video;
@ -357,8 +354,7 @@ void Blitter_32bppAnim::CopyFromBuffer(void *video, const void *src, int width,
assert(video >= _screen.dst_ptr && video <= (uint32 *)_screen.dst_ptr + _screen.width + _screen.height * _screen.pitch);
Colour *dst = (Colour *)video;
const uint32 *usrc = (const uint32 *)src;
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below.
uint16 *anim_line = ((uint32 *)video - (uint32 *)_screen.dst_ptr) + this->anim_buf;
uint16 *anim_line = this->ScreenToAnimOffset((uint32 *)video) + this->anim_buf;
for (; height > 0; height--) {
/* We need to keep those for palette animation. */
@ -401,8 +397,7 @@ void Blitter_32bppAnim::CopyToBuffer(const void *video, void *dst, int width, in
if (this->anim_buf == NULL) return;
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'video' into an 'anim_buf' offset below.
const uint16 *anim_line = ((const uint32 *)video - (uint32 *)_screen.dst_ptr) + this->anim_buf;
const uint16 *anim_line = this->ScreenToAnimOffset((const uint32 *)video) + this->anim_buf;
for (; height > 0; height--) {
memcpy(udst, src, width * sizeof(uint32));
@ -485,18 +480,22 @@ void Blitter_32bppAnim::PaletteAnimate(const Palette &palette)
Colour *dst = (Colour *)_screen.dst_ptr;
/* Let's walk the anim buffer and try to find the pixels */
const int width = this->anim_buf_width;
const int pitch_offset = _screen.pitch - width;
const int anim_pitch_offset = this->anim_buf_pitch - width;
for (int y = this->anim_buf_height; y != 0 ; y--) {
for (int x = this->anim_buf_width; x != 0 ; x--) {
uint colour = GB(*anim, 0, 8);
for (int x = width; x != 0 ; x--) {
uint16 value = *anim;
uint8 colour = GB(value, 0, 8);
if (colour >= PALETTE_ANIM_START) {
/* Update this pixel */
*dst = this->AdjustBrightness(LookupColourInPalette(colour), GB(*anim, 8, 8));
*dst = this->AdjustBrightness(LookupColourInPalette(colour), GB(value, 8, 8));
}
dst++;
anim++;
}
dst += _screen.pitch - this->anim_buf_width;
anim += this->anim_buf_pitch - this->anim_buf_width;
dst += pitch_offset;
anim += anim_pitch_offset;
}
/* Make sure the backend redraws the whole screen */
@ -513,10 +512,13 @@ void Blitter_32bppAnim::PostResize()
if (_screen.width != this->anim_buf_width || _screen.height != this->anim_buf_height ||
_screen.pitch != this->anim_buf_pitch) {
/* The size of the screen changed; we can assume we can wipe all data from our buffer */
free(this->anim_buf);
free(this->anim_alloc);
this->anim_buf_width = _screen.width;
this->anim_buf_height = _screen.height;
this->anim_buf_pitch = _screen.pitch;
this->anim_buf = CallocT<uint16>(this->anim_buf_height * this->anim_buf_pitch);
this->anim_buf_pitch = (_screen.width + 7) & ~7;
this->anim_alloc = CallocT<uint16>(this->anim_buf_pitch * this->anim_buf_height + 8);
/* align buffer to next 16 byte boundary */
this->anim_buf = reinterpret_cast<uint16 *>((reinterpret_cast<uintptr_t>(this->anim_alloc) + 0xF) & (~0xF));
}
}

@ -18,14 +18,16 @@
class Blitter_32bppAnim : public Blitter_32bppOptimized {
protected:
uint16 *anim_buf; ///< In this buffer we keep track of the 8bpp indexes so we can do palette animation
void *anim_alloc; ///< The raw allocated buffer, not necessarily aligned correctly
int anim_buf_width; ///< The width of the animation buffer.
int anim_buf_height; ///< The height of the animation buffer.
int anim_buf_pitch; ///< The pitch of the animation buffer.
int anim_buf_pitch; ///< The pitch of the animation buffer (width rounded up to 16 byte boundary).
Palette palette; ///< The current palette.
public:
Blitter_32bppAnim() :
anim_buf(NULL),
anim_alloc(NULL),
anim_buf_width(0),
anim_buf_height(0),
anim_buf_pitch(0)
@ -58,6 +60,15 @@ public:
return this->palette.palette[index];
}
inline int ScreenToAnimOffset(const uint32 *video)
{
int raw_offset = video - (const uint32 *)_screen.dst_ptr;
if (_screen.pitch == this->anim_buf_pitch) return raw_offset;
int lines = raw_offset / _screen.pitch;
int across = raw_offset % _screen.pitch;
return across + (lines * this->anim_buf_pitch);
}
template <BlitterMode mode> void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom);
};

@ -0,0 +1,100 @@
/* $Id$ */
/*
* This file is part of OpenTTD.
* OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
* OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file 32bpp_anim.cpp Implementation of a partially SSSE2 32bpp blitter with animation support. */
#ifdef WITH_SSE
#include "../stdafx.h"
#include "../video/video_driver.hpp"
#include "32bpp_anim_sse2.hpp"
#include "32bpp_sse_func.hpp"
#include "../safeguards.h"
/** Instantiation of the partially SSSE2 32bpp with animation blitter factory. */
static FBlitter_32bppSSE2_Anim iFBlitter_32bppSSE2_Anim;
void Blitter_32bppSSE2_Anim::PaletteAnimate(const Palette &palette)
{
assert(!_screen_disable_anim);
this->palette = palette;
/* If first_dirty is 0, it is for 8bpp indication to send the new
* palette. However, only the animation colours might possibly change.
* Especially when going between toyland and non-toyland. */
assert(this->palette.first_dirty == PALETTE_ANIM_START || this->palette.first_dirty == 0);
const uint16 *anim = this->anim_buf;
Colour *dst = (Colour *)_screen.dst_ptr;
bool screen_dirty = false;
/* Let's walk the anim buffer and try to find the pixels */
const int width = this->anim_buf_width;
const int screen_pitch = _screen.pitch;
const int anim_pitch = this->anim_buf_pitch;
__m128i anim_cmp = _mm_set1_epi16(PALETTE_ANIM_START - 1);
__m128i brightness_cmp = _mm_set1_epi16(Blitter_32bppBase::DEFAULT_BRIGHTNESS);
__m128i colour_mask = _mm_set1_epi16(0xFF);
for (int y = this->anim_buf_height; y != 0 ; y--) {
Colour *next_dst_ln = dst + screen_pitch;
const uint16 *next_anim_ln = anim + anim_pitch;
int x = width;
while (x > 0) {
__m128i data = _mm_load_si128((const __m128i *) anim);
/* low bytes only, shifted into high positions */
__m128i colour_data = _mm_and_si128(data, colour_mask);
/* test if any colour >= PALETTE_ANIM_START */
int colour_cmp_result = _mm_movemask_epi8(_mm_cmpgt_epi16(colour_data, anim_cmp));
if (colour_cmp_result) {
/* test if any brightness is unexpected */
if (x < 8 || colour_cmp_result != 0xFFFF ||
_mm_movemask_epi8(_mm_cmpeq_epi16(_mm_srli_epi16(data, 8), brightness_cmp)) != 0xFFFF) {
/* slow path: < 8 pixels left or unexpected brightnesses */
for (int z = min<int>(x, 8); z != 0 ; z--) {
int value = _mm_extract_epi16(data, 0);
uint8 colour = GB(value, 0, 8);
if (colour >= PALETTE_ANIM_START) {
/* Update this pixel */
*dst = AdjustBrightneSSE(LookupColourInPalette(colour), GB(value, 8, 8));
screen_dirty = true;
}
data = _mm_srli_si128(data, 2);
dst++;
}
} else {
/* medium path: 8 pixels to animate all of expected brightnesses */
for (int z = 0; z < 8; z++) {
*dst = LookupColourInPalette(_mm_extract_epi16(colour_data, 0));
colour_data = _mm_srli_si128(colour_data, 2);
dst++;
}
screen_dirty = true;
}
} else {
/* fast path, no animation */
dst += 8;
}
anim += 8;
x -= 8;
}
dst = next_dst_ln;
anim = next_anim_ln;
}
if (screen_dirty) {
/* Make sure the backend redraws the whole screen */
VideoDriver::GetInstance()->MakeDirty(0, 0, _screen.width, _screen.height);
}
}
#endif /* WITH_SSE */

@ -0,0 +1,43 @@
/* $Id$ */
/*
* This file is part of OpenTTD.
* OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
* OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file 32bpp_anim.hpp A partially SSE2 32 bpp blitter with animation support. */
#ifndef BLITTER_32BPP_SSE2_ANIM_HPP
#define BLITTER_32BPP_SSE2_ANIM_HPP
#ifdef WITH_SSE
#ifndef SSE_VERSION
#define SSE_VERSION 2
#endif
#ifndef FULL_ANIMATION
#define FULL_ANIMATION 1
#endif
#include "32bpp_anim.hpp"
#include "32bpp_sse2.hpp"
/** A partially 32 bpp blitter with palette animation. */
class Blitter_32bppSSE2_Anim : public Blitter_32bppAnim {
public:
/* virtual */ void PaletteAnimate(const Palette &palette);
/* virtual */ const char *GetName() { return "32bpp-sse2-anim"; }
};
/** Factory for the partially 32bpp blitter with animation. */
class FBlitter_32bppSSE2_Anim : public BlitterFactory {
public:
FBlitter_32bppSSE2_Anim() : BlitterFactory("32bpp-sse2-anim", "32bpp partially SSE2 Animation Blitter (palette animation)", HasCPUIDFlag(1, 3, 26)) {}
/* virtual */ Blitter *CreateInstance() { return new Blitter_32bppSSE2_Anim(); }
};
#endif /* WITH_SSE */
#endif /* BLITTER_32BPP_ANIM_HPP */

@ -35,8 +35,7 @@ inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomL
{
const byte * const remap = bp->remap;
Colour *dst_line = (Colour *) bp->dst + bp->top * bp->pitch + bp->left;
assert(_screen.pitch == this->anim_buf_pitch); // precondition for translating 'bp->dst' into an 'anim_buf' offset below.
uint16 *anim_line = this->anim_buf + ((uint32 *)bp->dst - (uint32 *)_screen.dst_ptr) + bp->top * this->anim_buf_pitch + bp->left;
uint16 *anim_line = this->anim_buf + this->ScreenToAnimOffset((uint32 *)bp->dst) + bp->top * this->anim_buf_pitch + bp->left;
int effective_width = bp->width;
/* Find where to start reading in the source sprite. */

@ -23,13 +23,14 @@
#endif
#include "32bpp_anim.hpp"
#include "32bpp_anim_sse2.hpp"
#include "32bpp_sse4.hpp"
#undef MARGIN_NORMAL_THRESHOLD
#define MARGIN_NORMAL_THRESHOLD 4
/** The SSE4 32 bpp blitter with palette animation. */
class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppAnim, public Blitter_32bppSSE_Base {
class Blitter_32bppSSE4_Anim FINAL : public Blitter_32bppSSE2_Anim, public Blitter_32bppSSE_Base {
private:
public:

@ -143,6 +143,36 @@ void Blitter_32bppBase::PaletteAnimate(const Palette &palette)
/* By default, 32bpp doesn't have palette animation */
}
Colour Blitter_32bppBase::ReallyAdjustBrightness(Colour colour, uint8 brightness)
{
assert(DEFAULT_BRIGHTNESS == 1 << 7);
uint64 combined = (((uint64) colour.r) << 32) | (((uint64) colour.g) << 16) | ((uint64) colour.b);
combined *= brightness;
uint16 r = GB(combined, 39, 9);
uint16 g = GB(combined, 23, 9);
uint16 b = GB(combined, 7, 9);
if ((combined & 0x800080008000L) == 0L) {
return Colour(r, g, b, colour.a);
}
uint16 ob = 0;
/* Sum overbright */
if (r > 255) ob += r - 255;
if (g > 255) ob += g - 255;
if (b > 255) ob += b - 255;
/* Reduce overbright strength */
ob /= 2;
return Colour(
r >= 255 ? 255 : min(r + ob * (255 - r) / 256, 255),
g >= 255 ? 255 : min(g + ob * (255 - g) / 256, 255),
b >= 255 ? 255 : min(b + ob * (255 - b) / 256, 255),
colour.a);
}
Blitter::PaletteAnimation Blitter_32bppBase::UsePaletteAnimation()
{
return Blitter::PALETTE_ANIMATION_NONE;

@ -146,30 +146,14 @@ public:
static const int DEFAULT_BRIGHTNESS = 128;
static Colour ReallyAdjustBrightness(Colour colour, uint8 brightness);
static inline Colour AdjustBrightness(Colour colour, uint8 brightness)
{
/* Shortcut for normal brightness */
if (brightness == DEFAULT_BRIGHTNESS) return colour;
uint16 ob = 0;
uint16 r = colour.r * brightness / DEFAULT_BRIGHTNESS;
uint16 g = colour.g * brightness / DEFAULT_BRIGHTNESS;
uint16 b = colour.b * brightness / DEFAULT_BRIGHTNESS;
/* Sum overbright */
if (r > 255) ob += r - 255;
if (g > 255) ob += g - 255;
if (b > 255) ob += b - 255;
if (ob == 0) return Colour(r, g, b, colour.a);
/* Reduce overbright strength */
ob /= 2;
return Colour(
r >= 255 ? 255 : min(r + ob * (255 - r) / 256, 255),
g >= 255 ? 255 : min(g + ob * (255 - g) / 256, 255),
b >= 255 ? 255 : min(b + ob * (255 - b) / 256, 255),
colour.a);
return ReallyAdjustBrightness(colour, brightness);
}
};

@ -284,6 +284,9 @@ static bool SwitchNewGRFBlitter()
#endif
{ "8bpp-optimized", 2, 8, 8, 8, 8 },
{ "32bpp-optimized", 0, 8, 32, 8, 32 },
#ifdef WITH_SSE
{ "32bpp-sse2-anim", 1, 8, 32, 8, 32 },
#endif
{ "32bpp-anim", 1, 8, 32, 8, 32 },
};

Loading…
Cancel
Save