|
|
|
@ -50,7 +50,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
|
|
|
|
|
/* Load these variables into register before loop. */
|
|
|
|
|
const __m128i a_cm = ALPHA_CONTROL_MASK;
|
|
|
|
|
const __m128i pack_hi_cm = PACK_HIGH_CONTROL_MASK;
|
|
|
|
|
const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
|
|
|
|
|
const __m128i briAB_cm = BRIGHTNESS_LOW_CONTROL_MASK;
|
|
|
|
|
const __m128i div_cleaner = BRIGHTNESS_DIV_CLEANER;
|
|
|
|
|
const __m128i ob_check = OVERBRIGHT_PRESENCE_MASK;
|
|
|
|
@ -79,27 +79,19 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case RM_WITH_SKIP: {
|
|
|
|
|
__m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
|
|
|
|
|
__m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
|
|
|
|
|
for (uint x = (uint) effective_width / 2; x > 0; x--) {
|
|
|
|
|
ALPHA_BLEND_2(pack_hi_cm);
|
|
|
|
|
/* With high repack, srcABCD have its 2 blended pixels like: [S0 S1 S2 S3] -> [-- -- BS0 BS1]
|
|
|
|
|
* dstABCD shuffled: [D0 D1 D2 D3] -> [D2 D3 D0 D0]
|
|
|
|
|
* PALIGNR takes what's in (): [-- -- (BS0 BS1] [D2 D3) D0 D0]
|
|
|
|
|
*/
|
|
|
|
|
dstABCD = _mm_shuffle_epi32(dstABCD, 0x0E);
|
|
|
|
|
srcABCD = _mm_alignr_epi8(dstABCD, srcABCD, 8);
|
|
|
|
|
Colour *old_dst = dst;
|
|
|
|
|
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
|
|
|
|
|
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
|
|
|
|
|
ALPHA_BLEND_2(pack_low_cm);
|
|
|
|
|
_mm_storel_epi64((__m128i*) dst, srcABCD);
|
|
|
|
|
src += 2;
|
|
|
|
|
dst += 2;
|
|
|
|
|
/* It is VERY important to read next data before it gets invalidated in cpu cache. */
|
|
|
|
|
dstABCD = _mm_loadu_si128((__m128i*) dst);
|
|
|
|
|
_mm_storeu_si128((__m128i *) old_dst, srcABCD);
|
|
|
|
|
srcABCD = _mm_loadu_si128((const __m128i*) src);
|
|
|
|
|
}
|
|
|
|
|
if (bt_last == BT_ODD) {
|
|
|
|
|
ALPHA_BLEND_2(pack_hi_cm);
|
|
|
|
|
(*dst).data = EXTR32(srcABCD, 2);
|
|
|
|
|
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
|
|
|
|
|
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
|
|
|
|
|
ALPHA_BLEND_2(pack_low_cm);
|
|
|
|
|
dst->data = _mm_cvtsi128_si32(srcABCD);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
@ -117,18 +109,18 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
const int width_diff = si->sprite_width - bp->width;
|
|
|
|
|
effective_width = bp->width - (int) src_rgba_line[0].data;
|
|
|
|
|
const int delta_diff = (int) src_rgba_line[1].data - width_diff;
|
|
|
|
|
const int nd = effective_width - delta_diff;
|
|
|
|
|
effective_width = delta_diff > 0 ? nd : effective_width;
|
|
|
|
|
const int new_width = effective_width - delta_diff;
|
|
|
|
|
effective_width = delta_diff > 0 ? new_width : effective_width;
|
|
|
|
|
if (effective_width <= 0) break;
|
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case RM_WITH_SKIP: {
|
|
|
|
|
__m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
|
|
|
|
|
__m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
|
|
|
|
|
uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
|
|
|
|
|
|
|
|
|
|
for (uint x = (uint) effective_width / 2; x > 0; x--) {
|
|
|
|
|
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
|
|
|
|
|
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
|
|
|
|
|
uint32 mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
|
|
|
|
|
|
|
|
|
|
/* Remap colours. */
|
|
|
|
|
if (mvX2 & 0x00FF00FF) {
|
|
|
|
|
/* Written so the compiler uses CMOV. */
|
|
|
|
@ -139,7 +131,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
Colour c0 = 0; // Use alpha of 0 to keep dst as is.
|
|
|
|
|
c0 = r0 == 0 ? c0 : c0map;
|
|
|
|
|
c0 = m0 != 0 ? c0 : src0;
|
|
|
|
|
INSR32(c0.data, srcABCD, 0);
|
|
|
|
|
srcABCD = _mm_cvtsi32_si128(c0.data);
|
|
|
|
|
|
|
|
|
|
const Colour src1 = src[1];
|
|
|
|
|
const uint m1 = (byte) (mvX2 >> 16);
|
|
|
|
@ -156,40 +148,36 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Blend colours. */
|
|
|
|
|
ALPHA_BLEND_2(pack_hi_cm);
|
|
|
|
|
dstABCD = _mm_shuffle_epi32(dstABCD, 0x0E);
|
|
|
|
|
srcABCD = _mm_alignr_epi8(dstABCD, srcABCD, 8);
|
|
|
|
|
Colour *old_dst = dst;
|
|
|
|
|
ALPHA_BLEND_2(pack_low_cm);
|
|
|
|
|
_mm_storel_epi64((__m128i *) dst, srcABCD);
|
|
|
|
|
dst += 2;
|
|
|
|
|
src += 2;
|
|
|
|
|
src_mv += 2;
|
|
|
|
|
dstABCD = _mm_loadu_si128((__m128i*) dst);
|
|
|
|
|
_mm_storeu_si128((__m128i *) old_dst, srcABCD);
|
|
|
|
|
mvX2 = *((uint32 *) const_cast<MapValue *>(src_mv));
|
|
|
|
|
srcABCD = _mm_loadu_si128((const __m128i*) src);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (effective_width & 1) {
|
|
|
|
|
/* In case the m-channel is zero, do not remap this pixel in any way */
|
|
|
|
|
if (src_mv->m == 0) {
|
|
|
|
|
if (src->a < 255) {
|
|
|
|
|
ALPHA_BLEND_2(pack_hi_cm);
|
|
|
|
|
(*dst).data = EXTR32(srcABCD, 2);
|
|
|
|
|
} else {
|
|
|
|
|
*dst = src->data;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
/* In case the m-channel is zero, do not remap this pixel in any way. */
|
|
|
|
|
__m128i srcABCD;
|
|
|
|
|
if (src_mv->m) {
|
|
|
|
|
const uint r = remap[src_mv->m];
|
|
|
|
|
if (r != 0) {
|
|
|
|
|
Colour remapped_colour = AdjustBrightness(this->LookupColourInPalette(r), src_mv->v);
|
|
|
|
|
if (src->a < 255) {
|
|
|
|
|
remapped_colour.a = src->a;
|
|
|
|
|
INSR32(remapped_colour.data, srcABCD, 0);
|
|
|
|
|
ALPHA_BLEND_2(pack_hi_cm);
|
|
|
|
|
(*dst).data = EXTR32(srcABCD, 2);
|
|
|
|
|
} else
|
|
|
|
|
if (src->a == 255) {
|
|
|
|
|
*dst = remapped_colour;
|
|
|
|
|
} else {
|
|
|
|
|
remapped_colour.a = src->a;
|
|
|
|
|
srcABCD = _mm_cvtsi32_si128(remapped_colour.data);
|
|
|
|
|
goto bmcr_alpha_blend_single;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
srcABCD = _mm_cvtsi32_si128(src->data);
|
|
|
|
|
if (src->a < 255) {
|
|
|
|
|
bmcr_alpha_blend_single:
|
|
|
|
|
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
|
|
|
|
|
ALPHA_BLEND_2(pack_low_cm);
|
|
|
|
|
}
|
|
|
|
|
dst->data = _mm_cvtsi128_si32(srcABCD);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
@ -200,30 +188,29 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
src_mv_line += si->sprite_width;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case BM_TRANSPARENT: {
|
|
|
|
|
/* Make the current colour a bit more black, so it looks like this image is transparent.
|
|
|
|
|
* rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
|
|
|
|
|
*/
|
|
|
|
|
__m128i srcABCD = _mm_loadu_si128((const __m128i*) src);
|
|
|
|
|
__m128i dstABCD = _mm_loadu_si128((__m128i*) dst);
|
|
|
|
|
for (uint x = (uint) bp->width / 2; x > 0; x--) {
|
|
|
|
|
__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
|
|
|
|
|
__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
|
|
|
|
|
__m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
|
|
|
|
|
__m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
|
|
|
|
|
__m128i dstCD = _mm_unpackhi_epi8(dstABCD, _mm_setzero_si128());
|
|
|
|
|
__m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
|
|
|
|
|
alphaAB = _mm_srli_epi16(alphaAB, 2); // Reduce to 64 levels of shades so the max value fits in 16 bits.
|
|
|
|
|
__m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB);
|
|
|
|
|
dstAB = _mm_mullo_epi16(dstAB, nom);
|
|
|
|
|
dstAB = _mm_srli_epi16(dstAB, 8);
|
|
|
|
|
dstAB = _mm_packus_epi16(dstAB, dstCD);
|
|
|
|
|
Colour *old_dst = dst;
|
|
|
|
|
dstAB = _mm_packus_epi16(dstAB, dstAB);
|
|
|
|
|
_mm_storel_epi64((__m128i *) dst, dstAB);
|
|
|
|
|
src += 2;
|
|
|
|
|
dst += 2;
|
|
|
|
|
dstABCD = _mm_loadu_si128((__m128i*) dst);
|
|
|
|
|
_mm_storeu_si128((__m128i *) old_dst, dstAB);
|
|
|
|
|
srcABCD = _mm_loadu_si128((const __m128i*) src);
|
|
|
|
|
}
|
|
|
|
|
if (bp->width & 1) {
|
|
|
|
|
__m128i srcABCD = _mm_cvtsi32_si128(src->data);
|
|
|
|
|
__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
|
|
|
|
|
__m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128());
|
|
|
|
|
__m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128());
|
|
|
|
|
__m128i alphaAB = _mm_shuffle_epi8(srcAB, a_cm);
|
|
|
|
@ -232,7 +219,7 @@ inline void Blitter_32bppSSSE3::Draw(const Blitter::BlitterParams *bp, ZoomLevel
|
|
|
|
|
dstAB = _mm_mullo_epi16(dstAB, nom);
|
|
|
|
|
dstAB = _mm_srli_epi16(dstAB, 8);
|
|
|
|
|
dstAB = _mm_packus_epi16(dstAB, dstAB);
|
|
|
|
|
(*dst).data = EXTR32(dstAB, 0);
|
|
|
|
|
dst->data = _mm_cvtsi128_si32(dstAB);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|