From fec006a4f9f6b0ce43272d72c017ef9f29c8a296 Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Sat, 12 Oct 2024 17:20:32 +0100 Subject: [PATCH] Allow for more fine tuning of Duff's device routines --- src/video/SDL_blit.h | 46 +++++++++++++++++++++++++++++++++--------- src/video/SDL_blit_1.c | 16 +++++++-------- src/video/SDL_blit_A.c | 22 ++++++++++---------- src/video/SDL_blit_N.c | 14 ++++++------- 4 files changed, 62 insertions(+), 36 deletions(-) diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h index 816194d07841e..375446f366817 100644 --- a/src/video/SDL_blit.h +++ b/src/video/SDL_blit.h @@ -610,6 +610,15 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); #else #define USE_DUFFS_LOOP #endif + +#define DUFFS_LOOP1(pixel_copy_increment, width) \ + { \ + int n; \ + for (n = width; n > 0; --n) { \ + pixel_copy_increment; \ + } \ + } + #ifdef USE_DUFFS_LOOP // 8-times unrolled loop @@ -666,8 +675,26 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); } \ } -// Use the 8-times version of the loop by default +// 2-times unrolled loop +#define DUFFS_LOOP2(pixel_copy_increment, width) \ + { \ + int n = (width + 1) / 2; \ + switch (width & 1) { \ + case 0: \ + do { \ + pixel_copy_increment; \ + SDL_FALLTHROUGH; \ + case 1: \ + pixel_copy_increment; \ + } while (--n > 0); \ + } \ + } + +// Use the 4-times version of the loop by default #define DUFFS_LOOP(pixel_copy_increment, width) \ + DUFFS_LOOP4(pixel_copy_increment, width) +// Use the 8-times version of the loop for simple routines +#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) \ DUFFS_LOOP8(pixel_copy_increment, width) // Special version of Duff's device for even more optimization @@ -701,20 +728,19 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); // Don't use Duff's device to unroll loops #define DUFFS_LOOP(pixel_copy_increment, width) \ - { \ - int n; \ - for (n = width; n > 0; --n) { \ - pixel_copy_increment; \ - } \ - } + DUFFS_LOOP1(pixel_copy_increment, width) +#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) \ + DUFFS_LOOP1(pixel_copy_increment, width) #define DUFFS_LOOP8(pixel_copy_increment, width) \ - DUFFS_LOOP(pixel_copy_increment, width) + DUFFS_LOOP1(pixel_copy_increment, width) #define DUFFS_LOOP4(pixel_copy_increment, width) \ - DUFFS_LOOP(pixel_copy_increment, width) + DUFFS_LOOP1(pixel_copy_increment, width) +#define DUFFS_LOOP2(pixel_copy_increment, width) \ + DUFFS_LOOP1(pixel_copy_increment, width) #define DUFFS_LOOP_124(pixel_copy_increment1, \ pixel_copy_increment2, \ pixel_copy_increment4, width) \ - DUFFS_LOOP(pixel_copy_increment1, width) + DUFFS_LOOP1(pixel_copy_increment1, width) #endif // USE_DUFFS_LOOP diff --git a/src/video/SDL_blit_1.c b/src/video/SDL_blit_1.c index 63e7c873b5670..1cc0a8c028316 100644 --- a/src/video/SDL_blit_1.c +++ b/src/video/SDL_blit_1.c @@ -48,7 +48,7 @@ static void Blit1to1(SDL_BlitInfo *info) while (height--) { #ifdef USE_DUFFS_LOOP /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = map[*src]; } @@ -100,7 +100,7 @@ static void Blit1to2(SDL_BlitInfo *info) #ifdef USE_DUFFS_LOOP while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *(Uint16 *)dst = map[*src++]; dst += 2; @@ -256,7 +256,7 @@ static void Blit1to4(SDL_BlitInfo *info) while (height--) { #ifdef USE_DUFFS_LOOP /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( *dst++ = map[*src++]; , width); /* *INDENT-ON* */ // clang-format on @@ -297,7 +297,7 @@ static void Blit1to1Key(SDL_BlitInfo *info) if (palmap) { while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dst = palmap[*src]; @@ -313,7 +313,7 @@ static void Blit1to1Key(SDL_BlitInfo *info) } else { while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dst = *src; @@ -345,7 +345,7 @@ static void Blit1to2Key(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dstp=palmap[*src]; @@ -408,7 +408,7 @@ static void Blit1to4Key(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dstp = palmap[*src]; @@ -444,7 +444,7 @@ static void Blit1toNAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4( + DUFFS_LOOP( { sR = srcpal[*src].r; sG = srcpal[*src].g; diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c index f39ff80388bd5..403c482e77065 100644 --- a/src/video/SDL_blit_A.c +++ b/src/video/SDL_blit_A.c @@ -46,7 +46,7 @@ static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); dR = dstpal[*dst].r; @@ -91,7 +91,7 @@ static void BlitNto1PixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); dR = dstpal[*dst].r; @@ -253,7 +253,7 @@ static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp++; Uint32 d = *dstp; *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) @@ -283,7 +283,7 @@ static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4({ + DUFFS_LOOP({ s = *srcp; d = *dstp; @@ -705,7 +705,7 @@ static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp++; Uint32 d = *dstp; /* @@ -743,7 +743,7 @@ static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp++; Uint32 d = *dstp; /* @@ -776,7 +776,7 @@ static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp; unsigned alpha = s >> 27; // downscale alpha to 5 bits /* Here we special-case opaque alpha since the @@ -819,7 +819,7 @@ static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4({ + DUFFS_LOOP({ unsigned alpha; Uint32 s = *srcp; alpha = s >> 27; // downscale alpha to 5 bits @@ -872,7 +872,7 @@ static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) if (sA) { while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); @@ -910,7 +910,7 @@ static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP4( + DUFFS_LOOP( { RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); if (sA && Pixel != ckey) { @@ -1302,7 +1302,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) dstbpp = dstfmt->bytes_per_pixel; while (height--) { - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); if (sA) { diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index 9a120a1142a6c..652a906623330 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -1800,7 +1800,7 @@ static void Blit_RGB555_ARGB1555(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = *src | mask; ++dst; @@ -1831,7 +1831,7 @@ static void Blit4to4MaskAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = *src | mask; ++dst; @@ -1848,7 +1848,7 @@ static void Blit4to4MaskAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = *src & mask; ++dst; @@ -2142,7 +2142,7 @@ static void Blit2to2Key(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( (*srcp & rgbmask) != ckey ) { *dstp = *srcp; @@ -2188,7 +2188,7 @@ static void BlitNtoNKey(SDL_BlitInfo *info) Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift; while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ((*src32 & rgbmask) != ckey) { *dst32 = *src32 | mask; @@ -2206,7 +2206,7 @@ static void BlitNtoNKey(SDL_BlitInfo *info) Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ((*src32 & rgbmask) != ckey) { *dst32 = *src32 & mask; @@ -2463,7 +2463,7 @@ static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info) Uint32 *dst32 = (Uint32 *)dst; while (height--) { /* *INDENT-OFF* */ // clang-format off - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ((*src32 & rgbmask) != ckey) { *dst32 = *src32;