From ede138ea8b13af01d736418f7fd03eb5bb8bffd6 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Fri, 26 Apr 2024 14:46:46 +0200 Subject: [PATCH 01/48] system.h: reinstate MAX_OPEN_HANDLES to avoid breaking compatibility --- include/system.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/system.h b/include/system.h index 587c0a2aaf..a7f90179c6 100644 --- a/include/system.h +++ b/include/system.h @@ -13,6 +13,8 @@ /** @brief Number of filesystems that can be attached to the system */ #define MAX_FILESYSTEMS 10 +/** @brief Number of open handles that can be maintained at one time */ +#define MAX_OPEN_HANDLES 4096 #ifdef __cplusplus extern "C" { From 02fee73951c53a6488d990e9bd1bacfd2b1261b2 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sat, 27 Apr 2024 15:10:57 +0200 Subject: [PATCH 02/48] Makefile: add missing files The previous merges of the math library and asset library didn't include the Makefile changes required to build the new files. --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0fd1a3fc89..9ab0395ebc 100755 --- a/Makefile +++ b/Makefile @@ -30,12 +30,14 @@ libdragonsys.a: $(BUILD_DIR)/system.o $(N64_AR) -rcs -o $@ $^ libdragon.a: $(BUILD_DIR)/n64sys.o $(BUILD_DIR)/interrupt.o $(BUILD_DIR)/backtrace.o \ - $(BUILD_DIR)/inthandler.o $(BUILD_DIR)/entrypoint.o \ + $(BUILD_DIR)/fmath.o $(BUILD_DIR)/inthandler.o $(BUILD_DIR)/entrypoint.o \ $(BUILD_DIR)/debug.o $(BUILD_DIR)/debugcpp.o $(BUILD_DIR)/usb.o $(BUILD_DIR)/libcart/cart.o $(BUILD_DIR)/fatfs/ff.o \ $(BUILD_DIR)/fatfs/ffunicode.o $(BUILD_DIR)/rompak.o $(BUILD_DIR)/dragonfs.o \ $(BUILD_DIR)/audio.o $(BUILD_DIR)/display.o $(BUILD_DIR)/surface.o \ $(BUILD_DIR)/console.o $(BUILD_DIR)/asset.o \ - $(BUILD_DIR)/compress/lzh5.o $(BUILD_DIR)/compress/lz4_dec.o $(BUILD_DIR)/compress/ringbuf.o \ + $(BUILD_DIR)/compress/lzh5.o $(BUILD_DIR)/compress/lz4_dec.o $(BUILD_DIR)/compress/lz4_dec_fast.o $(BUILD_DIR)/compress/ringbuf.o \ + 
$(BUILD_DIR)/compress/aplib_dec_fast.o $(BUILD_DIR)/compress/aplib_dec.o \ + $(BUILD_DIR)/compress/shrinkler_dec_fast.o $(BUILD_DIR)/compress/shrinkler_dec.o \ $(BUILD_DIR)/joybus.o $(BUILD_DIR)/controller.o $(BUILD_DIR)/rtc.o \ $(BUILD_DIR)/eeprom.o $(BUILD_DIR)/eepromfs.o $(BUILD_DIR)/mempak.o \ $(BUILD_DIR)/tpak.o $(BUILD_DIR)/graphics.o $(BUILD_DIR)/rdp.o \ From 52dfc6cdf459e1b22a2861e8ee41743e10bdf155 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 28 Apr 2024 23:09:27 +0200 Subject: [PATCH 03/48] Make sure to use GNU extensions when building C++ Our only C++ test used -std=c++14 which disabled GNU extensions, which in turn broke some macros in pputils used by, eg, rspq_write. Add a compile-time check that extensions are activated and also default to gnu++17 in n64.mk, so that we have a fixed default baseline for all C++ programs that don't bother to specify a version themselves. (cherry picked from commit 8986d64266c263b71f40bf7e032900b5feeefa5e) --- examples/cpptest/Makefile | 2 -- include/pputils.h | 7 +++++++ include/rspq.h | 2 +- n64.mk | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/cpptest/Makefile b/examples/cpptest/Makefile index 44762b4fac..30790941bb 100644 --- a/examples/cpptest/Makefile +++ b/examples/cpptest/Makefile @@ -2,8 +2,6 @@ BUILD_DIR=build SOURCE_DIR=. include $(N64_INST)/include/n64.mk -N64_CXXFLAGS += -std=c++14 - all: cpptest.z64 OBJS = $(BUILD_DIR)/cpptest.o diff --git a/include/pputils.h b/include/pputils.h index ec3d4b2a09..0c15b1aa56 100644 --- a/include/pputils.h +++ b/include/pputils.h @@ -99,6 +99,13 @@ #define __FEB_31(_call, x, ...) _call(x) __FEB_30(_call, __VA_ARGS__) #define __CALL_FOREACH_BIS(fn, ...) 
__GET_33RD_ARG("ignored", ##__VA_ARGS__, __FEB_31, __FEB_30, __FEB_29, __FEB_28, __FEB_27, __FEB_26, __FEB_25, __FEB_24, __FEB_23, __FEB_22, __FEB_21, __FEB_20, __FEB_19, __FEB_18, __FEB_17, __FEB_16, __FEB_15, __FEB_14, __FEB_13, __FEB_12, __FEB_11, __FEB_10, __FEB_9, __FEB_8, __FEB_7, __FEB_6, __FEB_5, __FEB_4, __FEB_3, __FEB_2, __FEB_1, __FEB_0)(fn, ##__VA_ARGS__) +// Check that GNU extensions are active and macros work correctly. Specifically +// we require the extension that allows ##__VA_ARGS__ to elide the previous comma +// when no variadic arguments are specified. +#if __COUNT_VARARGS() != 0 +#error GNU extensions are required -- please specify a -std=gnuXX/gnu++XX option to the compiler +#endif + /// @endcond #endif diff --git a/include/rspq.h b/include/rspq.h index 03f8262642..f3bc633aff 100644 --- a/include/rspq.h +++ b/include/rspq.h @@ -391,7 +391,7 @@ void* rspq_overlay_get_state(rsp_ucode_t *overlay_ucode); }) #define _rspq_write1(ovl_id, cmd_id, arg0, ...) ({ \ - _Static_assert(__COUNT_VARARGS(__VA_ARGS__) < RSPQ_MAX_SHORT_COMMAND_SIZE); \ + _Static_assert(__COUNT_VARARGS(__VA_ARGS__) <= RSPQ_MAX_SHORT_COMMAND_SIZE, "too many arguments to rspq_write, please use rspq_write_begin/arg/end instead"); \ _rspq_write_prolog(); \ __CALL_FOREACH(_rspq_write_arg, ##__VA_ARGS__); \ rspq_cur_pointer[0] = ((ovl_id) + ((cmd_id)<<24)) | (arg0); \ diff --git a/n64.mk b/n64.mk index 5f5a08c045..3637828e49 100644 --- a/n64.mk +++ b/n64.mk @@ -42,7 +42,7 @@ N64_C_AND_CXX_FLAGS += -ffast-math -ftrapping-math -fno-associative-math N64_C_AND_CXX_FLAGS += -DN64 -O2 -Wall -Werror -Wno-error=deprecated-declarations -fdiagnostics-color=always N64_C_AND_CXX_FLAGS += -Wno-error=unused-variable -Wno-error=unused-but-set-variable -Wno-error=unused-function -Wno-error=unused-parameter -Wno-error=unused-but-set-parameter -Wno-error=unused-label -Wno-error=unused-local-typedefs -Wno-error=unused-const-variable N64_CFLAGS = $(N64_C_AND_CXX_FLAGS) -std=gnu99 -N64_CXXFLAGS = 
$(N64_C_AND_CXX_FLAGS) +N64_CXXFLAGS = $(N64_C_AND_CXX_FLAGS) -std=gnu++17 N64_ASFLAGS = -mtune=vr4300 -march=vr4300 -Wa,--fatal-warnings -I$(N64_INCLUDEDIR) N64_RSPASFLAGS = -march=mips1 -mabi=32 -Wa,--fatal-warnings -I$(N64_INCLUDEDIR) N64_LDFLAGS = -g -L$(N64_LIBDIR) -ldragon -lm -ldragonsys -Tn64.ld --gc-sections --wrap __do_global_ctors From 9934c73ae769b8968293e322aff9356123209b00 Mon Sep 17 00:00:00 2001 From: Dennis Heinze Date: Thu, 25 Jan 2024 23:03:52 +0100 Subject: [PATCH 04/48] Add rdpq, a comprehensive RDP programming library This commit also includes the following: - Deprecate the old RDP library - Modify and extend the rsp and rspq subsystems to support RDP interoperability - Extend surface.h and sprite.h with rdpq-related features Co-authored-by: Giovanni Bajo Co-authored-by: SpookyIluha <127010686+SpookyIluha@users.noreply.github.com> --- Makefile | 18 +- include/libdragon.h | 9 + include/rdp.h | 371 +++++++- include/rdpq.h | 1556 ++++++++++++++++++++++++++++++ include/rdpq_attach.h | 189 ++++ include/rdpq_constants.h | 33 + include/rdpq_debug.h | 189 ++++ include/rdpq_macros.h | 862 +++++++++++++++++ include/rdpq_mode.h | 894 +++++++++++++++++ include/rdpq_rect.h | 403 ++++++++ include/rdpq_sprite.h | 127 +++ include/rdpq_tex.h | 421 ++++++++ include/rdpq_tri.h | 247 +++++ include/rsp_queue.inc | 222 ++++- include/rsp_rdpq.inc | 1340 ++++++++++++++++++++++++++ include/rspq.h | 42 +- include/rspq_constants.h | 5 + include/sprite.h | 62 +- include/surface.h | 89 +- n64.mk | 1 + src/graphics.c | 6 + src/rdp.c | 863 ++++------------- src/rdpq/rdpq.c | 1139 ++++++++++++++++++++++ src/rdpq/rdpq_attach.c | 141 +++ src/rdpq/rdpq_debug.c | 1589 +++++++++++++++++++++++++++++++ src/rdpq/rdpq_debug_internal.h | 71 ++ src/rdpq/rdpq_internal.h | 176 ++++ src/rdpq/rdpq_mode.c | 169 ++++ src/rdpq/rdpq_rect.c | 74 ++ src/rdpq/rdpq_sprite.c | 139 +++ src/rdpq/rdpq_sprite_internal.h | 12 + src/rdpq/rdpq_tex.c | 696 ++++++++++++++ src/rdpq/rdpq_tex_internal.h | 32 + 
src/rdpq/rdpq_tri.c | 540 +++++++++++ src/rdpq/rsp_rdpq.S | 807 ++++++++++++++++ src/rsp.c | 78 +- src/rspq/rspq.c | 492 +++++----- src/rspq/rspq_internal.h | 258 +++++ src/sprite.c | 60 ++ 39 files changed, 13402 insertions(+), 1020 deletions(-) create mode 100644 include/rdpq.h create mode 100644 include/rdpq_attach.h create mode 100644 include/rdpq_constants.h create mode 100644 include/rdpq_debug.h create mode 100644 include/rdpq_macros.h create mode 100644 include/rdpq_mode.h create mode 100644 include/rdpq_rect.h create mode 100644 include/rdpq_sprite.h create mode 100644 include/rdpq_tex.h create mode 100644 include/rdpq_tri.h create mode 100644 include/rsp_rdpq.inc create mode 100644 src/rdpq/rdpq.c create mode 100644 src/rdpq/rdpq_attach.c create mode 100644 src/rdpq/rdpq_debug.c create mode 100644 src/rdpq/rdpq_debug_internal.h create mode 100644 src/rdpq/rdpq_internal.h create mode 100644 src/rdpq/rdpq_mode.c create mode 100644 src/rdpq/rdpq_rect.c create mode 100644 src/rdpq/rdpq_sprite.c create mode 100644 src/rdpq/rdpq_sprite_internal.h create mode 100644 src/rdpq/rdpq_tex.c create mode 100644 src/rdpq/rdpq_tex_internal.h create mode 100644 src/rdpq/rdpq_tri.c create mode 100644 src/rdpq/rsp_rdpq.S create mode 100644 src/rspq/rspq_internal.h diff --git a/Makefile b/Makefile index 9ab0395ebc..bfb736da67 100755 --- a/Makefile +++ b/Makefile @@ -50,7 +50,12 @@ libdragon.a: $(BUILD_DIR)/n64sys.o $(BUILD_DIR)/interrupt.o $(BUILD_DIR)/backtra $(BUILD_DIR)/audio/xm64.o $(BUILD_DIR)/audio/libxm/play.o \ $(BUILD_DIR)/audio/libxm/context.o $(BUILD_DIR)/audio/libxm/load.o \ $(BUILD_DIR)/audio/ym64.o $(BUILD_DIR)/audio/ay8910.o \ - $(BUILD_DIR)/rspq/rspq.o $(BUILD_DIR)/rspq/rsp_queue.o + $(BUILD_DIR)/rspq/rspq.o $(BUILD_DIR)/rspq/rsp_queue.o \ + $(BUILD_DIR)/rdpq/rdpq.o $(BUILD_DIR)/rdpq/rsp_rdpq.o \ + $(BUILD_DIR)/rdpq/rdpq_debug.o $(BUILD_DIR)/rdpq/rdpq_tri.o \ + $(BUILD_DIR)/rdpq/rdpq_rect.o $(BUILD_DIR)/rdpq/rdpq_mode.o \ + $(BUILD_DIR)/rdpq/rdpq_sprite.o 
$(BUILD_DIR)/rdpq/rdpq_tex.o \ + $(BUILD_DIR)/rdpq/rdpq_attach.o @echo " [AR] $@" $(N64_AR) -rcs -o $@ $^ @@ -134,6 +139,17 @@ install: install-mk libdragon install -Cv -m 0644 include/rspq.h $(INSTALLDIR)/mips64-elf/include/rspq.h install -Cv -m 0644 include/rspq_constants.h $(INSTALLDIR)/mips64-elf/include/rspq_constants.h install -Cv -m 0644 include/rsp_queue.inc $(INSTALLDIR)/mips64-elf/include/rsp_queue.inc + install -Cv -m 0644 include/rdpq.h $(INSTALLDIR)/mips64-elf/include/rdpq.h + install -Cv -m 0644 include/rdpq_tri.h $(INSTALLDIR)/mips64-elf/include/rdpq_tri.h + install -Cv -m 0644 include/rdpq_rect.h $(INSTALLDIR)/mips64-elf/include/rdpq_rect.h + install -Cv -m 0644 include/rdpq_attach.h $(INSTALLDIR)/mips64-elf/include/rdpq_attach.h + install -Cv -m 0644 include/rdpq_mode.h $(INSTALLDIR)/mips64-elf/include/rdpq_mode.h + install -Cv -m 0644 include/rdpq_tex.h $(INSTALLDIR)/mips64-elf/include/rdpq_tex.h + install -Cv -m 0644 include/rdpq_sprite.h $(INSTALLDIR)/mips64-elf/include/rdpq_sprite.h + install -Cv -m 0644 include/rdpq_debug.h $(INSTALLDIR)/mips64-elf/include/rdpq_debug.h + install -Cv -m 0644 include/rdpq_macros.h $(INSTALLDIR)/mips64-elf/include/rdpq_macros.h + install -Cv -m 0644 include/rdpq_constants.h $(INSTALLDIR)/mips64-elf/include/rdpq_constants.h + install -Cv -m 0644 include/rsp_rdpq.inc $(INSTALLDIR)/mips64-elf/include/rsp_rdpq.inc mkdir -p $(INSTALLDIR)/mips64-elf/include/libcart install -Cv -m 0644 src/libcart/cart.h $(INSTALLDIR)/mips64-elf/include/libcart/cart.h mkdir -p $(INSTALLDIR)/mips64-elf/include/fatfs diff --git a/include/libdragon.h b/include/libdragon.h index 41a7700557..981d681c23 100755 --- a/include/libdragon.h +++ b/include/libdragon.h @@ -57,6 +57,15 @@ #include "xm64.h" #include "ym64.h" #include "rspq.h" +#include "rdpq.h" +#include "rdpq_tri.h" +#include "rdpq_rect.h" +#include "rdpq_attach.h" +#include "rdpq_mode.h" +#include "rdpq_tex.h" +#include "rdpq_sprite.h" +#include "rdpq_debug.h" +#include 
"rdpq_macros.h" #include "surface.h" #include "sprite.h" #include "debugcpp.h" diff --git a/include/rdp.h b/include/rdp.h index 9c558b0949..ae187902b7 100644 --- a/include/rdp.h +++ b/include/rdp.h @@ -1,19 +1,83 @@ /** * @file rdp.h - * @brief Hardware Display Interface + * @brief RDP: Hardware Display Interface * @ingroup rdp */ #ifndef __LIBDRAGON_RDP_H #define __LIBDRAGON_RDP_H #include "display.h" -#include "graphics.h" +#include "rdpq.h" +#include "rdpq_attach.h" +#include + +///@cond +typedef struct sprite_s sprite_t; +///@endcond /** * @addtogroup rdp * @{ */ +/** @brief DP start register */ +#define DP_START ((volatile uint32_t*)0xA4100000) + +/** @brief DP end register */ +#define DP_END ((volatile uint32_t*)0xA4100004) + +/** @brief DP current register */ +#define DP_CURRENT ((volatile uint32_t*)0xA4100008) + +/** @brief DP status register */ +#define DP_STATUS ((volatile uint32_t*)0xA410000C) + +/** @brief DP clock counter */ +#define DP_CLOCK ((volatile uint32_t*)0xA4100010) + +/** @brief DP command buffer busy */ +#define DP_BUSY ((volatile uint32_t*)0xA4100014) + +/** @brief DP pipe busy */ +#define DP_PIPE_BUSY ((volatile uint32_t*)0xA4100018) + +/** @brief DP tmem busy */ +#define DP_TMEM_BUSY ((volatile uint32_t*)0xA410001C) + +/** @brief DP is using DMEM DMA */ +#define DP_STATUS_DMEM_DMA (1 << 0) +/** @brief DP is frozen */ +#define DP_STATUS_FREEZE (1 << 1) +/** @brief DP is flushed */ +#define DP_STATUS_FLUSH (1 << 2) +/** @brief DP GCLK is alive */ +#define DP_STATUS_GCLK_ALIVE (1 << 3) +/** @brief DP TMEM is busy */ +#define DP_STATUS_TMEM_BUSY (1 << 4) +/** @brief DP pipeline is busy */ +#define DP_STATUS_PIPE_BUSY (1 << 5) +/** @brief DP command unit is busy */ +#define DP_STATUS_BUSY (1 << 6) +/** @brief DP command buffer is ready */ +#define DP_STATUS_BUFFER_READY (1 << 7) +/** @brief DP DMA is busy */ +#define DP_STATUS_DMA_BUSY (1 << 8) +/** @brief DP command end register is valid */ +#define DP_STATUS_END_VALID (1 << 9) +/** @brief DP 
command start register is valid */ +#define DP_STATUS_START_VALID (1 << 10) + +#define DP_WSTATUS_RESET_XBUS_DMEM_DMA (1<<0) ///< DP_STATUS write mask: clear #DP_STATUS_DMEM_DMA bit +#define DP_WSTATUS_SET_XBUS_DMEM_DMA (1<<1) ///< DP_STATUS write mask: set #DP_STATUS_DMEM_DMA bit +#define DP_WSTATUS_RESET_FREEZE (1<<2) ///< DP_STATUS write mask: clear #DP_STATUS_FREEZE bit +#define DP_WSTATUS_SET_FREEZE (1<<3) ///< DP_STATUS write mask: set #DP_STATUS_FREEZE bit +#define DP_WSTATUS_RESET_FLUSH (1<<4) ///< DP_STATUS write mask: clear #DP_STATUS_FLUSH bit +#define DP_WSTATUS_SET_FLUSH (1<<5) ///< DP_STATUS write mask: set #DP_STATUS_FLUSH bit +#define DP_WSTATUS_RESET_TMEM_COUNTER (1<<6) ///< DP_STATUS write mask: clear TMEM counter +#define DP_WSTATUS_RESET_PIPE_COUNTER (1<<7) ///< DP_STATUS write mask: clear PIPE counter +#define DP_WSTATUS_RESET_CMD_COUNTER (1<<8) ///< DP_STATUS write mask: clear CMD counter +#define DP_WSTATUS_RESET_CLOCK_COUNTER (1<<9) ///< DP_STATUS write mask: clear CLOCK counter + /** * @brief Mirror settings for textures */ @@ -29,21 +93,6 @@ typedef enum MIRROR_XY } mirror_t; -/** - * @brief RDP sync operations - */ -typedef enum -{ - /** @brief Wait for any operation to complete before causing a DP interrupt */ - SYNC_FULL, - /** @brief Sync the RDP pipeline */ - SYNC_PIPE, - /** @brief Block until all texture load operations are complete */ - SYNC_LOAD, - /** @brief Block until all tile operations are complete */ - SYNC_TILE -} sync_t; - /** * @brief Caching strategy for loaded textures */ @@ -61,40 +110,300 @@ typedef enum extern "C" { #endif -void rdp_init( void ); -void rdp_attach( surface_t* disp ); -void rdp_detach( void ); -void rdp_sync( sync_t sync ); -void rdp_set_clipping( uint32_t tx, uint32_t ty, uint32_t bx, uint32_t by ); -void rdp_set_default_clipping( void ); -void rdp_enable_primitive_fill( void ); +/** + * @brief Enable display of 2D filled (untextured) triangles, with possible alpha blending. 
+ * + * This must be called before using #rdp_draw_filled_triangle. + */ void rdp_enable_blend_fill( void ); -void rdp_enable_texture_copy( void ); + +/** + * @brief Load a sprite into RDP TMEM + * + * @param[in] texslot + * The RDP texture slot to load this sprite into (0-7) + * @param[in] texloc + * The RDP TMEM offset to place the texture at + * @param[in] mirror + * Whether the sprite should be mirrored when displaying past boundaries + * @param[in] sprite + * Pointer to sprite structure to load the texture from + * + * @return The number of bytes consumed in RDP TMEM by loading this sprite + */ uint32_t rdp_load_texture( uint32_t texslot, uint32_t texloc, mirror_t mirror, sprite_t *sprite ); + +/** + * @brief Load part of a sprite into RDP TMEM + * + * Given a sprite with vertical and horizontal slices defined, this function will load the slice specified in + * offset into texture memory. This is useful for treating a large sprite as a tilemap. + * + * Given a sprite with 3 horizontal slices and two vertical slices, the offsets are as follows: + * + *
+ * *---*---*---*
+ * | 0 | 1 | 2 |
+ * *---*---*---*
+ * | 3 | 4 | 5 |
+ * *---*---*---*
+ * 
+ * + * @param[in] texslot + * The RDP texture slot to load this sprite into (0-7) + * @param[in] texloc + * The RDP TMEM offset to place the texture at + * @param[in] mirror + * Whether the sprite should be mirrored when displaying past boundaries + * @param[in] sprite + * Pointer to sprite structure to load the texture from + * @param[in] offset + * Offset of the particular slice to load into RDP TMEM. + * + * @return The number of bytes consumed in RDP TMEM by loading this sprite + */ uint32_t rdp_load_texture_stride( uint32_t texslot, uint32_t texloc, mirror_t mirror, sprite_t *sprite, int offset ); + +/** + * @brief Draw a textured rectangle + * + * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture. + * If the rectangle is larger than the texture, it will be tiled or mirrored based on the mirror setting + * given in the load texture command. + * + * Before using this command to draw a textured rectangle, use #rdpq_set_mode_copy (or the deprecated + * rdp_enable_texture_copy) to set the RDP up in texture copy mode. + * + * @param[in] texslot + * The texture slot that the texture was previously loaded into (0-7) + * @param[in] tx + * The pixel X location of the top left of the rectangle + * @param[in] ty + * The pixel Y location of the top left of the rectangle + * @param[in] bx + * The pixel X location of the bottom right of the rectangle + * @param[in] by + * The pixel Y location of the bottom right of the rectangle + * @param[in] mirror + * Whether the texture should be mirrored + */ void rdp_draw_textured_rectangle( uint32_t texslot, int tx, int ty, int bx, int by, mirror_t mirror ); + +/** + * @brief Draw a textured rectangle with a scaled texture + * + * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture + * at a scale other than 1. This allows rectangles to be drawn with stretched or squashed textures. 
+ * If the rectangle is larger than the texture after scaling, it will be tiled or mirrored based on the + * mirror setting given in the load texture command. + * + * Before using this command to draw a textured rectangle, use #rdpq_set_mode_copy (or the deprecated + * rdp_enable_texture_copy) to set the RDP up in texture copy mode. + * + * @param[in] texslot + * The texture slot that the texture was previously loaded into (0-7) + * @param[in] tx + * The pixel X location of the top left of the rectangle + * @param[in] ty + * The pixel Y location of the top left of the rectangle + * @param[in] bx + * The pixel X location of the bottom right of the rectangle + * @param[in] by + * The pixel Y location of the bottom right of the rectangle + * @param[in] x_scale + * Horizontal scaling factor + * @param[in] y_scale + * Vertical scaling factor + * @param[in] mirror + * Whether the texture should be mirrored + */ void rdp_draw_textured_rectangle_scaled( uint32_t texslot, int tx, int ty, int bx, int by, double x_scale, double y_scale, mirror_t mirror ); + +/** + * @brief Draw a texture to the screen as a sprite + * + * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture. + * + * Before using this command to draw a textured rectangle, use #rdpq_set_mode_copy (or the deprecated + * rdp_enable_texture_copy) to set the RDP up in texture copy mode. + * + * @param[in] texslot + * The texture slot that the texture was previously loaded into (0-7) + * @param[in] x + * The pixel X location of the top left of the sprite + * @param[in] y + * The pixel Y location of the top left of the sprite + * @param[in] mirror + * Whether the texture should be mirrored + */ void rdp_draw_sprite( uint32_t texslot, int x, int y , mirror_t mirror); + +/** + * @brief Draw a texture to the screen as a scaled sprite + * + * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture. 
+ * + * Before using this command to draw a textured rectangle, use #rdpq_set_mode_copy (or the deprecated + * rdp_enable_texture_copy) to set the RDP up in texture copy mode. + * + * @param[in] texslot + * The texture slot that the texture was previously loaded into (0-7) + * @param[in] x + * The pixel X location of the top left of the sprite + * @param[in] y + * The pixel Y location of the top left of the sprite + * @param[in] x_scale + * Horizontal scaling factor + * @param[in] y_scale + * Vertical scaling factor + * @param[in] mirror + * Whether the texture should be mirrored + */ void rdp_draw_sprite_scaled( uint32_t texslot, int x, int y, double x_scale, double y_scale, mirror_t mirror); -void rdp_set_primitive_color( uint32_t color ); + +/** + * @brief Set the blend draw color for subsequent filled primitive operations + * + * This function sets the color of all #rdp_draw_filled_triangle operations that follow. + * + * @param[in] color + * Color to draw primitives in + */ void rdp_set_blend_color( uint32_t color ); -void rdp_draw_filled_rectangle( int tx, int ty, int bx, int by ); + +/** + * @brief Draw a filled triangle + * + * Given a color set with #rdp_set_blend_color, this will draw a filled triangle + * to the screen. Vertex order is not important. + * + * Before calling this function, make sure that the RDP is set to blend mode by + * calling #rdp_enable_blend_fill. 
+ * + * @param[in] x1 + * Pixel X1 location of triangle + * @param[in] y1 + * Pixel Y1 location of triangle + * @param[in] x2 + * Pixel X2 location of triangle + * @param[in] y2 + * Pixel Y2 location of triangle + * @param[in] x3 + * Pixel X3 location of triangle + * @param[in] y3 + * Pixel Y3 location of triangle + */ void rdp_draw_filled_triangle( float x1, float y1, float x2, float y2, float x3, float y3 ); + +/** + * @brief Set the flush strategy for texture loads + * + * If textures are guaranteed to be in uncached RDRAM or the cache + * is flushed before calling load operations, the RDP can be told + * to skip flushing the cache. This affords a good speedup. However, + * if you are changing textures in memory on the fly or otherwise do + * not want to deal with cache coherency, set the cache strategy to + * automatic to have the RDP flush cache before texture loads. + * + * @param[in] flush + * The cache strategy, either #FLUSH_STRATEGY_NONE or + * #FLUSH_STRATEGY_AUTOMATIC. + */ void rdp_set_texture_flush( flush_t flush ); + + +/************************************************************************************************** + * Deprecated functions + * + * This is the old rdp.c API which has been replaced by the new API in rdpq.h. + * + * The API is still working correctly. The implementation is based on rspq so that it can be mix + * and matched with existing rdpq constructs. It will emit deprecation warnings when used, trying + * to suggest possible replacements. 
+ **************************************************************************************************/ + +/// @cond + +typedef enum +{ + SYNC_FULL, + SYNC_PIPE, + SYNC_LOAD, + SYNC_TILE +} sync_t; + +__attribute__((deprecated("use rdpq_init instead"))) +void rdp_init( void ); + +__attribute__((deprecated("use rdpq_close instead"))) void rdp_close( void ); -__attribute__((deprecated("use rdp_attach instead"))) +__attribute__((deprecated("use rdpq_attach instead"))) +static inline void rdp_attach( surface_t *surface ) +{ + rdpq_attach(surface, NULL); +} + +__attribute__((deprecated("use rdpq_detach_cb instead"))) +static inline void rdp_detach_async( void (*cb)(void*), void *arg ) +{ + rdpq_detach_cb(cb, arg); +} + +__attribute__((deprecated("use rdpq_detach_wait instead"))) +void rdp_detach( void ); + +__attribute__((deprecated("use rdpq_is_attached instead"))) +static inline bool rdp_is_attached( void ) +{ + return rdpq_is_attached(); +} + +__attribute__((deprecated("use rdpq_detach_show instead"))) +static inline void rdp_detach_show( surface_t *disp ) +{ + rdpq_detach_cb((void(*)(void*))display_show, (disp)); +} + +__attribute__((deprecated("use rdpq_attach instead"))) static inline void rdp_attach_display( display_context_t disp ) { - rdp_attach(disp); + rdpq_attach(disp, NULL); } -__attribute__((deprecated("use rdp_detach instead"))) +__attribute__((deprecated("use rdqp_detach_wait instead"))) static inline void rdp_detach_display( void ) { - rdp_detach(); + rdpq_detach(); } +__attribute__((deprecated("use rdpq_set_scissor instead"))) +void rdp_set_clipping( uint32_t tx, uint32_t ty, uint32_t bx, uint32_t by ); + +__attribute__((deprecated("default clipping is activated automatically during rdp_attach_display"))) +void rdp_set_default_clipping( void ); + +__attribute__((deprecated("syncs are now performed automatically -- or use rdpq_sync_* functions otherwise"))) +void rdp_sync( sync_t sync ); + +__attribute__((deprecated("use rdpq_fill_rectangle instead"))) 
+void rdp_draw_filled_rectangle( int tx, int ty, int bx, int by ); + +__attribute__((deprecated("use rdpq_set_fill_color instead"))) +static inline void rdp_set_primitive_color(uint32_t color) { + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + __rdpq_write8_syncchange(RDPQ_CMD_SET_FILL_COLOR, 0, color, AUTOSYNC_PIPE); +} + +__attribute__((deprecated("use rdpq_set_mode_fill instead"))) +void rdp_enable_primitive_fill( void ); + +__attribute__((deprecated("use rdpq_set_mode_copy instead"))) +void rdp_enable_texture_copy( void ); + +/// @endcond + + #ifdef __cplusplus } #endif diff --git a/include/rdpq.h b/include/rdpq.h new file mode 100644 index 0000000000..fe3963289a --- /dev/null +++ b/include/rdpq.h @@ -0,0 +1,1556 @@ +/** + * @file rdpq.h + * @brief RDP Command queue + * @ingroup rdpq + */ + +/** + * @defgroup rdpq RDPQ: Hardware-accelerated drawing API + * @brief Interface to the RDP (graphics hardware) for 2D/3D rasterization + * @ingroup display + * + * The RDPQ ("RDP command queue") is a library that allows to interface with + * the RDP ("Reality Display Processor"), the GPU on the N64, through the RSP. + * + * This library is quite vast because RDP is a complex chip to program and full + * of quirks. Moreover, the needs for 2D vs 3D are quite different, and the library + * copes with both. An important effort has been made to make this library + * "just work". + * + * Since the API is wide, the library is split in several header files. Make + * sure to read them all to have a general overview: + * + * * rdpq.h: General low-level RDP command generation. + * * rdpq_tri.h: Low-level screen-space triangle drawing API. + * * rdpq_rect.h: Low-level screen-space rectangle drawing API. 
+ * * rdpq_attach.h: Attachment API (optional), to simplify configuring the render target + * * rdpq_mode.h: Mode API (optional), to simplify configuring the render modes + * * rdpq_tex.h: Texture API (optional), to simplify uploading to TMEM and blitting 2D surfaces + * * rdpq_sprite.h: Sprite API (optional), to simplify uploading to TMEM and blitting sprites + * * rdpq_debug.h: Debugging API (optional), to help catching bugs. + * + * ## Goals of this library + * + * This library is meant to be used directly for two tasks: + * + * * 2D hardware-assisted rasterization: drawing tiles, sprites, text. + * * 3D rasterization of triangles computed on the CPU. This is mostly the case + * if you are porting a 3D engine that runs T&L on the CPU but you want + * to draw triangles using RDP. + * + * For a full 3D project, libdragon offers a full 3D API via the OpenGL API + * (see gl.h); OpenGL internally uses rdpq, but it is unlikely that you will + * need to call rdpq directly when you are using OpenGL. + * + * ## Architecture and rationale + * + * Normally, RDP commands are generated by both the CPU and the RSP. The normal + * split is that CPU is in charge of render mode changes (eg: loading textures, + * defining the alpha blending behavior, etc.), while the RSP executes a full + * T&L pipeline which terminates with the generation of RDP triangle primitives. + * + * This library allows the CPU to enqueue RDP commands. It covers the full + * RDP command set, including triangles. Even for RDP commands generated by the CPU, + * the RSP is involved: in fact, this library is a rspq overlay (see rspq.h). + * All RDP commands are enqueued in the main RSP command queue, and they are sent + * to the RDP by the RSP. 
+ * + * There are two main reasons for this design (rather than letting the CPU directly + * send the commands to the RDP): + * + * * Given that CPU and RSP usually work in parallel (with as few as possible + * syncing points), it is necessary to make sure that the CPU is able to + * schedule RDP commands that will be executed in the right order with + * respect to commands generated by RSP. This is easy to do if CPU-generated + * RDP commands always go through RSP in main command queue. + * + * * Most of the commands are sent unchanged to the RDP (we call them "passthrough"). + * Some commands, instead, are manipulated by the RSP and changed before + * they hit the RDP (we call these "fixups"). This is done to achieve a saner + * semantic for the programmer, hiding a few dark corners of the RDP hardware. + * + * The documentation of the public API of this library describes the final + * behavior of each rdpq command, without explicitly mentioning whether it is + * obtained via fixups or not. For more information on these, see the + * documentation of rdpq.c, which gives an overview of many implementation details. + * + * ## Render modes + * + * The most complicated part of programming RDP is getting the correct render mode + * configuration. At the lowest level (hardware commands), this can be done via + * two functions: #rdpq_set_other_modes_raw (that maps to the RDP command `SET_OTHER_MODES`, + * usually shortened as "SOM") and #rdpq_set_combiner_raw (that maps to the RDP + * command `SET_COMBINE`). These functions are meant for programmers already + * familiar with the RDP hardware, and allow you to manipulate configurations + * freely. + * + * To help with partial SOM changes, rdpq also offers #rdpq_change_other_modes_raw that + * allows to change only some bits of the SOM state. This is done by tracking the + * current SOM state (within the RSP) so that a partial update can be sent. 
It is + * useful to make programming more modular, so that for instance a portion of code + * can temporarily enable (eg.) fogging, without having to restate the full render + * mode. + * + * Alternatively, rdpq offers a higher level render mode API, which is hopefully + * clearer to understand and more accessible, that tries to hide some of the most + * common pitfalls. This API can be found in the rdpq_mode.h file. It is possible + * to switch from this higher level API to the lower level one at any time + * in the code with no overhead, so that it can be adopted wherever it is a good + * fit, falling back to lower level programming if/when necessary. + * + * Beginners of RDP programming are strongly encouraged to use rdpq_mode.h, and + * only later dive into lower-level RDP programming, if necessary. + * + * ## Blocks and address lookups + * + * Being a RSPQ overlay, it is possible to record rdpq commands in blocks (via + * #rspq_block_begin / #rspq_block_end, like for any other overlay), to quickly + * replay them with zero CPU time. + * + * rdpq has also some special memory-bandwidth optimizations that are used + * when commands are compiled into blocks (for more details, see documentation + * of rdpq.c). In general, it is advised to use blocks whenever possible, + * especially in case of a sequence of 3 or more rdpq function calls. + * + * TO BE COMPLETED.... + * + * + * ## Debugging: tracer and validator + * + * To help writing correct code, rdpq comes with two very important features: + * + * * A command tracer with disassembler. rdpq is able to intercept all commands + * sent to RDP (including commands assembled directly by third-party rspq + * overlays), and log them via #debugf. The log includes a full disassembly + * of the commands, to help readability. + * * A validator. rdpq can re-interpret all commands sent to RDP and validate + * that they are correct, not only syntactically but also semantically. 
It is + * extremely easy to make mistakes in programming RDP by setting wrong mode + * flags or forgetting to configure a register, so the validator tries to help by + * flagging potential problems. All validation errors and warnings are sent + * via #debugf. + * + * To initialize the debugging engine, call #rdpq_debug_start just after #rdpq_init + * (or as early as possible). This will start intercepting and validating all + * commands sent to RDP, showing validation errors on the debug spew. + * + * To see a log of RDP commands, call #rdpq_debug_log passing true or false. You + * can activate/deactivate logging around portions of code that you want to analyze, + * as keeping the log active for a whole frame can produce too much information. + * + */ + +#ifndef __LIBDRAGON_RDPQ_H +#define __LIBDRAGON_RDPQ_H + +#include +#include +#include +#include "graphics.h" +#include "n64sys.h" +#include "rdpq_macros.h" +#include "surface.h" +#include "debug.h" + +/** + * @brief Static overlay ID of rdpq library. + * + * The rdpq overlay must be registered at this ID via #rspq_overlay_register_static.
+ */ +#define RDPQ_OVL_ID (0xC << 28) + +enum { + RDPQ_CMD_NOOP = 0x00, + RDPQ_CMD_SET_LOOKUP_ADDRESS = 0x01, + RDPQ_CMD_FILL_RECTANGLE_EX = 0x02, + RDPQ_CMD_RESET_RENDER_MODE = 0x04, + RDPQ_CMD_SET_COMBINE_MODE_2PASS = 0x05, + RDPQ_CMD_PUSH_RENDER_MODE = 0x06, + RDPQ_CMD_POP_RENDER_MODE = 0x07, + RDPQ_CMD_TRI = 0x08, + RDPQ_CMD_TRI_ZBUF = 0x09, + RDPQ_CMD_TRI_TEX = 0x0A, + RDPQ_CMD_TRI_TEX_ZBUF = 0x0B, + RDPQ_CMD_TRI_SHADE = 0x0C, + RDPQ_CMD_TRI_SHADE_ZBUF = 0x0D, + RDPQ_CMD_TRI_SHADE_TEX = 0x0E, + RDPQ_CMD_TRI_SHADE_TEX_ZBUF = 0x0F, + + RDPQ_CMD_TEXTURE_RECTANGLE_EX = 0x10, + RDPQ_CMD_SET_DEBUG_MODE = 0x11, + RDPQ_CMD_SET_SCISSOR_EX = 0x12, + RDPQ_CMD_SET_PRIM_COLOR_COMPONENT = 0x13, + RDPQ_CMD_MODIFY_OTHER_MODES = 0x14, + RDPQ_CMD_SET_FILL_COLOR_32 = 0x16, + RDPQ_CMD_SET_BLENDING_MODE = 0x18, + RDPQ_CMD_SET_FOG_MODE = 0x19, + RDPQ_CMD_SET_COMBINE_MODE_1PASS = 0x1B, + RDPQ_CMD_AUTOTMEM_SET_ADDR = 0x1C, + RDPQ_CMD_AUTOTMEM_SET_TILE = 0x1D, + RDPQ_CMD_TRIANGLE = 0x1E, + RDPQ_CMD_TRIANGLE_DATA = 0x1F, + + RDPQ_CMD_TEXTURE_RECTANGLE = 0x24, + RDPQ_CMD_TEXTURE_RECTANGLE_FLIP = 0x25, + RDPQ_CMD_SYNC_LOAD = 0x26, + RDPQ_CMD_SYNC_PIPE = 0x27, + RDPQ_CMD_SYNC_TILE = 0x28, + RDPQ_CMD_SYNC_FULL = 0x29, + RDPQ_CMD_SET_KEY_GB = 0x2A, + RDPQ_CMD_SET_KEY_R = 0x2B, + RDPQ_CMD_SET_CONVERT = 0x2C, + RDPQ_CMD_SET_SCISSOR = 0x2D, + RDPQ_CMD_SET_PRIM_DEPTH = 0x2E, + RDPQ_CMD_SET_OTHER_MODES = 0x2F, + + RDPQ_CMD_LOAD_TLUT = 0x30, + RDPQ_CMD_DEBUG = 0x31, + RDPQ_CMD_SET_TILE_SIZE = 0x32, + RDPQ_CMD_LOAD_BLOCK = 0x33, + RDPQ_CMD_LOAD_TILE = 0x34, + RDPQ_CMD_SET_TILE = 0x35, + RDPQ_CMD_FILL_RECTANGLE = 0x36, + RDPQ_CMD_SET_FILL_COLOR = 0x37, + RDPQ_CMD_SET_FOG_COLOR = 0x38, + RDPQ_CMD_SET_BLEND_COLOR = 0x39, + RDPQ_CMD_SET_PRIM_COLOR = 0x3A, + RDPQ_CMD_SET_ENV_COLOR = 0x3B, + RDPQ_CMD_SET_COMBINE_MODE_RAW = 0x3C, + RDPQ_CMD_SET_TEXTURE_IMAGE = 0x3D, + RDPQ_CMD_SET_Z_IMAGE = 0x3E, + RDPQ_CMD_SET_COLOR_IMAGE = 0x3F, +}; + +#define RDPQ_CFG_AUTOSYNCPIPE (1 << 0) ///< Configuration flag: 
enable automatic generation of SYNC_PIPE commands +#define RDPQ_CFG_AUTOSYNCLOAD (1 << 1) ///< Configuration flag: enable automatic generation of SYNC_LOAD commands +#define RDPQ_CFG_AUTOSYNCTILE (1 << 2) ///< Configuration flag: enable automatic generation of SYNC_TILE commands +#define RDPQ_CFG_AUTOSCISSOR (1 << 3) ///< Configuration flag: enable automatic generation of SET_SCISSOR commands on render target change +#define RDPQ_CFG_DEFAULT (0xFFFF) ///< Configuration flag: default configuration + +///@cond +// Used in inline functions as part of the autosync engine. Not part of public API. +#define AUTOSYNC_TILE(n) (1 << (0+(n))) // Autosync state: Bit used for tile N +#define AUTOSYNC_TILES (0xFF << 0) // Autosync state: Mask for all bits regarding tile +#define AUTOSYNC_TMEM(n) (1 << (8+(n))) // Autosync state: Bit used for tmem portion N +#define AUTOSYNC_TMEMS (0xFF << 8) // Autosync state: Mask for all bits regarding TMEM +#define AUTOSYNC_PIPE (1 << 16) // Autosync state: Bit used for pipe +///@endcond + +///@cond +/* Used internally for bit-packing RDP commands. Not part of public API. */ +#define _carg(value, mask, shift) (((uint32_t)((value) & (mask))) << (shift)) +///@endcond + +/** @brief Tile descriptors. + * + * These are enums that map to integers 0-7, but they can be used in place of the + * integers for code readability. + */ +typedef enum { + TILE0 = 0, ///< Tile #0 (for code readability) + TILE1 = 1, ///< Tile #1 (for code readability) + TILE2 = 2, ///< Tile #2 (for code readability) + TILE3 = 3, ///< Tile #3 (for code readability) + TILE4 = 4, ///< Tile #4 (for code readability) + TILE5 = 5, ///< Tile #5 (for code readability) + TILE6 = 6, ///< Tile #6 (for code readability) + TILE7 = 7, ///< Tile #7 (for code readability) +} rdpq_tile_t; + + +/** + * @brief Tile parameters for #rdpq_set_tile. + * + * This structure contains all possible parameters for #rdpq_set_tile. 
+ * All fields have been made so that the 0 value is always the most + * reasonable default (clamped with default scale, no mirroring). + * This means that you can simply initialize the structure to 0 and then + * change only the fields you need (for instance, through a compound literal). + * + */ +typedef struct { + uint8_t palette; ///< Optional palette associated to the texture. For textures in #FMT_CI4 format, specify the palette index (0-15), otherwise use 0. + + // Additional mapping parameters; Leave them as 0 if not required; + + struct{ + bool clamp; ///< True if texture needs to be clamped. Otherwise wrap the texture around; + bool mirror; ///< True if texture needs to be mirrored. Otherwise wrap the texture without mirroring; + uint8_t mask; ///< Power of 2 boundary of the texture in pixels to wrap. (Important note: Mask value of 0 will force clamping to be ON regardless of clamp value); + int8_t shift; ///< Power of 2 scale of the texture to wrap on. Range is [-5..10]; + } s,t; // S/T directions of the tile descriptor + +} rdpq_tileparms_t; + +/** @brief Tile descriptor internally used by some RDPQ functions. Avoid using if possible */ +#define RDPQ_TILE_INTERNAL TILE7 + +#ifdef __cplusplus +extern "C" { +#endif + +/// @private +extern void __rdpq_set_scissor(uint32_t, uint32_t); + +/** + * @brief Initialize the RDPQ library. + * + * This should be called by the initialization functions of the higher-level + * libraries using RDPQ to emit RDP commands, and/or by the application main + * if the application itself calls rdpq functions. + * + * It is safe to call this function multiple times (it does nothing), so that + * multiple independent libraries using rdpq can call #rdpq_init with no side + * effects. + */ +void rdpq_init(void); + +/** + * @brief Shutdown the RDPQ library. + * + * This is mainly used for testing. + */ +void rdpq_close(void); + + +/** + * @brief Set the configuration of the RDPQ module. 
+ * + * This function allows you to change the configuration of rdpq to enable/disable + * features. This is useful mainly for advanced users that want to manually tune + * RDP programming, disabling some automatisms performed by rdpq. + * + * The configuration is a bitmask that can be composed using the `RDPQ_CFG_*` macros. + * + * To enable or disable specific configuration options use #rdpq_config_enable or + * #rdpq_config_disable. + * + * @param cfg The new configuration to set + * @return The previous configuration + * + * @see #rdpq_config_enable + * @see #rdpq_config_disable + */ +uint32_t rdpq_config_set(uint32_t cfg); + +/** + * @brief Enable a specific set of configuration flags + * + * This function allows you to modify the configuration of rdpq activating a specific + * set of features. It can be useful to temporarily modify the configuration and then + * restore it. + * + * @param cfg_enable_bits Configuration flags to enable + * @return The previous configuration + * + * @see #rdpq_config_set + * @see #rdpq_config_disable + */ +uint32_t rdpq_config_enable(uint32_t cfg_enable_bits); + + +/** + * @brief Disable a specific set of configuration flags + * + * This function allows you to modify the configuration of rdpq disabling a specific + * set of features. It can be useful to temporarily modify the configuration and then + * restore it. + * + * @code{.c} + * // Disable automatic scissor generation + * uint32_t old_cfg = rdpq_config_disable(RDPQ_CFG_AUTOSCISSOR); + * + * // This will change the render target but will NOT issue a corresponding SET_SCISSOR. + * // This is dangerous as the currently-configured scissor might allow to draw outside of + * // the surface boundary, but an advanced user will know if this is correct. + * rdpq_set_color_image(surface); + * + * [...] 
+ * + * // Restore the previous configuration + * rdpq_config_set(old_cfg); + * @endcode + * + * @param cfg_disable_bits Configuration flags to disable + * @return The previous configuration + * + * @see #rdpq_config_set + * @see #rdpq_config_enable + */ +uint32_t rdpq_config_disable(uint32_t cfg_disable_bits); + +/** + * @brief Low level functions to set the matrix coefficients for texture format conversion + */ +inline void rdpq_set_yuv_parms(uint16_t k0, uint16_t k1, uint16_t k2, uint16_t k3, uint16_t k4, uint16_t k5) +{ + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + __rdpq_write8_syncchange(RDPQ_CMD_SET_CONVERT, + _carg(k0, 0x1FF, 13) | _carg(k1, 0x1FF, 4) | (((uint32_t)(k2 & 0x1FF)) >> 5), + _carg(k2, 0x1F, 27) | _carg(k3, 0x1FF, 18) | _carg(k4, 0x1FF, 9) | _carg(k5, 0x1FF, 0), + AUTOSYNC_PIPE); +} + +/** + * @brief Configure a scissoring rectangle in screen coordinates (RDP command: SET_SCISSOR) + * + * This function is used to configure a scissor region that the RDP will adhere to + * while drawing primitives (triangles or rectangles). Any points that fall outside + * of the specified scissoring rectangle will be ignored. + * + * The scissoring capability is also the only one that prevents the RDP from drawing + * outside of the current framebuffer (color surface) extents. As such, rdpq actually + * calls #rdpq_set_scissor automatically any time a new render target is configured + * (eg: via #rdpq_attach or #rdpq_set_color_image), because forgetting to do so might + * easily cause crashes. + * + * Because #rdpq_set_color_image will configure a scissoring region automatically, + * it is normally not required to call this function. Use this function if you want + * to restrict drawing to a smaller area of the framebuffer. + * + * The scissoring rectangle is defined using unsigned coordinates, and thus negative + * coordinates will always be clipped.
Rectangle-drawing primitives do not allow to + * specify them at all, but triangle-drawing primitives do. + * + * @param[in] x0 Top-left X coordinate of the rectangle + * @param[in] y0 Top-left Y coordinate of the rectangle + * @param[in] x1 Bottom-right *exclusive* X coordinate of the rectangle + * @param[in] y1 Bottom-right *exclusive* Y coordinate of the rectangle + * + * @see #rdpq_attach + * @see #rdpq_set_color_image + */ +#define rdpq_set_scissor(x0, y0, x1, y1) ({ \ + int32_t x0fx = (x0)*4; \ + int32_t y0fx = (y0)*4; \ + int32_t x1fx = (x1)*4; \ + int32_t y1fx = (y1)*4; \ + assertf(x0fx <= x1fx, "x1 must be greater or equal to x0"); \ + assertf(y0fx <= y1fx, "y1 must be greater or equal to y0"); \ + assertf(x0fx >= 0, "x0 must be positive"); \ + assertf(y0fx >= 0, "y0 must be positive"); \ + __rdpq_set_scissor( \ + _carg(x0fx, 0xFFF, 12) | _carg(y0fx, 0xFFF, 0), \ + _carg(x1fx, 0xFFF, 12) | _carg(y1fx, 0xFFF, 0)); \ +}) + +/** + * @brief Set a fixed Z value to be used instead of a per-pixel value (RDP command; SET_PRIM_DEPTH) + * + * When using z-buffering, normally the Z value used for z-buffering is + * calculated by interpolating the Z of each vertex onto each pixel. + * The RDP allows for usage of a fixed Z value instead, for special + * effects like particles or decals. + * + * This function allows to configure the RDP register that + * holds the fixed Z value. It is then necessary to activate this + * special RDP mode: either manually turning on SOM_ZSOURCE_PRIM via + * #rdpq_change_other_modes_raw. + * + * For beginners, it is suggested to use the mode API instead, via + * #rdpq_mode_zoverride. + * + * @param[in] prim_z Fixed Z value (in range 0..0x7FFF) + * @param[in] prim_dz Delta Z value (must be a signed power of two). + * Pass 0 initially, and increment to next power of two + * in case of problems with objects with the same Z. 
+ * + * @note Pending further investigation of the exact usage of this function, + * and specifically the prim_dz parameter, rdpq does not currently + * offer a higher-level function (`rdpq_set_prim_depth`). + */ + inline void rdpq_set_prim_depth_raw(uint16_t prim_z, int16_t prim_dz) +{ + // NOTE: this does not require a pipe sync + extern void __rdpq_write8(uint32_t, uint32_t, uint32_t); + assertf(prim_z <= 0x7FFF, "prim_z must be in [0..0x7FFF]"); + assertf((prim_dz & -prim_dz) == (prim_dz >= 0 ? prim_dz : -prim_dz), + "prim_dz must be a power of 2"); + __rdpq_write8(RDPQ_CMD_SET_PRIM_DEPTH, 0, _carg(prim_z, 0xFFFF, 16) | _carg(prim_dz, 0xFFFF, 0)); +} + +/** + * @brief Load a portion of a texture into TMEM (RDP command: LOAD_TILE) + * + * This is the main command to load data from RDRAM into TMEM. It is + * normally used to load a texture (or a portion of it), before using + * it for drawing. + * + * @note Beginners are advised to use the rdpq texture API (rdpq_tex.h), + * for instance #rdpq_tex_upload that takes care of everything required. + * + * Before calling #rdpq_load_tile, the tile must have been configured + * using #rdpq_set_tile to specify the TMEM address and pitch, and the + * texture in RDRAM must have been set via #rdpq_set_texture_image. + * + * In addition to loading TMEM, this command also records into the + * tile descriptor the extents of the loaded texture (that is, the + * texture coordinates), so that subsequent draw commands can still + * refer to original texture's coordinates to draw. For instance, + * if you have a large 512x128 texture and you load only a small + * portion into TMEM, for instance the rectangle at coordinates + * (16,16) - (48,48), the RDP will remember (through the tile descriptor) + * that the TMEM contains that specific rectangle, and subsequent + * triangles or rectangles commands can specify S,T texture + * coordinates within the range (16,16)-(48,48).
+ * + * If the portion being loaded is consecutive in RDRAM (rather + * than being a rectangle within a wider image), prefer using + * #rdpq_load_block for increased performance. + * + * @param[in] tile Tile descriptor to use (TILE0-TILE7). + * @param[in] s0 Upper-left X coordinate of the portion of the texture to load (integer or float). + * Range: 0-1024 + * @param[in] t0 Upper-left Y coordinate of the portion of the texture to load (integer or float), + * Range: 0-1024 + * @param[in] s1 Bottom-right X coordinate of the portion of the texture to load (integer or float), + * Range: 0-1024 + * @param[in] t1 Bottom-right Y coordinate of the portion of the texture to load (integer or float), + * Range: 0-1024 + * + * @see #rdpq_tex_upload + * @see #rdpq_set_texture_image + * @see #rdpq_load_block + * @see #rdpq_set_tile + * @see #rdpq_load_tile_fx + */ +#define rdpq_load_tile(tile, s0, t0, s1, t1) ({ \ + assertf((s0) >= 0 && (t0) >= 0 && (s1) >= 0 && (t1) >= 0, "texture coordinates must be positive"); \ + assertf((s0) < 1024 && (t0) < 1024 && (s1) < 1024 && (t1) < 1024, "texture coordinates must be smaller than 1024"); \ + rdpq_load_tile_fx((tile), (s0)*4, (t0)*4, (s1)*4, (t1)*4); \ +}) + +/** + * @brief Load a portion of a texture into TMEM -- fixed point version (RDP command: LOAD_TILE) + * + * This function is similar to #rdpq_load_tile, but coordinates can be specified + * in fixed point format (0.10.2). Refer to #rdpq_load_tile for increased performance + * + * @note Beginners are advised to use the rdpq texture API (rdpq_tex.h), + * for instance #rdpq_tex_upload that takes care of everything required. + * + * + * @param[in] tile Tile descriptor to use (TILE0-TILE7). + * @param[in] s0 Upper-left X coordinate of the portion of the texture to load (fx 0.10.2). 
+ * Range: 0-4096 + * @param[in] t0 Upper-left Y coordinate of the portion of the texture to load (fx 0.10.2), + * Range: 0-4096 + * @param[in] s1 Bottom-right X coordinate of the portion of the texture to load (fx 0.10.2), + * Range: 0-4096 + * @param[in] t1 Bottom-right Y coordinate of the portion of the texture to load (fx 0.10.2), + * Range: 0-4096 + * + * @see #rdpq_load_tile + * @see #rdpq_tex_upload + */ +inline void rdpq_load_tile_fx(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t s1, uint16_t t1) +{ + extern void __rdpq_write8_syncchangeuse(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_write8_syncchangeuse(RDPQ_CMD_LOAD_TILE, + _carg(s0, 0xFFF, 12) | _carg(t0, 0xFFF, 0), + _carg(tile, 0x7, 24) | _carg(s1-4, 0xFFF, 12) | _carg(t1-4, 0xFFF, 0), + AUTOSYNC_TMEM(0) | AUTOSYNC_TILE(tile), + AUTOSYNC_TILE(tile)); +} + + +/** + * @brief Load a palette of colors into TMEM (RDP command: LOAD_TLUT) + * + * This command is used to load a palette into TMEM. TMEM can hold up + * to 256 16-bit colors in total to be used as palette, and they must be + * stored in the upper half of TMEM. These colors are arranged as a single + * 256-color palette when drawing #FMT_CI8 images, or 16 16-colors palettes + * when drawing #FMT_CI4 images. + * + * Storage of colors in TMEM is a bit wasteful, as each color is replicated + * four times (in fact, 256 colors * 16-bit * 4 = 2048 bytes, which is + * in fact half of TMEM). This command should be preferred for palette + * loading as it automatically handles this replication. + * + * Loading a palette manually is a bit involved. It requires configuring + * the palette in RDRAM via #rdpq_set_texture_image, and also configure a + * tile descriptor with the TMEM destination address (via #rdpq_set_tile). + * Instead, prefer using the simpler rdpq texture API (rdpq_tex.h), via + * #rdpq_tex_upload_tlut. + * + * @param[in] tile Tile descriptor to use (TILE0-TILE7).
This is used + * to extract the destination TMEM address (all other fields + * of the descriptor are ignored). + * @param[in] color_idx Index of the first color to load into TMEM (0-255). + * This is a 16-bit offset into the RDRAM buffer + * set via #rdpq_set_texture_image. + * @param[in] num_colors Number of colors to load (1-256). + * + * @see #rdpq_tex_upload_tlut + */ +inline void rdpq_load_tlut_raw(rdpq_tile_t tile, uint8_t color_idx, uint8_t num_colors) +{ + extern void __rdpq_write8_syncchangeuse(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_write8_syncchangeuse(RDPQ_CMD_LOAD_TLUT, + _carg(color_idx, 0xFF, 14), + _carg(tile, 0x7, 24) | _carg(color_idx+num_colors-1, 0xFF, 14), + AUTOSYNC_TMEM(0), + AUTOSYNC_TILE(tile)); +} + +/** + * @brief Configure the extents of a tile descriptor (RDP command: SET_TILE_SIZE) + * + * This function allows to set the extents (s0,s1 - t0,t1) of a tile descriptor. + * Normally, it is not required to call this function because extents are + * automatically configured when #rdpq_load_tile is called to load contents + * in TMEM. This function is mostly useful when loading contents using + * #rdpq_load_block, or when reinterpreting existing contents of TMEM. + * + * For beginners, it is suggested to use the rdpq texture API (rdpq_tex.h) + * which automatically configures tile descriptors correctly: for instance, + * #rdpq_tex_upload. + * + * @param[in] tile Tile descriptor (TILE0-TILE7) + * @param[in] s0 Top-left X texture coordinate to store in the descriptor (integer or float). + * Range: 0-1024 (inclusive) + * @param[in] t0 Top-left Y texture coordinate to store in the descriptor (integer or float). + * Range: 0-1024 (inclusive) + * @param[in] s1 Bottom-right *exclusive* X texture coordinate to store in the descriptor (integer or float). + * Range: 0-1024 (inclusive) + * @param[in] t1 Bottom-right *exclusive* Y texture coordinate to store in the descriptor (integer or float).
+ * Range: 0-1024 (inclusive) + * + * @see #rdpq_tex_upload + * @see #rdpq_set_tile_size_fx + */ +#define rdpq_set_tile_size(tile, s0, t0, s1, t1) ({ \ + rdpq_set_tile_size_fx((tile), (s0)*4, (t0)*4, (s1)*4, (t1)*4); \ +}) + +/** + * @brief Configure the extents of a tile descriptor -- fixed point version (RDP command: SET_TILE_SIZE) + * + * This function is similar to #rdpq_set_tile_size, but coordinates must be + * specified using fixed point numbers (10.2). + * + * @param tile Tile descriptor (TILE0-TILE7) + * @param[in] s0 Top-left X texture coordinate to store in the descriptor (fx 10.2) + * @param[in] t0 Top-left Y texture coordinate to store in the descriptor (fx 10.2) + * @param[in] s1 Bottom-right *exclusive* X texture coordinate to store in the descriptor (fx 10.2) + * @param[in] t1 Bottom-right *exclusive* Y texture coordinate to store in the descriptor (fx 10.2) + * + * @see #rdpq_tex_upload + * @see #rdpq_set_tile_size + */ +inline void rdpq_set_tile_size_fx(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t s1, uint16_t t1) +{ + assertf((s0) >= 0 && (t0) >= 0 && (s1) >= 0 && (t1) >= 0, "texture coordinates must be positive"); + assertf((s0) <= 1024*4 && (t0) <= 1024*4 && (s1) <= 1024*4 && (t1) <= 1024*4, "texture coordinates must be smaller than 1024"); + + extern void __rdpq_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_write8_syncchange(RDPQ_CMD_SET_TILE_SIZE, + _carg(s0, 0xFFF, 12) | _carg(t0, 0xFFF, 0), + _carg(tile, 0x7, 24) | _carg(s1-4, 0xFFF, 12) | _carg(t1-4, 0xFFF, 0), + AUTOSYNC_TILE(tile)); +} + + +/** + * @brief Low level function to load a texture image into TMEM in a single memory transfer + */ +inline void rdpq_load_block_fx(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t num_texels, uint16_t dxt) +{ + extern void __rdpq_write8_syncchangeuse(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_write8_syncchangeuse(RDPQ_CMD_LOAD_BLOCK, + _carg(s0, 0xFFF, 12) | _carg(t0, 0xFFF, 0), + _carg(tile, 0x7, 
24) | _carg(num_texels-1, 0xFFF, 12) | _carg(dxt, 0xFFF, 0), + AUTOSYNC_TMEM(0), + AUTOSYNC_TILE(tile)); +} + +/** + * @brief Load a texture image into TMEM with a single contiguous memory transfer (RDP command: LOAD_BLOCK) + * + * This is a command alternative to #rdpq_load_tile to load data from + * RDRAM into TMEM. It is faster than #rdpq_load_tile but only allows + * to transfer a consecutive block of data; the block can cover multiple + * lines, but not a sub-rectangle of the texture image. + * + * @note Beginners are advised to use the rdpq texture API (rdpq_tex.h), + * for instance #rdpq_tex_upload that takes care of everything required, + * including using #rdpq_load_block for performance whenever possible. + * + * Before calling #rdpq_load_block, the tile must have been configured + * using #rdpq_set_tile to specify the TMEM address, and the texture + * in RDRAM must have been set via #rdpq_set_texture_image. + * + * @note It is important to notice that the RDP will interpret the tile pitch + * configured in the tile descriptor with a different semantic: it is + * used as a number of texels that must be skipped between lines + * in RDRAM. Normally, for a compact texture, it should then be set to zero + * in the call to #rdpq_set_tile. Instead, The *real* pitch of the texture + * in TMEM must be provided to #rdpq_load_block itself. + * + * After the call to #rdpq_load_block, it is not possible to reuse the tile + * descriptor for performing a draw. So a new tile descriptor should be configured + * from scratch using #rdpq_set_tile. + * + * The maximum number of texels that can be transferred by a single call is + * 2048. This allows to fill the TMEM only if a 16-bit or 32-bit texture is used. + * If you need to load a 4-bit or 8-bit texture, consider configuring the tile + * descriptor as 16-bit and adjusting the number of texels accordingly. 
For instance, + * to transfer a 80x64 4-bit texture (5120 texels), do the transfer as if it was a + * 20x64 16-bit texture (1280 texels). It doesn't matter if you lie to the RDP + * during the texture load: what it matters is that the tile descriptor that you will + * later use for drawing is configured with the correct pixel format. + * + * @param[in] tile Tile descriptor (TILE0-TILE7) + * @param[in] s0 Top-left X texture coordinate to load + * @param[in] t0 Top-left Y texture coordinate to load + * @param[in] num_texels Number of texels to load (max: 2048) + * @param[in] tmem_pitch Pitch of the texture in TMEM (in bytes) + * + * @see #rdpq_load_tile + * @see #rdpq_load_block_fx + * @see #rdpq_set_tile + * @see #rdpq_tex_upload + */ +inline void rdpq_load_block(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t num_texels, uint16_t tmem_pitch) +{ + assertf(num_texels <= 2048, "invalid num_texels %d: must be smaller than 2048", num_texels); + assertf((tmem_pitch % 8) == 0, "invalid tmem_pitch %d: must be multiple of 8", tmem_pitch); + // Dxt is the reciprocal of the number of 64 bit words in a line in 1.11 format, rounded up + uint32_t words = tmem_pitch / 8; + rdpq_load_block_fx(tile, s0, t0, num_texels, (2048 + words - 1) / words); +} + + +/** @brief Special TMEM address to pass to #rdpq_set_tile to use automatic TMEM allocation */ +#define RDPQ_AUTOTMEM (0x8000) +/** @brief Special TMEM address to pass to #rdpq_set_tile to configure a tile with the same address of previous tile */ +#define RDPQ_AUTOTMEM_REUSE(offset) (0x4000 | ((offset)/8)) + + +/// @brief Enqueue a RDP SET_TILE command (full version) +/// @param[in] tile Tile descriptor index (0-7) +/// @param[in] format Texture format for the tile. Cannot be 0. Should correspond to X_get_format in #surface_t or #sprite_t; +/// @param[in] tmem_addr Address in tmem where the texture is (or will be loaded). Must be multiple of 8; +/// @param[in] tmem_pitch Pitch of the texture in tmem in bytes. 
Must be multiple of 8. Should correspond to stride in #surface_t; +/// @param[in] parms Additional optional parameters for the tile. Can be left NULL or all 0. More information about the struct is in #rdpq_tileparms_t +inline void rdpq_set_tile(rdpq_tile_t tile, + tex_format_t format, + int32_t tmem_addr, + uint16_t tmem_pitch, + const rdpq_tileparms_t *parms) +{ + static const rdpq_tileparms_t default_parms = {0}; + if (!parms) parms = &default_parms; + else { + assertf(parms->s.shift >= -5 && parms->s.shift <= 10, "invalid s shift %d: must be in [-5..10]", parms->s.shift); + assertf(parms->t.shift >= -5 && parms->t.shift <= 10, "invalid t shift %d: must be in [-5..10]", parms->t.shift); + } + bool fixup = false; + bool reuse = false; + uint32_t cmd_id = RDPQ_CMD_SET_TILE; + if (tmem_addr & (RDPQ_AUTOTMEM | RDPQ_AUTOTMEM_REUSE(0))) { + cmd_id = RDPQ_CMD_AUTOTMEM_SET_TILE; + reuse = (tmem_addr & RDPQ_AUTOTMEM_REUSE(0)) != 0; + fixup = true; + tmem_addr &= ~(RDPQ_AUTOTMEM | RDPQ_AUTOTMEM_REUSE(0)); + } else { + assertf((tmem_addr % 8) == 0, "invalid tmem_addr %ld: must be multiple of 8", tmem_addr); + tmem_addr /= 8; + } + assertf((tmem_pitch % 8) == 0, "invalid tmem_pitch %d: must be multiple of 8", tmem_pitch); + extern void __rdpq_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + extern void __rdpq_fixup_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + (fixup ?
__rdpq_fixup_write8_syncchange : __rdpq_write8_syncchange)(cmd_id, + _carg(format, 0x1F, 19) | _carg(reuse, 0x1, 18) | _carg(tmem_pitch/8, 0x1FF, 9) | _carg(tmem_addr, 0x1FF, 0), + _carg(tile, 0x7, 24) | _carg(parms->palette, 0xF, 20) | + _carg(parms->t.clamp | (parms->t.mask == 0), 0x1, 19) | _carg(parms->t.mirror, 0x1, 18) | _carg(parms->t.mask, 0xF, 14) | _carg(parms->t.shift, 0xF, 10) | + _carg(parms->s.clamp | (parms->s.mask == 0), 0x1, 9) | _carg(parms->s.mirror, 0x1, 8) | _carg(parms->s.mask, 0xF, 4) | _carg(parms->s.shift, 0xF, 0), + AUTOSYNC_TILE(tile)); +} + +/** + * @brief Configure the auto-TMEM feature of #rdpq_set_tile + * + * This function is used to manage the auto-TMEM allocation feature for + * #rdpq_set_tile. It allows to keep track of the allocated space in TMEM, + * which can be a simplification. It is used by the rdpq_tex module + * (eg: #rdpq_tex_upload). + * + * The feature works like this: + * - First, start auto-TMEM via rdpq_set_tile_autotmem(0) + * - Load a texture and configure a tile for it. When configuring the tile, + * pass #RDPQ_AUTOTMEM as tmem_addr. This will allocate the texture in the + * first available space. + * - Call #rdpq_set_tile_autotmem again passing the number of used bytes in + * TMEM. Notice that rdpq can't know this by itself. + * - Continue loading the other textures/mipmaps just like before, with + * #RDPQ_AUTOTMEM. + * - If the TMEM is full, a RSP assertion will be triggered. + * - When you are done, call #rdpq_set_tile_autotmem passing -1 to finish. + * This allows reentrant calls to work, and also helps generating errors + * in case of misuses. + * + * While this API might seem as a small simplification over manually tracking + * TMEM allocation, it might help modularizing the code, and also allows to + * record rspq blocks that handle texture loading without hardcoding the + * TMEM position. + * + * @note This function is part of the raw API. 
For a higher-level API on texture + * loading, see #rdpq_tex_upload. + * + * @param tmem_bytes 0: begin, -1: end, >0: number of additional bytes + * that were used in TMEM. + * + * @see #rdpq_set_tile + * @see #rdpq_tex_upload + */ +void rdpq_set_tile_autotmem(int16_t tmem_bytes); + +/** + * @brief Enqueue a SET_FILL_COLOR RDP command. + * + * This command is used to configure the color used by RDP when running in FILL mode + * (#rdpq_set_mode_fill) and normally used by #rdpq_fill_rectangle. + * + * Notice that #rdpq_set_mode_fill automatically calls this function, because in general + * it makes no sense to configure the FILL mode without also setting a FILL color. + * + * @code{.c} + * // Fill top half of the screen in red + * rdpq_set_mode_fill(RGBA32(255, 0, 0, 0)); + * rdpq_fill_rectangle(0, 0, 320, 120); + * + * // Fill bottom half of the screen in blue. + * // No need to change mode again (it's already in fill mode), + * // so just change the fill color. + * rdpq_set_fill_color(RGBA32(0, 0, 255, 0)); + * rdpq_fill_rectangle(0, 120, 320, 240); + * @endcode + * + * @param[in] color The color to use to fill + * + * @see #rdpq_set_mode_fill + */ +inline void rdpq_set_fill_color(color_t color) { + extern void __rdpq_set_fill_color(uint32_t); + __rdpq_set_fill_color((color.r << 24) | (color.g << 16) | (color.b << 8) | (color.a << 0)); +} + +/** + * @brief Enqueue a SET_FILL_COLOR RDP command to draw a striped pattern. + * + * This command is similar to #rdpq_set_fill_color, but allows to configure + * two colors, and creates a fill pattern that alternates horizontally between + * them every 2 pixels (creating vertical stripes). + * + * This command relies on a low-level hack of how RDP works in filling primitives, + * so there is no configuration knob: it only works with RGBA 16-bit target + * buffers, it only allows two colors, and the vertical stripes are exactly + * 2 pixel width. 
+ * + * @param[in] color1 Color of the first vertical stripe + * @param[in] color2 Color of the second vertical stripe + * + * @see #rdpq_set_fill_color + * + */ +inline void rdpq_set_fill_color_stripes(color_t color1, color_t color2) { + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + uint32_t c1 = (((int)color1.r >> 3) << 11) | (((int)color1.g >> 3) << 6) | (((int)color1.b >> 3) << 1) | (color1.a >> 7); + uint32_t c2 = (((int)color2.r >> 3) << 11) | (((int)color2.g >> 3) << 6) | (((int)color2.b >> 3) << 1) | (color2.a >> 7); + __rdpq_write8_syncchange(RDPQ_CMD_SET_FILL_COLOR, 0, (c1 << 16) | c2, + AUTOSYNC_PIPE); +} + +/** + * @brief Set the RDP FOG blender register + * + * This function sets the internal RDP FOG register, part of the blender unit. + * As the name implies, this register is normally used as part of fog calculation, + * but it is actually a generic color register that can be used in custom + * blender formulas. + * + * Another similar blender register is the BLEND register, configured via + * #rdpq_set_blend_color. + * + * See #RDPQ_BLENDER and #RDPQ_BLENDER2 on how to configure + * the blender (typically, via #rdpq_mode_blender). + * + * @param[in] color Color to set the FOG register to + * + * @see #RDPQ_BLENDER + * @see #RDPQ_BLENDER2 + * @see #rdpq_set_blend_color + * @see #rdpq_mode_blender + */ +inline void rdpq_set_fog_color(color_t color) +{ + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + __rdpq_write8_syncchange(RDPQ_CMD_SET_FOG_COLOR, 0, color_to_packed32(color), + AUTOSYNC_PIPE); +} + +/** + * @brief Set the RDP BLEND blender register + * + * This function sets the internal RDP BLEND register, part of the blender unit. + * As the name implies, this register is normally used as part of fog calculation, + * but it is actually a generic color register that can be used in custom + * blender formulas. 
+ * + * Another similar blender register is the FOG register, configured via + * #rdpq_set_fog_color. + * + * See #RDPQ_BLENDER and #RDPQ_BLENDER2 on how to configure + * the blender (typically, via #rdpq_mode_blender). + * + * @param[in] color Color to set the BLEND register to + * + * @see #RDPQ_BLENDER + * @see #RDPQ_BLENDER2 + * @see #rdpq_set_fog_color + * @see #rdpq_mode_blender + */ +inline void rdpq_set_blend_color(color_t color) +{ + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + __rdpq_write8_syncchange(RDPQ_CMD_SET_BLEND_COLOR, 0, color_to_packed32(color), + AUTOSYNC_PIPE); +} + +/** + * @brief Set the RDP PRIM combiner register (color only) (RDP command: SET_PRIM_COLOR) + * + * This function sets the internal RDP PRIM register, part of the + * color combiner unit. Naming aside, it is a generic color register that + * can be used in custom color combiner formulas. + * + * Another similar combiner register is the ENV register, configured via + * #rdpq_set_env_color. + * + * See #RDPQ_COMBINER1 and #RDPQ_COMBINER2 on how to configure + * the color combiner (typically, via #rdpq_mode_combiner). + * + * If you wish to set PRIM LOD or PRIM MIN LOD values of the PRIM register, + * see #rdpq_set_prim_lod_frac, #rdpq_set_detail_factor or #rdpq_set_prim_register_raw.
+ * + * @param[in] color Color to set the PRIM register to + * + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * @see #rdpq_set_env_color + * @see #rdpq_mode_combiner + * @see #rdpq_set_prim_lod_frac + * @see #rdpq_set_detail_factor + * @see #rdpq_set_prim_register_raw + * + */ +inline void rdpq_set_prim_color(color_t color) +{ + // NOTE: this does not require a pipe sync + extern void __rdpq_fixup_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_fixup_write8_syncchange(RDPQ_CMD_SET_PRIM_COLOR_COMPONENT, (0<<16), color_to_packed32(color), 0); +} + +/** + * @brief Set the detail/sharpen blending factor (RDP command: SET_PRIM_COLOR (partial)) + * + * This function sets the internal minimum clamp for LOD fraction, that is used for + * determining the interpolation blend factor of a detail or sharpen texture at high + * magnification. + * + * Range is [0..1] where 0 means no influence, and 1 means full influence. + * The range is internally inverted and converted to [0..31] for the RDP hardware. + * + * @param[in] value Value to set the register to in range [0..1] + * + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * @see #rdpq_mode_combiner + * + */ +inline void rdpq_set_detail_factor(float value) +{ + // NOTE: this does not require a pipe sync + int8_t conv = (1.0 - value) * 31; + extern void __rdpq_fixup_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_fixup_write8_syncchange(RDPQ_CMD_SET_PRIM_COLOR_COMPONENT, ((conv & 0x1F) << 8) | (2<<16), 0, 0); +} + +/** + * @brief Set the RDP PRIM LOD FRAC combiner register (RDP command: SET_PRIM_COLOR (partial)) + * + * This function sets the internal Level of Detail fraction for primitive register, + * that is used for custom linear interpolation between any two colors in a Color Combiner. + * + * See #RDPQ_COMBINER1 and #RDPQ_COMBINER2 on how to configure + * the color combiner (typically, via #rdpq_mode_combiner).
+ * + * If you wish to set PRIM MIN LOD value, see #rdpq_set_detail_factor. + * + * @param[in] value Value to set the PRIM LOD register to in range [0..255] + * + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * @see #rdpq_mode_combiner + * @see #rdpq_set_detail_factor + * + */ +inline void rdpq_set_prim_lod_frac(uint8_t value) +{ + // NOTE: this does not require a pipe sync + extern void __rdpq_fixup_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_fixup_write8_syncchange(RDPQ_CMD_SET_PRIM_COLOR_COMPONENT, value | (1<<16), 0, 0); +} + +/** + * @brief Set the RDP PRIM combiner register (raw version) (RDP command: SET_PRIM_COLOR) + * + * This function sets the internal RDP PRIM register, part of the + * color combiner unit. Naming aside, it is a generic color register that + * can be used in custom color combiner formulas. + * + * It also sets the PRIM LOD FRAC and PRIM MIN LOD FRAC values for the PRIM register. + * For more information, see #rdpq_set_prim_lod_frac, #rdpq_set_detail_factor. + * + * Another similar combiner register is the ENV register, configured via + * #rdpq_set_env_color. + * + * See #RDPQ_COMBINER1 and #RDPQ_COMBINER2 on how to configure + * the color combiner (typically, via #rdpq_mode_combiner). + * + * If you wish to set PRIM COLOR or PRIM LOD or PRIM MIN LOD values individually, + * see #rdpq_set_prim_lod_frac, #rdpq_set_detail_factor or #rdpq_set_prim_color.
+ * + * @param[in] color Color to set the PRIM register to + * @param[in] minlod Minimum LOD fraction to set the PRIM register to (only the lowest 5 bits are used) + * @param[in] primlod Primitive LOD fraction to set the PRIM register to + * + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * @see #rdpq_set_env_color + * @see #rdpq_set_prim_color + * @see #rdpq_set_prim_lod_frac + * @see #rdpq_set_detail_factor + * + */ +inline void rdpq_set_prim_register_raw(color_t color, uint8_t minlod, uint8_t primlod) +{ + extern void __rdpq_write8(uint32_t cmd_id, uint32_t arg0, uint32_t arg1); + __rdpq_write8(RDPQ_CMD_SET_PRIM_COLOR, ((minlod & 0x1F) << 8) | primlod, color_to_packed32(color)); +} + +/** + * @brief Set the RDP ENV combiner register (RDP command: SET_ENV_COLOR) + * + * This function sets the internal RDP ENV register, part of the + * color combiner unit. Naming aside, it is a generic color register that + * can be used in custom color combiner formulas. + * + * Another similar combiner register is the PRIM register, configured via + * #rdpq_set_prim_color. + * + * See #RDPQ_COMBINER1 and #RDPQ_COMBINER2 on how to configure + * the color combiner (typically, via #rdpq_mode_combiner). + * + * @param[in] color Color to set the ENV register to + * + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * @see #rdpq_set_prim_color + * @see #rdpq_mode_combiner + * + */ +inline void rdpq_set_env_color(color_t color) +{ + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + __rdpq_write8_syncchange(RDPQ_CMD_SET_ENV_COLOR, 0, color_to_packed32(color), + AUTOSYNC_PIPE); +} + +/** + * @brief Configure the framebuffer to render to (RDP command: SET_COLOR_IMAGE) + * + * This command is used to specify the render target that the RDP will draw to. + * + * Calling this function also automatically configures scissoring (via + * #rdpq_set_scissor), so that all draw commands are clipped within the buffer, + * to avoid overwriting memory around it.
Use `rdpq_config_disable(RDPQ_CFG_AUTOSCISSOR)` + * if you need to disable this behavior. + * + * If you have a raw pointer instead of a #surface_t, you can use #surface_make to create + * a temporary surface structure to pass the information to #rdpq_set_color_image. + * + * If the passed surface is NULL, rdpq will be detached from the render target. If + * a drawing command is issued without a render target, it will be silently + * ignored (but the validator will flag it as an error). + * + * The only valid formats for a surface to be used as a render target are: #FMT_RGBA16, + * #FMT_RGBA32, and #FMT_I8. + * + * @param[in] surface Surface to set as render target + * + * @see #rdpq_set_color_image_raw + */ +void rdpq_set_color_image(const surface_t *surface); + +/** + * @brief Configure the Z-buffer to use (RDP command: SET_Z_IMAGE) + * + * This command is used to specify the Z-buffer that will be used by RDP for the next + * rendering commands. + * + * The surface must have the same width and height of the surface set as render target + * (via #rdpq_set_color_image or #rdpq_set_color_image_raw). The color format should be + * #FMT_RGBA16, even though Z values will be written to it. + * + * If the passed surface is NULL, rdpq will be detached from the Z buffer. If + * a drawing command using Z is issued without a Z buffer, the behaviour will be + * undefined (but the validator will flag it as an error). + * + * @param surface Surface to set as Z buffer + * + * @see #rdpq_set_z_image_raw + */ +void rdpq_set_z_image(const surface_t* surface); + +/** + * @brief Configure the texture to use (RDP command: SET_TEX_IMAGE) + * + * This command is used to specify the texture image that will be used by RDP for + * the next load commands (#rdpq_load_tile and #rdpq_load_block). + * + * Unlike a render target or a Z-buffer, the texture does not need to match the size + * of the render target, and it can use any #tex_format_t.
See #rdpq_set_texture_image_raw + * for the size (max 1024x1024) and alignment (8 bytes) constraints. + * + * @param surface Surface to set as texture + * + * @see #rdpq_set_texture_image_raw + */ +void rdpq_set_texture_image(const surface_t* surface); + +/** + * @brief Low-level version of #rdpq_set_color_image, with address lookup capability. + * + * This is a low-level version of #rdpq_set_color_image, that exposes the address lookup + * capability. It allows to either pass a direct buffer, or to use a buffer already stored + * in the address lookup table, adding optionally an offset. See #rdpq_set_lookup_address + * for more information. + * + * RDP has a physical constraint of 64-byte alignment for render targets, so make sure to respect + * that while configuring a buffer. The validator will flag such a mistake. + * + * @param index Index in the rdpq lookup table of the buffer to set as render target. + * @param offset Byte offset to add to the buffer stored in the lookup table. Notice that + * if index is 0, this can be a physical address to a buffer (use + * #PhysicalAddr to convert a C pointer to a physical address). + * @param format Format of the buffer. Only #FMT_RGBA32, #FMT_RGBA16, #FMT_I8 or #FMT_CI8 are + * possible to use as a render target.
+ * @param width Width of the buffer in pixels + * @param height Height of the buffer in pixels + * @param stride Stride of the buffer in bytes (length of a row) + * + * @see #rdpq_set_color_image + * @see #rdpq_set_lookup_address + */ +inline void rdpq_set_color_image_raw(uint8_t index, uint32_t offset, tex_format_t format, uint32_t width, uint32_t height, uint32_t stride) +{ + assertf(format == FMT_RGBA32 || format == FMT_RGBA16 || + format == FMT_I8 || format == FMT_CI8, + "Image format is not supported as color image: %s\nIt must be FMT_RGBA32, FMT_RGBA16, FMT_I8 or FMT_CI8", tex_format_name(format)); + assertf(index <= 15, "Lookup address index out of range [0,15]: %d", index); + + extern void __rdpq_set_color_image(uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_set_color_image( + _carg(format, 0x1F, 19) | _carg(TEX_FORMAT_BYTES2PIX(format, stride)-1, 0x3FF, 0) | _carg(height-1, 0x1FF, 10), + _carg(index, 0xF, 28) | (offset & 0xFFFFFF) | _carg((height-1)>>9, 0x1, 31), + _carg(0, 0xFFF, 12) | _carg(0, 0xFFF, 0), // for set_scissor + _carg(width*4, 0xFFF, 12) | _carg(height*4, 0xFFF, 0)); // for set_scissor +} + +/** + * @brief Low-level version of #rdpq_set_z_image, with address lookup capability. + * + * This is a low-level version of #rdpq_set_z_image, that exposes the address lookup + * capability. It allows to either pass a direct buffer, or to use a buffer already stored + * in the address lookup table, adding optionally an offset. See #rdpq_set_lookup_address + * for more information. + * + * RDP has a physical constraint of 64-byte alignment for Z buffers, so make sure to respect + * that while configuring a buffer. The validator will flag such a mistake. + * + * @param index Index in the rdpq lookup table of the buffer to set as Z buffer. + * @param offset Byte offset to add to the buffer stored in the lookup table.
Notice that + * if index is 0, this can be a physical address to a buffer (use + * #PhysicalAddr to convert a C pointer to a physical address). + * + * @see #rdpq_set_z_image + * @see #rdpq_set_lookup_address + */ +inline void rdpq_set_z_image_raw(uint8_t index, uint32_t offset) +{ + assertf(index <= 15, "Lookup address index out of range [0,15]: %d", index); + extern void __rdpq_fixup_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + __rdpq_fixup_write8_syncchange(RDPQ_CMD_SET_Z_IMAGE, + 0, + _carg(index, 0xF, 28) | (offset & 0xFFFFFF), + AUTOSYNC_PIPE); +} + +/** + * @brief Low-level version of #rdpq_set_texture_image, with address lookup capability. + * + * This is a low-level version of #rdpq_set_texture_image, that exposes the address lookup + * capability. It allows to either pass a direct buffer, or to use a buffer already stored + * in the address lookup table, adding optionally an offset. See #rdpq_set_lookup_address + * for more information. + * + * RDP has a physical constraint of 8-byte alignment for textures, so make sure to respect + * that while configuring a buffer. The validator will flag such a mistake. + * + * @param index Index in the rdpq lookup table of the buffer to set as texture image. + * @param offset Byte offset to add to the buffer stored in the lookup table. Notice that + * if index is 0, this can be a physical address to a buffer (use + * #PhysicalAddr to convert a C pointer to a physical address).
+ * @param format Format of the texture (#tex_format_t) + * @param width Width of the texture in pixels (max 1024) + * @param height Height of the texture in pixels (max 1024) + * + * @see #rdpq_set_texture_image + * @see #rdpq_set_lookup_address + */ +inline void rdpq_set_texture_image_raw(uint8_t index, uint32_t offset, tex_format_t format, uint16_t width, uint16_t height) +{ + assertf(width <= 1024, "Texture width out of range [1,1024]: %d", width); + assertf(height <= 1024, "Texture height out of range [1,1024]: %d", height); + assertf(index <= 15, "Lookup address index out of range [0,15]: %d", index); + extern void __rdpq_fixup_write8_syncchange(uint32_t, uint32_t, uint32_t, uint32_t); + // NOTE: we also encode the texture height in the command (split in two halves...) + // to help the validator do a better job. The RDP hardware ignores those bits. + __rdpq_fixup_write8_syncchange(RDPQ_CMD_SET_TEXTURE_IMAGE, + _carg(format, 0x1F, 19) | _carg(width-1, 0x3FF, 0) | _carg(height-1, 0x1FF, 10), + _carg(index, 0xF, 28) | (offset & 0xFFFFFF) | _carg((height-1)>>9, 0x1, 31), + AUTOSYNC_PIPE); +} + +/** + * @brief Store an address into the rdpq lookup table + * + * This function is for advanced usages, it is not normally required to call it. + * + * This function modifies the internal RDPQ address lookup table, by storing + * an address into one of the available slots. + * + * The lookup table is used to allow for an indirect access to surface pointers. + * For instance, some library code might want to record a block that manipulates + * several surfaces, but without saving the actual surface pointers within the + * block. Instead, all commands referring to a surface, will actually refer to + * an index into the lookup table. The caller of the block will then store + * the actual buffer pointers in the table, before playing back the block.
+ * + * While recording, you can create a placeholder surface via #surface_make_placeholder or + * #surface_make_placeholder_linear that is just an "index" into the lookup + * table. + * + * @code{.c} + * // Create placeholder surfaces with indices 3 and 4 + * surface_t tex1 = surface_make_placeholder_linear(3, FMT_RGBA16, 32, 32); + * surface_t tex2 = surface_make_placeholder_linear(4, FMT_RGBA16, 32, 32); + * + * // Start recording a block. + * rspq_block_begin(); + * rdpq_set_mode_standard(); + * + * // Load texture from lookup table (slot 3) and draw it to the screen + * rdpq_set_texture_image(&tex1); + * rdpq_load_tile(0, 0, 32, 32); + * rdpq_texture_rectangle(0, 0, 32, 32); + * + * // Load texture from lookup table (slot 4) and draw it to the screen + * rdpq_set_texture_image(&tex2); + * rdpq_load_tile(0, 0, 32, 32); + * rdpq_texture_rectangle(32, 0, 64, 32); + * + * rspq_block_t *bl = rspq_block_end(); + * + * [...] + * + * // Set two real textures into the the lookup table and call the block + * rdpq_set_lookup_address(3, robot->buffer); + * rdpq_set_lookup_address(4, dragon->buffer); + * rspq_block_run(bl); + * @endcode + * + * @note RDP has some alignment constraints: color and Z buffers must be 64-byte aligned, + * and textures must be 8-byte aligned. + * + * @param index Index of the slot in the table. Available slots are 1-15 + * (slot 0 is reserved). + * @param rdram_addr Pointer of the buffer to store into the address table. + * + */ +inline void rdpq_set_lookup_address(uint8_t index, void* rdram_addr) +{ + assertf(index > 0 && index <= 15, "Lookup address index out of range [1,15]: %d", index); + extern void __rdpq_write8(uint32_t, uint32_t, uint32_t); + __rdpq_write8(RDPQ_CMD_SET_LOOKUP_ADDRESS, index << 2, PhysicalAddr(rdram_addr)); +} + +/** + * @brief Schedule a RDP SYNC_PIPE command. + * + * This command must be sent before changing the RDP pipeline configuration (eg: color + * combiner, blender, colors, etc.) if the RDP is currently drawing. 
+ * + * Normally, you do not need to call this function because rdpq automatically + * emits sync commands whenever necessary. You must call this function only + * if you have disabled autosync for SYNC_PIPE (see #RDPQ_CFG_AUTOSYNCPIPE). + * + * @note No software emulator currently requires this command, so manually + * sending SYNC_PIPE should be developed on real hardware. + */ +void rdpq_sync_pipe(void); + +/** + * @brief Schedule a RDP SYNC_TILE command. + * + * This command must be sent before changing a RDP tile configuration if the + * RDP is currently drawing using that same tile. + * + * Normally, you do not need to call this function because rdpq automatically + * emits sync commands whenever necessary. You must call this function only + * if you have disabled autosync for SYNC_TILE (see #RDPQ_CFG_AUTOSYNCTILE). + * + * @note No software emulator currently requires this command, so manually + * sending SYNC_TILE should be developed on real hardware. + */ +void rdpq_sync_tile(void); + +/** + * @brief Schedule a RDP SYNC_LOAD command. + * + * This command must be sent before loading an area of TMEM if the + * RDP is currently drawing using that same area. + * + * Normally, you do not need to call this function because rdpq automatically + * emits sync commands whenever necessary. You must call this function only + * if you have disabled autosync for SYNC_LOAD (see #RDPQ_CFG_AUTOSYNCLOAD). + * + * @note No software emulator currently requires this command, so manually + * sending SYNC_LOAD should be developed on real hardware. + */ +void rdpq_sync_load(void); + +/** + * @brief Schedule a RDP SYNC_FULL command and register a callback when it is done. + * + * This function schedules a RDP SYNC_FULL command into the RSP queue. This + * command basically forces the RDP to finish drawing everything that has been + * sent to it before it, and then generate an interrupt when it is done. + * + * This is normally useful at the end of the frame. 
For instance, it is used + * internally by #rdpq_detach to make sure RDP is finished drawing on + * the target display before detaching it. + * + * The function can be passed an optional callback that will be called + * when the RDP interrupt triggers. This can be useful to perform some operations + * asynchronously. + * + * @param callback A callback to invoke under interrupt when the RDP + * is finished drawing, or NULL if no callback is necessary. + * @param arg Opaque argument that will be passed to the callback. + * + * @see #rspq_wait + * @see #rdpq_fence + * + */ +void rdpq_sync_full(void (*callback)(void*), void* arg); + + +/** + * @brief Low-level function to set the rendering mode register. + * + * This function enqueues a low-level SET_OTHER_MODES RDP command that changes + * the RDP render mode, setting it to a new value + * + * This function is very low level and requires very good knowledge of internal + * RDP state management. Moreover, it completely overwrites any existing + * configuration for all bits, so it must be used with caution within a block. + * + * @note If possible, prefer using the RDPQ mode API (defined in rdpq_mode.h), + * that expose a higher level API for changing the RDP modes + * + * @param mode The new render mode. See the RDP_RM + * + */ +inline void rdpq_set_other_modes_raw(uint64_t mode) +{ + extern void __rdpq_set_other_modes(uint32_t, uint32_t); + __rdpq_set_other_modes( + (mode >> 32) & 0x00FFFFFF, + mode & 0xFFFFFFFF); +} + +/** + * @brief Low-level function to partly change the rendering mode register. + * + * This function is very low level and requires very good knowledge of internal + * RDP state management. + * + * It allows to partially change the RDP render mode register, enqueuing a + * command that will modify only the requested bits. This function + * is to be preferred to #rdpq_set_other_modes_raw as it preservers existing + * render mode for all the other bits, so it allows for easier composition. 
+ * + * @note If possible, prefer using the RDPQ mode API (defined in rdpq_mode.h), + * that exposes a higher level API for changing the RDP modes. + * + * @param[in] mask Mask of bits of the SOM register that must be changed + * @param[in] val New value for the bits selected by the mask. + * + */ +inline void rdpq_change_other_modes_raw(uint64_t mask, uint64_t val) +{ + extern void __rdpq_change_other_modes(uint32_t, uint32_t, uint32_t); + + if (mask >> 32) + __rdpq_change_other_modes(0, ~(mask >> 32), val >> 32); + if ((uint32_t)mask) + __rdpq_change_other_modes(4, ~(uint32_t)mask, (uint32_t)val); +} + +/** + * @brief Read the current render mode register. + * + * This function executes a full sync (#rspq_wait) and then extracts the + * current raw render mode from the RSP state. This should be used only + * for debugging purposes. + * + * @return The current value of the render mode register. + */ +uint64_t rdpq_get_other_modes_raw(void); + +/** + * @brief Low-level function to change the RDP combiner. + * + * This function enqueues a low-level SET_COMBINE RDP command that changes + * the RDP combiner, setting it to a new value. + * You can use #RDPQ_COMBINER1 and #RDPQ_COMBINER2 to create + * the combiner settings for respectively a 1-pass or 2-pass combiner. + * + * @note Prefer using #rdpq_mode_combiner (part of the RDPQ mode API), as it better + * handles integration with other render mode changes. + * + * @param comb The new combiner setting + * + * @see #rdpq_mode_combiner + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * + */ +inline void rdpq_set_combiner_raw(uint64_t comb) { + extern void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync); + __rdpq_write8_syncchange(RDPQ_CMD_SET_COMBINE_MODE_RAW, + (comb >> 32) & 0x00FFFFFF, + comb & 0xFFFFFFFF, + AUTOSYNC_PIPE); +} + +/** + * @brief Add a fence to synchronize RSP with RDP commands.
+ * + * This function schedules a fence in the RSP queue that makes the RSP wait until + * all previously enqueued RDP commands have finished executing. This is useful + * in the rare cases in which you need to post-process the output of RDP with RSP + * commands. + * + * Notice that the RSP will spin-lock waiting for RDP to become idle, so, if + * possible, call rdpq_fence as late as possible, to allow for parallel RDP/RSP + * execution for the longest possible time. + * + * Notice that this does not block the CPU in any way; the CPU will just + * schedule the fence command in the RSP queue and continue execution. If you + * need to block the CPU until the RDP is done, check #rspq_wait or #rdpq_sync_full + * instead. + * + * @see #rdpq_sync_full + * @see #rspq_wait + */ +void rdpq_fence(void); + +/** + * @brief Send to the RDP a buffer of RDP commands from RDRAM + * + * This command can be used to execute raw RDP commands from RDRAM. It is + * normally not necessary to call this function as normal rdpq functions will + * simply enqueue the commands in the RSP queue, but there can be cases + * where commands have been prepared in RAM somehow (especially, for compatibility + * with existing code that assembled RDP commands in RDRAM, or to playback + * RDP command lists prepared with offline tools). + * + * This function fully interoperates with the rest of RDPQ, so you can freely + * intermix it with standard rdpq calls. + * + * @param buffer Pointer to the buffer containing RDP commands + * @param size Size of the buffer, in bytes (must be a multiple of 8) + * + * @note This function cannot be called within a block. + */ +void rdpq_exec(void *buffer, int size); + +/** + * @brief Enqueue a RSP command that also generates RDP commands. + * + * This function is similar to #rspq_write: it enqueues a RSP command in the + * RSP command queue for later execution by RSP.
The main difference is that + * this macro also declares that the RSP command is going to generate RDP + * commands as part of its execution. + * + * RSP commands in overlays can generate RDP commands by including rsp_rdpq.inc + * and calling RDPQ_Send (or RDPQ_Write8 / RDPQ_Write16 / RDPQ_Finalize). If + * they do, they must be enqueued using #rdpq_write instead of #rspq_write. + * + * It is important to know that the RSP command is going to generate RDP commands + * because the space for them needs to be allocated in the static buffer in + * blocks. When wrongly using #rspq_write instead of #rdpq_write, the command + * will work correctly outside of blocks but might fail in surprising ways + * when called within blocks. + * + * In some cases, it is not possible to know beforehand how many RDP commands + * will be generated. In these cases, @p num_rdp_commands should be the maximum + * possible value in words. If the number is quite high and potentially + * unbounded, pass the special value "-1". + * + * @param num_rdp_commands Maximum number of RDP 8-byte commands that will be + * generated by the RSP command. Use -1 if the number + * is unbounded and potentially high. + * @param ovl_id ID of the overlay for the command (see #rspq_write) + * @param cmd_id ID of the command (see #rspq_write) + * + * @see #rspq_write + * + * @note Some RDP commands are made of multiple 64 bit words. For the purpose + * of #rdpq_write, please treat @p num_rdp_commands as if it were the + * "number of 64-bit words". So for instance if the RSP command generates + * a single RDP TEXTURE_RECTANGLE command, pass 2 as @p num_rdp_commands. + */ +#define rdpq_write(num_rdp_commands, ovl_id, cmd_id, ...)
({ \ + int __num_rdp_commands = (num_rdp_commands); \ + if (!__builtin_constant_p(__num_rdp_commands) || __num_rdp_commands != 0) { \ + extern rspq_block_t *rspq_block; \ + if (__builtin_expect(rspq_block != NULL, 0)) { \ + extern void __rdpq_block_reserve(int); \ + __rdpq_block_reserve(__num_rdp_commands); \ + } \ + } \ + rspq_write(ovl_id, cmd_id, ##__VA_ARGS__); \ +}) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rdpq_attach.h b/include/rdpq_attach.h new file mode 100644 index 0000000000..8def077877 --- /dev/null +++ b/include/rdpq_attach.h @@ -0,0 +1,189 @@ +/** + * @file rdpq_attach.h + * @brief RDP Command queue: surface attachment API + * @ingroup rdp + * + * This module implements a higher level API for attaching surfaces to the RDP. + * + * It offers a more common lock/unlock-style API to select render targets that help + * catching mistakes compared to the raw commands such as #rdpq_set_color_image + * or #rdpq_sync_full. + * + * Moreover, a small render target stack is kept internally so to make it easier to + * temporarily switch rendering to an offscreen surface, and then restore the main + * render target. + */ + +#ifndef LIBDRAGON_RDPQ_ATTACH_H +#define LIBDRAGON_RDPQ_ATTACH_H + +#include "rspq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Attach the RDP to a color surface (and optionally a Z buffer) + * + * This function configures the new render targets the RDP will draw to. It accepts + * both a color buffer and optionally a Z buffer, both of which in terms of + * surface_t pointers. + * + * For instance, it can be used with framebuffers acquired by calling #display_get, + * or to render to an offscreen buffer created with #surface_alloc or #surface_make. + * + * This function should be called before any rendering operations to ensure that the RDP + * has a valid render target to operate on. 
+ * + * The previous render targets are stored away in a small stack, so that they can be + * restored later when #rdpq_detach is called. This allows to temporarily switch + * rendering to an offscreen surface, and then restore the main render target. + * + * @param[in] surf_color + * The surface to render to. Supported formats are: #FMT_RGBA32, #FMT_RGBA16, + * #FMT_CI8, #FMT_I8. + * @param[in] surf_z + * The Z-buffer to render to (can be NULL if no Z-buffer is required). + * The only supported format is #FMT_RGBA16. + * + * @see #display_get + * @see #surface_alloc + */ +void rdpq_attach(const surface_t *surf_color, const surface_t *surf_z); + +/** + * @brief Attach the RDP to a surface and clear it + * + * This function is similar to #rdpq_attach, but it also clears the surface + * to full black (color 0) immediately after attaching. If a z-buffer is + * specified, it is also cleared (to 0xFFFC). + * + * This function is just a shortcut for calling #rdpq_attach, #rdpq_clear and + * #rdpq_clear_z. + * + * @param[in] surf_color + * The surface to render to. + * @param[in] surf_z + * The Z-buffer to render to (can be NULL if no Z-buffer is required). + * + * @see #display_get + * @see #surface_alloc + * @see #rdpq_clear + * @see #rdpq_clear_z + */ +void rdpq_attach_clear(const surface_t *surf_color, const surface_t *surf_z); + +/** + * @brief Clear the current render target with the specified color. + * + * Note that this function will respect the current scissor rectangle, if + * configured. + * + * @param[in] color + * Color to use to clear the surface + */ +inline void rdpq_clear(color_t color) { + extern void __rdpq_clear(const color_t *color); + __rdpq_clear(&color); +} + +/** + * @brief Reset the current Z buffer to a given value. + * + * Note that this function will respect the current scissor rectangle, if + * configured. 
+ * + * @param[in] z + * Value to reset the Z buffer to + */ +inline void rdpq_clear_z(uint16_t z) { + extern void __rdpq_clear_z(const uint16_t *z); + __rdpq_clear_z(&z); +} + +/** + * @brief Detach the RDP from the current surface, and restore the previous one + * + * This function detaches the RDP from the current surface. Using a small internal + * stack, the previous render target is restored (if any). + * + * Notice that #rdpq_detach does not wait for the RDP to finish rendering, like any + * other rdpq function. If you need to ensure that the RDP has finished rendering, + * either call #rspq_wait afterwards, or use the #rdpq_detach_wait function. + * + * A common use case is detaching from the main framebuffer (obtained via #display_get), + * and then displaying it via #display_show. For this case, consider using + * #rdpq_detach_show which basically schedules the #display_show to happen automatically + * without blocking the CPU. + * + * @see #rdpq_attach + * @see #rdpq_detach_show + * @see #rdpq_detach_wait + */ +inline void rdpq_detach(void) +{ + extern void rdpq_detach_cb(void (*cb)(void*), void *arg); + rdpq_detach_cb(NULL, NULL); +} + +/** + * @brief Check if the RDP is currently attached to a surface + * + * @return true if it is attached, false otherwise. + */ +bool rdpq_is_attached(void); + +/** + * @brief Detach the RDP from the current framebuffer, and show it on screen + * + * This function runs a #rdpq_detach on the surface, and then schedules in + * background for the surface to be displayed on screen after the RDP has + * finished drawing to it. + * + * The net result is similar to calling #rdpq_detach_wait and then #display_show + * manually, but it is more efficient because it does not block the CPU. Thus, + * if this function is called at the end of the frame, the CPU can immediately + * start working on the next one (assuming there is a free framebuffer available). 
+ * + * @see #rdpq_detach_wait + * @see #display_show + */ +void rdpq_detach_show(void); + +/** + * @brief Detach the RDP from the current surface, waiting for RDP to finish drawing. + * + * This function is similar to #rdpq_detach, but also waits for the RDP to finish + * drawing to the surface. + * + * @see #rdpq_detach + */ +inline void rdpq_detach_wait(void) +{ + rdpq_detach(); + rspq_wait(); +} + +/** + * @brief Detach the RDP from the current surface, and call a callback when + * the RDP has finished drawing to it. + * + * This function is similar to #rdpq_detach: it does not block the CPU, but + * schedules for a callback to be called (under interrupt) when the RDP has + * finished drawing to the surface. + * + * @param[in] cb + * Callback that will be called when the RDP has finished drawing to the surface. + * @param[in] arg + * Argument to the callback. + * + * @see #rdpq_detach + */ +void rdpq_detach_cb(void (*cb)(void*), void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBDRAGON_RDPQ_ATTACH_H */ diff --git a/include/rdpq_constants.h b/include/rdpq_constants.h new file mode 100644 index 0000000000..420e81efaf --- /dev/null +++ b/include/rdpq_constants.h @@ -0,0 +1,33 @@ +#ifndef __LIBDRAGON_RDPQ_CONSTANTS_H +#define __LIBDRAGON_RDPQ_CONSTANTS_H + +#define RDPQ_ADDRESS_TABLE_SIZE 16 + +#define RDPQ_DYNAMIC_BUFFER_SIZE (1024 * 64) + +// Asserted if #rdpq_mode_blender was called in fill/copy mode +#define RDPQ_ASSERT_FILLCOPY_BLENDING 0xC003 + +// Asserted if a 2-pass combiner is set with #rdpq_mode_combiner while mipmap is enabled. 
+#define RDPQ_ASSERT_MIPMAP_COMB2 0xC004 + +// Asserted if RDPQCmd_Triangle is called with RDPQ_TRIANGLE_REFERENCE == 0 +#define RDPQ_ASSERT_INVALID_CMD_TRI 0xC005 + +// Asserted if RDPQ_Send is called with invalid parameters (begin > end) +#define RDPQ_ASSERT_SEND_INVALID_SIZE 0xC006 + +// Asserted if the TMEM is full during an auto-TMEM operation +#define RDPQ_ASSERT_AUTOTMEM_FULL 0xC007 + +// Asserted if auto-TMEM begin/end calls are not correctly paired +#define RDPQ_ASSERT_AUTOTMEM_UNPAIRED 0xC008 + +#define RDPQ_MAX_COMMAND_SIZE 44 +#define RDPQ_BLOCK_MIN_SIZE 64 ///< RDPQ block minimum size (in 32-bit words) +#define RDPQ_BLOCK_MAX_SIZE 4192 ///< RDPQ block maximum size (in 32-bit words) + +/** @brief Set to 1 for the reference implementation of RDPQ_TRIANGLE (on CPU) */ +#define RDPQ_TRIANGLE_REFERENCE 0 + +#endif diff --git a/include/rdpq_debug.h b/include/rdpq_debug.h new file mode 100644 index 0000000000..16c06ac6ea --- /dev/null +++ b/include/rdpq_debug.h @@ -0,0 +1,189 @@ +/** + * @file rdpq_debug.h + * @brief RDP Command queue: debugging helpers + * @ingroup rdp + */ + +#ifndef LIBDRAGON_RDPQ_DEBUG_H +#define LIBDRAGON_RDPQ_DEBUG_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +///@cond +typedef struct surface_s surface_t; +///@endcond + +/** + * @brief Initialize the RDPQ debugging engine + * + * This function initializes the RDP debugging engine. After calling this function, + * all RDP commands sent via the rspq/rdpq libraries and overlays will be analyzed + * and validated, providing insights in case of programming errors that trigger + * hardware undefined behaviors or corrupt graphics. The validation errors + * and warnings are emitted via #debugf, so make sure to initialize the debugging + * library to see it. + * + * This is especially important with RDP because the chip is very hard to program + * correctly, and it is common to make mistakes.
While rdpq tries to shield the + * programmer from most common mistakes via the fixups, it is still possible + * to do mistakes (eg: creating non-working color combiners) that the debugging + * engine can help spotting. + * + * Notice that the validator needs to maintain a representation of the RDP state, + * as it is not possible to query the RDP about it. So it is better to call + * #rdpq_debug_start immediately after #rdpq_init when required, so that it can + * track all commands from the start. Otherwise, some spurious validation error + * could be emitted. + * + * @note The validator does cause a measurable overhead. It is advised to enable + * it only in debugging builds. + */ +void rdpq_debug_start(void); + +/** + * @brief Stop the rdpq debugging engine. + */ +void rdpq_debug_stop(void); + +/** + * @brief Show a full log of all the RDP commands + * + * This function configures the debugging engine to also log all RDP commands + * to the debugging channel (via #debugf). This is extremely verbose and should + * be used sparingly to debug specific issues. + * + * This function does enqueue a command in the rspq queue, so it is executed + * in order with respect to all rspq/rdpq commands. You can thus delimit + * specific portions of your code with `rdpq_debug_log(true)` / + * `rdpq_debug_log(false)`, to see only the RDP log produced by those + * code lines. + * + * @param show_log true/false to enable/disable the RDP log. + */ +void rdpq_debug_log(bool show_log); + +/** + * @brief Add a custom message in the RDP logging + * + * If the debug log is active, this function adds a custom message to the log. + * It can be useful to annotate different portions of the disassembly. 
+ * + * For instance, the following code: + * + * @code{.c} + * rdpq_debug_log(true); + * + * rdpq_debug_log_msg("Black rectangle"); + * rdpq_set_mode_fill(RGBA32(0,0,0,0)); + * rdpq_fill_rectangle(0, 0, 320, 120); + * + * rdpq_debug_log_msg("Red rectangle"); + * rdpq_set_fill_color(RGBA32(255,0,0,0)); + * rdpq_fill_rectangle(0, 120, 320, 240); + * + * rdpq_debug_log(false); + * @endcode + * + * produces this output: + * + * [0xa00e7128] f1020000000332a8 RDPQ_MESSAGE Black rectangle + * [0xa00e7130] ef30000000000000 SET_OTHER_MODES fill + * [0xa00e7138] ed00000000000000 SET_SCISSOR xy=(0.00,0.00)-(0.00,0.00) + * [0xa00e7140] f700000000000000 SET_FILL_COLOR rgba16=(0,0,0,0) rgba32=(0,0,0,0) + * [0xa00e7148] f65001e000000000 FILL_RECT xy=(0.00,0.00)-(320.00,120.00) + * [0xa00e7150] f1020000000332b8 RDPQ_MESSAGE Red rectangle + * [0xa00e7158] e700000000000000 SYNC_PIPE + * [0xa00e7160] f7000000f800f800 SET_FILL_COLOR rgba16=(31,0,0,0) rgba32=(248,0,248,0) + * [0xa00e7168] f65003c0000001e0 FILL_RECT xy=(0.00,120.00)-(320.00,240.00) + * [0xa00e7170] f101000000000000 RDPQ_SHOWLOG show=0 + * + * where you can see the `RDPQ_MESSAGE` lines which helps isolate portion of commands with + * respect to the source lines that generated them. + * + * @param str message to display + */ +void rdpq_debug_log_msg(const char *str); + +/** + * @brief Acquire a dump of the current contents of TMEM + * + * Inspecting TMEM can be useful for debugging purposes, so this function + * dumps it to RDRAM for inspection. It returns a surface that contains the + * contents of TMEM as a 32x64 FMT_RGBA16 (4K) buffer, but obviously the + * contents can vary and have nothing to do with this layout. + * + * The function will do a full sync (via #rspq_wait) to make sure the + * surface data has been fully written by RDP when the function returns. + * + * For the debugging, you can easily dump the contents of the surface calling + * #debug_hexdump. 
+ * + * The surface must be freed via #surface_free when it is not useful anymore. + * + * @code + * // Get the TMEM contents + * surface_t surf = rdpq_debug_get_tmem(); + * + * // Dump TMEM in the debug spew + * debug_hexdump(surf.buffer, 4096); + * + * surface_free(&surf); + * @endcode + * + * @return A surface with TMEM contents, that must be freed via #surface_free. + */ +surface_t rdpq_debug_get_tmem(void); + +/** + * @brief Install a custom hook that will be called every time a RDP command is processed. + * + * This function can be used to perform custom analysis on the RDP stream. It allows + * you to register a callback that will be called any time a RDP command is processed + * by the debugging engine. + * + * @param hook Hook function that will be called for each RDP command + * @param ctx Context passed to the hook function + * + * @note You can currently install only one hook + */ +void rdpq_debug_install_hook(void (*hook)(void *ctx, uint64_t* cmd, int cmd_size), void* ctx); + +/** + * @brief Disassemble a RDP command + * + * This function gives direct access to the disassembler which is part + * of the rdpq debugging log. Normally, you don't need to use this function: + * just call #rdpq_debug_log to see all RDP commands in disassembled format. + * + * This function can be useful for writing tools or manually debugging a + * RDP stream. + * + * @param buf Pointer to the RDP command + * @param out Output stream where to write the disassembled string + * @return true if the command was disassembled, false if the command is being + * held in a buffer waiting for more commands to be appended.
+ * + * @see #rdpq_debug_disasm_size + */ +bool rdpq_debug_disasm(uint64_t *buf, FILE *out); + +/** + * @brief Return the size of the next RDP commands + * + * @param buf Pointer to RDP command + * @return Number of 64-bit words the command is composed of + */ +int rdpq_debug_disasm_size(uint64_t *buf); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rdpq_macros.h b/include/rdpq_macros.h new file mode 100644 index 0000000000..b6e6c7624a --- /dev/null +++ b/include/rdpq_macros.h @@ -0,0 +1,862 @@ +/** + * @file rdpq_macros.h + * @brief RDP command macros + * @ingroup rdp + * + * This file contains macros that can be used to assembly some complex RDP commands: + * the blender and the color combiner configurations. + * + * The file is meant to be included also from RSP assembly code, for readability + * while manipulating these commands. + */ +#ifndef LIBDRAGON_RDPQ_MACROS_H +#define LIBDRAGON_RDPQ_MACROS_H + +#ifndef __ASSEMBLER__ + +/** @brief A combiner formula, created by #RDPQ_COMBINER1 or #RDPQ_COMBINER2 */ +typedef uint64_t rdpq_combiner_t; +/** @brief A blender formula, created by #RDPQ_BLENDER or #RDPQ_BLENDER2 */ +typedef uint32_t rdpq_blender_t; + +#endif + +///@cond +#ifndef __ASSEMBLER__ +#include +#define cast64(x) (uint64_t)(x) +#define castcc(x) (rdpq_combiner_t)(x) +#define castbl(x) (rdpq_blender_t)(x) +#else +#define cast64(x) x +#define castcc(x) x +#define castbl(x) x +#endif +///@endcond + +/// @cond +// Internal helpers to build a color combiner setting +#define _RDPQ_COMB1_RGB_SUBA_TEX0 cast64(1) +#define _RDPQ_COMB1_RGB_SUBA_PRIM cast64(3) +#define _RDPQ_COMB1_RGB_SUBA_SHADE cast64(4) +#define _RDPQ_COMB1_RGB_SUBA_ENV cast64(5) +#define _RDPQ_COMB1_RGB_SUBA_ONE cast64(6) +#define _RDPQ_COMB1_RGB_SUBA_1 cast64(6) +#define _RDPQ_COMB1_RGB_SUBA_NOISE cast64(7) +#define _RDPQ_COMB1_RGB_SUBA_ZERO cast64(8) +#define _RDPQ_COMB1_RGB_SUBA_0 cast64(8) + +#define _RDPQ_COMB2A_RGB_SUBA_TEX0 cast64(1) +#define _RDPQ_COMB2A_RGB_SUBA_TEX1 
cast64(2) +#define _RDPQ_COMB2A_RGB_SUBA_PRIM cast64(3) +#define _RDPQ_COMB2A_RGB_SUBA_SHADE cast64(4) +#define _RDPQ_COMB2A_RGB_SUBA_ENV cast64(5) +#define _RDPQ_COMB2A_RGB_SUBA_ONE cast64(6) +#define _RDPQ_COMB2A_RGB_SUBA_1 cast64(6) +#define _RDPQ_COMB2A_RGB_SUBA_NOISE cast64(7) +#define _RDPQ_COMB2A_RGB_SUBA_ZERO cast64(8) +#define _RDPQ_COMB2A_RGB_SUBA_0 cast64(8) + +#define _RDPQ_COMB2B_RGB_SUBA_COMBINED cast64(0) +#define _RDPQ_COMB2B_RGB_SUBA_TEX1 cast64(1) // TEX0 not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_RGB_SUBA_PRIM cast64(3) +#define _RDPQ_COMB2B_RGB_SUBA_SHADE cast64(4) +#define _RDPQ_COMB2B_RGB_SUBA_ENV cast64(5) +#define _RDPQ_COMB2B_RGB_SUBA_ONE cast64(6) +#define _RDPQ_COMB2B_RGB_SUBA_1 cast64(6) +#define _RDPQ_COMB2B_RGB_SUBA_NOISE cast64(7) +#define _RDPQ_COMB2B_RGB_SUBA_ZERO cast64(8) +#define _RDPQ_COMB2B_RGB_SUBA_0 cast64(8) + +#define _RDPQ_COMB1_RGB_SUBB_TEX0 cast64(1) +#define _RDPQ_COMB1_RGB_SUBB_PRIM cast64(3) +#define _RDPQ_COMB1_RGB_SUBB_SHADE cast64(4) +#define _RDPQ_COMB1_RGB_SUBB_ENV cast64(5) +#define _RDPQ_COMB1_RGB_SUBB_KEYCENTER cast64(6) +#define _RDPQ_COMB1_RGB_SUBB_K4 cast64(7) +#define _RDPQ_COMB1_RGB_SUBB_ZERO cast64(8) +#define _RDPQ_COMB1_RGB_SUBB_0 cast64(8) + +#define _RDPQ_COMB2A_RGB_SUBB_TEX0 cast64(1) +#define _RDPQ_COMB2A_RGB_SUBB_TEX1 cast64(2) +#define _RDPQ_COMB2A_RGB_SUBB_PRIM cast64(3) +#define _RDPQ_COMB2A_RGB_SUBB_SHADE cast64(4) +#define _RDPQ_COMB2A_RGB_SUBB_ENV cast64(5) +#define _RDPQ_COMB2A_RGB_SUBB_KEYCENTER cast64(6) +#define _RDPQ_COMB2A_RGB_SUBB_K4 cast64(7) +#define _RDPQ_COMB2A_RGB_SUBB_ZERO cast64(8) +#define _RDPQ_COMB2A_RGB_SUBB_0 cast64(8) + +#define _RDPQ_COMB2B_RGB_SUBB_COMBINED cast64(0) +#define _RDPQ_COMB2B_RGB_SUBB_TEX1 cast64(1) // TEX0 not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_RGB_SUBB_PRIM cast64(3) +#define _RDPQ_COMB2B_RGB_SUBB_SHADE cast64(4) +#define _RDPQ_COMB2B_RGB_SUBB_ENV cast64(5) +#define _RDPQ_COMB2B_RGB_SUBB_KEYCENTER cast64(6) +#define 
_RDPQ_COMB2B_RGB_SUBB_K4 cast64(7) +#define _RDPQ_COMB2B_RGB_SUBB_ZERO cast64(8) +#define _RDPQ_COMB2B_RGB_SUBB_0 cast64(8) + +#define _RDPQ_COMB1_RGB_MUL_TEX0 cast64(1) +#define _RDPQ_COMB1_RGB_MUL_PRIM cast64(3) +#define _RDPQ_COMB1_RGB_MUL_SHADE cast64(4) +#define _RDPQ_COMB1_RGB_MUL_ENV cast64(5) +#define _RDPQ_COMB1_RGB_MUL_KEYSCALE cast64(6) +#define _RDPQ_COMB1_RGB_MUL_TEX0_ALPHA cast64(8) +#define _RDPQ_COMB1_RGB_MUL_PRIM_ALPHA cast64(10) +#define _RDPQ_COMB1_RGB_MUL_SHADE_ALPHA cast64(11) +#define _RDPQ_COMB1_RGB_MUL_ENV_ALPHA cast64(12) +#define _RDPQ_COMB1_RGB_MUL_LOD_FRAC cast64(13) +#define _RDPQ_COMB1_RGB_MUL_PRIM_LOD_FRAC cast64(14) +#define _RDPQ_COMB1_RGB_MUL_K5 cast64(15) +#define _RDPQ_COMB1_RGB_MUL_ZERO cast64(16) +#define _RDPQ_COMB1_RGB_MUL_0 cast64(16) + +#define _RDPQ_COMB2A_RGB_MUL_TEX0 cast64(1) +#define _RDPQ_COMB2A_RGB_MUL_TEX1 cast64(2) +#define _RDPQ_COMB2A_RGB_MUL_PRIM cast64(3) +#define _RDPQ_COMB2A_RGB_MUL_SHADE cast64(4) +#define _RDPQ_COMB2A_RGB_MUL_ENV cast64(5) +#define _RDPQ_COMB2A_RGB_MUL_KEYSCALE cast64(6) +#define _RDPQ_COMB2A_RGB_MUL_TEX0_ALPHA cast64(8) +#define _RDPQ_COMB2A_RGB_MUL_TEX1_ALPHA cast64(9) +#define _RDPQ_COMB2A_RGB_MUL_PRIM_ALPHA cast64(10) +#define _RDPQ_COMB2A_RGB_MUL_SHADE_ALPHA cast64(11) +#define _RDPQ_COMB2A_RGB_MUL_ENV_ALPHA cast64(12) +#define _RDPQ_COMB2A_RGB_MUL_LOD_FRAC cast64(13) +#define _RDPQ_COMB2A_RGB_MUL_PRIM_LOD_FRAC cast64(14) +#define _RDPQ_COMB2A_RGB_MUL_K5 cast64(15) +#define _RDPQ_COMB2A_RGB_MUL_ZERO cast64(16) +#define _RDPQ_COMB2A_RGB_MUL_0 cast64(16) + +#define _RDPQ_COMB2B_RGB_MUL_COMBINED cast64(0) +#define _RDPQ_COMB2B_RGB_MUL_TEX1 cast64(1) // TEX0 not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_RGB_MUL_PRIM cast64(3) +#define _RDPQ_COMB2B_RGB_MUL_SHADE cast64(4) +#define _RDPQ_COMB2B_RGB_MUL_ENV cast64(5) +#define _RDPQ_COMB2B_RGB_MUL_KEYSCALE cast64(6) +#define _RDPQ_COMB2B_RGB_MUL_COMBINED_ALPHA cast64(7) +#define _RDPQ_COMB2B_RGB_MUL_TEX1_ALPHA cast64(8) // 
TEX0_ALPHA not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_RGB_MUL_PRIM_ALPHA cast64(10) +#define _RDPQ_COMB2B_RGB_MUL_SHADE_ALPHA cast64(11) +#define _RDPQ_COMB2B_RGB_MUL_ENV_ALPHA cast64(12) +#define _RDPQ_COMB2B_RGB_MUL_LOD_FRAC cast64(13) +#define _RDPQ_COMB2B_RGB_MUL_PRIM_LOD_FRAC cast64(14) +#define _RDPQ_COMB2B_RGB_MUL_K5 cast64(15) +#define _RDPQ_COMB2B_RGB_MUL_ZERO cast64(16) +#define _RDPQ_COMB2B_RGB_MUL_0 cast64(16) + +#define _RDPQ_COMB1_RGB_ADD_TEX0 cast64(1) +#define _RDPQ_COMB1_RGB_ADD_PRIM cast64(3) +#define _RDPQ_COMB1_RGB_ADD_SHADE cast64(4) +#define _RDPQ_COMB1_RGB_ADD_ENV cast64(5) +#define _RDPQ_COMB1_RGB_ADD_ONE cast64(6) +#define _RDPQ_COMB1_RGB_ADD_1 cast64(6) +#define _RDPQ_COMB1_RGB_ADD_ZERO cast64(7) +#define _RDPQ_COMB1_RGB_ADD_0 cast64(7) + +#define _RDPQ_COMB2A_RGB_ADD_TEX0 cast64(1) +#define _RDPQ_COMB2A_RGB_ADD_TEX1 cast64(2) +#define _RDPQ_COMB2A_RGB_ADD_PRIM cast64(3) +#define _RDPQ_COMB2A_RGB_ADD_SHADE cast64(4) +#define _RDPQ_COMB2A_RGB_ADD_ENV cast64(5) +#define _RDPQ_COMB2A_RGB_ADD_ONE cast64(6) +#define _RDPQ_COMB2A_RGB_ADD_1 cast64(6) +#define _RDPQ_COMB2A_RGB_ADD_ZERO cast64(7) +#define _RDPQ_COMB2A_RGB_ADD_0 cast64(7) + +#define _RDPQ_COMB2B_RGB_ADD_COMBINED cast64(0) +#define _RDPQ_COMB2B_RGB_ADD_TEX1 cast64(1) // TEX0 not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_RGB_ADD_PRIM cast64(3) +#define _RDPQ_COMB2B_RGB_ADD_SHADE cast64(4) +#define _RDPQ_COMB2B_RGB_ADD_ENV cast64(5) +#define _RDPQ_COMB2B_RGB_ADD_ONE cast64(6) +#define _RDPQ_COMB2B_RGB_ADD_1 cast64(6) +#define _RDPQ_COMB2B_RGB_ADD_ZERO cast64(7) +#define _RDPQ_COMB2B_RGB_ADD_0 cast64(7) + +#define _RDPQ_COMB1_ALPHA_ADDSUB_TEX0 cast64(1) +#define _RDPQ_COMB1_ALPHA_ADDSUB_PRIM cast64(3) +#define _RDPQ_COMB1_ALPHA_ADDSUB_SHADE cast64(4) +#define _RDPQ_COMB1_ALPHA_ADDSUB_ENV cast64(5) +#define _RDPQ_COMB1_ALPHA_ADDSUB_ONE cast64(6) +#define _RDPQ_COMB1_ALPHA_ADDSUB_1 cast64(6) +#define _RDPQ_COMB1_ALPHA_ADDSUB_ZERO cast64(7) +#define 
_RDPQ_COMB1_ALPHA_ADDSUB_0 cast64(7) + +#define _RDPQ_COMB2A_ALPHA_ADDSUB_TEX0 cast64(1) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_TEX1 cast64(2) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_PRIM cast64(3) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_SHADE cast64(4) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_ENV cast64(5) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_ONE cast64(6) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_1 cast64(6) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_ZERO cast64(7) +#define _RDPQ_COMB2A_ALPHA_ADDSUB_0 cast64(7) + +#define _RDPQ_COMB2B_ALPHA_ADDSUB_COMBINED cast64(0) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_TEX1 cast64(1) // TEX0 not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_PRIM cast64(3) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_SHADE cast64(4) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_ENV cast64(5) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_ONE cast64(6) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_1 cast64(6) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_ZERO cast64(7) +#define _RDPQ_COMB2B_ALPHA_ADDSUB_0 cast64(7) + +#define _RDPQ_COMB1_ALPHA_MUL_LOD_FRAC cast64(0) +#define _RDPQ_COMB1_ALPHA_MUL_TEX0 cast64(1) +#define _RDPQ_COMB1_ALPHA_MUL_PRIM cast64(3) +#define _RDPQ_COMB1_ALPHA_MUL_SHADE cast64(4) +#define _RDPQ_COMB1_ALPHA_MUL_ENV cast64(5) +#define _RDPQ_COMB1_ALPHA_MUL_PRIM_LOD_FRAC cast64(6) +#define _RDPQ_COMB1_ALPHA_MUL_ZERO cast64(7) +#define _RDPQ_COMB1_ALPHA_MUL_0 cast64(7) + +#define _RDPQ_COMB2A_ALPHA_MUL_LOD_FRAC cast64(0) +#define _RDPQ_COMB2A_ALPHA_MUL_TEX0 cast64(1) +#define _RDPQ_COMB2A_ALPHA_MUL_TEX1 cast64(2) +#define _RDPQ_COMB2A_ALPHA_MUL_PRIM cast64(3) +#define _RDPQ_COMB2A_ALPHA_MUL_SHADE cast64(4) +#define _RDPQ_COMB2A_ALPHA_MUL_ENV cast64(5) +#define _RDPQ_COMB2A_ALPHA_MUL_PRIM_LOD_FRAC cast64(6) +#define _RDPQ_COMB2A_ALPHA_MUL_ZERO cast64(7) +#define _RDPQ_COMB2A_ALPHA_MUL_0 cast64(7) + +#define _RDPQ_COMB2B_ALPHA_MUL_LOD_FRAC cast64(0) +#define _RDPQ_COMB2B_ALPHA_MUL_TEX1 cast64(1) // TEX0 not available in 2nd cycle (pipelined) +#define _RDPQ_COMB2B_ALPHA_MUL_PRIM cast64(3) +#define 
_RDPQ_COMB2B_ALPHA_MUL_SHADE cast64(4) +#define _RDPQ_COMB2B_ALPHA_MUL_ENV cast64(5) +#define _RDPQ_COMB2B_ALPHA_MUL_PRIM_LOD_FRAC cast64(6) +#define _RDPQ_COMB2B_ALPHA_MUL_ZERO cast64(7) +#define _RDPQ_COMB2B_ALPHA_MUL_0 cast64(7) + +#define __rdpq_1cyc_comb_rgb(suba, subb, mul, add) \ + (((_RDPQ_COMB1_RGB_SUBA_ ## suba)<<52) | ((_RDPQ_COMB1_RGB_SUBB_ ## subb)<<28) | ((_RDPQ_COMB1_RGB_MUL_ ## mul)<<47) | ((_RDPQ_COMB1_RGB_ADD_ ## add)<<15) | \ + ((_RDPQ_COMB1_RGB_SUBA_ ## suba)<<37) | ((_RDPQ_COMB1_RGB_SUBB_ ## subb)<<24) | ((_RDPQ_COMB1_RGB_MUL_ ## mul)<<32) | ((_RDPQ_COMB1_RGB_ADD_ ## add)<<6)) +#define __rdpq_1cyc_comb_alpha(suba, subb, mul, add) \ + (((_RDPQ_COMB1_ALPHA_ADDSUB_ ## suba)<<44) | ((_RDPQ_COMB1_ALPHA_ADDSUB_ ## subb)<<12) | ((_RDPQ_COMB1_ALPHA_MUL_ ## mul)<<41) | ((_RDPQ_COMB1_ALPHA_ADDSUB_ ## add)<<9) | \ + ((_RDPQ_COMB1_ALPHA_ADDSUB_ ## suba)<<21) | ((_RDPQ_COMB1_ALPHA_ADDSUB_ ## subb)<<3) | ((_RDPQ_COMB1_ALPHA_MUL_ ## mul)<<18) | ((_RDPQ_COMB1_ALPHA_ADDSUB_ ## add)<<0)) + +#define __rdpq_2cyc_comb2a_rgb(suba, subb, mul, add) \ + (((_RDPQ_COMB2A_RGB_SUBA_ ## suba)<<52) | ((_RDPQ_COMB2A_RGB_SUBB_ ## subb)<<28) | ((_RDPQ_COMB2A_RGB_MUL_ ## mul)<<47) | ((_RDPQ_COMB2A_RGB_ADD_ ## add)<<15)) +#define __rdpq_2cyc_comb2a_alpha(suba, subb, mul, add) \ + (((_RDPQ_COMB2A_ALPHA_ADDSUB_ ## suba)<<44) | ((_RDPQ_COMB2A_ALPHA_ADDSUB_ ## subb)<<12) | ((_RDPQ_COMB2A_ALPHA_MUL_ ## mul)<<41) | ((_RDPQ_COMB2A_ALPHA_ADDSUB_ ## add)<<9)) +#define __rdpq_2cyc_comb2b_rgb(suba, subb, mul, add) \ + (((_RDPQ_COMB2B_RGB_SUBA_ ## suba)<<37) | ((_RDPQ_COMB2B_RGB_SUBB_ ## subb)<<24) | ((_RDPQ_COMB2B_RGB_MUL_ ## mul)<<32) | ((_RDPQ_COMB2B_RGB_ADD_ ## add)<<6)) +#define __rdpq_2cyc_comb2b_alpha(suba, subb, mul, add) \ + (((_RDPQ_COMB2B_ALPHA_ADDSUB_ ## suba)<<21) | ((_RDPQ_COMB2B_ALPHA_ADDSUB_ ## subb)<<3) | ((_RDPQ_COMB2B_ALPHA_MUL_ ## mul)<<18) | ((_RDPQ_COMB2B_ALPHA_ADDSUB_ ## add)<<0)) +/// @endcond + +/** @brief Combiner: mask to isolate settings related to cycle 0 */ 
+#define RDPQ_COMB0_MASK ((cast64(0xF)<<52)|(cast64(0x1F)<<47)|(cast64(0x7)<<44)|(cast64(0x7)<<41)|(cast64(0xF)<<28)|(cast64(0x7)<<15)|(cast64(0x7)<<12)|(cast64(0x7)<<9)) +/** @brief Combiner: mask to isolate settings related to cycle 1 */ +#define RDPQ_COMB1_MASK (~RDPQ_COMB0_MASK & cast64(0x00FFFFFFFFFFFFFF)) + +/** + * @brief Flag to mark the combiner as requiring two passes + * + * This is an internal flag used by rdpq to mark combiner configurations that + * require 2 passes to be executed, and differentiate them from 1 pass configurations. + * + * It is used by rdpq to automatically switch to 2cycle mode when such a + * combiner is configured. + * + * Application code should not use this macro directly. + */ +#define RDPQ_COMBINER_2PASS (cast64(1)<<63) + +/** + * @brief Build a 1-pass combiner formula + * + * This macro allows to build a 1-pass color combiner formula. + * In general, the color combiner is able to execute the following + * per-pixel formula: + * + * (A - B) * C + D + * + * where A, B, C, D can be configured picking several possible + * inputs called "slots". Two different formulas (with the same structure + * but different inputs) must be configured: one for the RGB + * channels and for the alpha channel. + * + * The macro must be invoked as: + * + * RDPQ_COMBINER1((A1, B1, C1, D1), (A2, B2, C2, D2)) + * + * where `A1`, `B1`, `C1`, `D1` define the formula used for RGB channels, + * while `A2`, `B2`, `C2`, `D2` define the formula for the alpha channel. + * Please notice the double parenthesis. + * + * For example, this macro: + * + * RDPQ_COMBINER1((TEX0, 0, SHADE, 0), (0, 0, 0, TEX0)) + * + * configures the formulas: + * + * RGB = (TEX0 - 0) * SHADE + 0 = TEX0 * SHADE + * ALPHA = (0 - 0) * 0 + TEX0 = TEX0 + * + * In the RGB channels, the texel color is multiplied by the shade color + * (which is the per-pixel interpolated vertex color), basically applying + * gouraud shading. 
The alpha channel of the texel is instead passed through + * with no modifications. + * + * The output of the combiner goes into the blender unit, that allows for further + * operations on the RGB channels, especially allowing to blend it with the + * framebuffer contents. See #RDPQ_BLENDER for information on how to configure the blender. + * + * The values created by #RDPQ_COMBINER1 are of type #rdpq_combiner_t. They can be used + * in two different ways: + * + * * When using the higher-level mode API (rdpq_mode.h), pass it to + * #rdpq_mode_combiner. This will take care of everything else required + * to make the combiner work (eg: render mode tweaks). See the + * documentation of #rdpq_mode_combiner for more information. + * * When using the lower-level API (#rdpq_set_combiner_raw), + * the combiner is configured into RDP, but it is up to the programmer + * to make sure the current render mode is compatible with it, + * or tweak it by calling #rdpq_set_other_modes_raw. For instance, + * if the render mode is in 2-cycle mode, only a 2-pass combiner + * should be set. + * + * This is the list of all possible slots. Not all slots are + * available for the four variables (see the table below). + * + * * `TEX0`: texel of the texture being drawn. + * * `SHADE`: per-pixel interpolated color. This can be set on each + * vertex of a triangle, and is interpolated across each pixel. It + * cannot be used while drawing rectangles. + * * `PRIM`: value of the PRIM register (set via #rdpq_set_prim_color) + * * `ENV`: value of the ENV register (set via #rdpq_set_env_color) + * * `NOISE`: a random value + * * `1`: the constant value 1.0 + * * `0`: the constant value 0.0 + * * `K4`: the constant value configured as `k4` as part of YUV parameters + * (via #rdpq_set_yuv_parms). + * * `K5`: the constant value configured as `k5` as part of YUV parameters + * (via #rdpq_set_yuv_parms). + * * `TEX0_ALPHA`: alpha of the texel of the texture being drawn.
+ * * `SHADE_ALPHA`: alpha of the per-pixel interpolated color. + * * `PRIM_ALPHA`: alpha of the PRIM register (set via #rdpq_set_prim_color) + * * `ENV_ALPHA`: alpha of the ENV register (set via #rdpq_set_env_color) + * * `LOD_FRAC`: the LOD fraction, that is the fractional value that can be used + * as interpolation value between different mipmaps. It basically + * says how much the texture is being scaled down. + * * `PRIM_LOD_FRAC` + * * `KEYCENTER` + * * `KEYSCALE` + * + * These tables show, for each possible variable of the RGB and ALPHA formula, + * which slots are allowed: + * + * + * + * + * + * + * + *
RGBA`TEX0`, `SHADE`, `PRIM`, `ENV`, `NOISE`, `1`, `0`
B `TEX0`, `SHADE`, `PRIM`, `ENV`, `KEYCENTER`, `K4`, `0`
C `TEX0`, `SHADE`, `PRIM`, `ENV`, `TEX0_ALPHA`, `SHADE_ALPHA`, `PRIM_ALPHA`, `ENV_ALPHA`, `LOD_FRAC`, `PRIM_LOD_FRAC`, `K5`, `KEYSCALE`, `0`
D
`TEX0`, `SHADE`, `PRIM`, `ENV`, `1`, `0`
+ * + * + * + * + * + * + * + *
ALPHAA`TEX0`, `SHADE`, `PRIM`, `ENV`, `1`, `0`
B`TEX0`, `SHADE`, `PRIM`, `ENV`, `1`, `0`
C`TEX0`, `SHADE`, `PRIM`, `ENV`, `LOD_FRAC`, `PRIM_LOD_FRAC`, `0`
D`TEX0`, `SHADE`, `PRIM`, `ENV`, `1`, `0`
+ * + * For instance, to draw a gouraud-shaded textured triangle, one might want to calculate + * the following combiner formula: + * + * RGB = TEX0 * SHADE + * ALPHA = TEX0 * SHADE + * + * which means that for all channels, we multiply the value sampled from the texture + * with the per-pixel interpolated color coming from the triangle vertex. To do so, + * we need to adapt the formula to the 4-variable combiner structure: + * + * RGB = (TEX0 - 0) * SHADE + 0 + * ALPHA = (TEX0 - 0) * SHADE + 0 + * + * To program this into the combiner, we can issue the following command: + * + * rdpq_mode_combiner(RDPQ_COMBINER1((TEX0, 0, SHADE, 0), (TEX0, 0, SHADE, 0))); + * + * Notice that this is just a way to obtain the formula above. Another possibility is: + * + * rdpq_mode_combiner(RDPQ_COMBINER1((SHADE, 0, TEX0, 0), (SHADE, 0, TEX0, 0))); + * + * which will obtain exactly the same result. + * + * A complete example drawing a textured rectangle with a fixed semi-transparency of 0.7: + * + * @code{.c} + * // Set standard mode + * rdpq_set_mode_standard(); + * + * // Set a combiner to sample TEX0 as-is in RGB channels, and put a fixed value + * // as alpha channel, coming from the ENV register. + * rdpq_mode_combiner(RDPQ_COMBINER1((ZERO, ZERO, ZERO, TEX0), (ZERO, ZERO, ZERO, ENV))); + * + * // Set the fixed value in the ENV register. RGB components are ignored as the slot + * // ENV is not used in the RGB combiner formula, so we just put zero there.
+ * rdpq_set_env_color(RGBA32(0, 0, 0, 0.7*255)); + * + * // Activate blending with the background + * rdpq_mode_blender(RDPQ_BLENDER(IN_RGB, ENV_ALPHA, MEMORY_RGB, INV_MUX_ALPHA)); + * + * // Load the texture in TMEM + * rdpq_tex_load(TILE0, texture, 0); + * + * // Draw the rectangle + * rdpq_texture_rectangle(TILE0, + * 0, 0, 100, 80, + * 0, 0, 1.f, 1.0f); + * @endcode + * + * @param[in] rgb The RGB formula as `(A, B, C, D)` + * @param[in] alpha The ALPHA formula as `(A, B, C, D)` + * + * @see #rdpq_mode_combiner + * @see #rdpq_set_combiner_raw + * @see #RDPQ_COMBINER2 + * @see #RDPQ_BLENDER + * + * @hideinitializer + */ +#define RDPQ_COMBINER1(rgb, alpha) \ + castcc(__rdpq_1cyc_comb_rgb rgb | __rdpq_1cyc_comb_alpha alpha) + +/** + * @brief Build a 2-pass combiner formula + * + * This is similar to #RDPQ_COMBINER1, but it creates a two-passes combiner. + * The combiner unit in RDP in fact allows up to two sequential combiner + * formulas that can be applied to each pixel. + * + * In the second pass, you can refer to the output of the first pass using + * the `COMBINED` slot (not available in the first pass). + * + * Refer to #RDPQ_COMBINER1 for more information. + * + * @see #rdpq_mode_combiner + * @see #rdpq_set_combiner_raw + * @see #RDPQ_COMBINER1 + * @see #RDPQ_BLENDER + * + * @hideinitializer + */ +#define RDPQ_COMBINER2(rgb0, alpha0, rgb1, alpha1) \ + castcc(__rdpq_2cyc_comb2a_rgb rgb0 | __rdpq_2cyc_comb2a_alpha alpha0 | \ + __rdpq_2cyc_comb2b_rgb rgb1 | __rdpq_2cyc_comb2b_alpha alpha1 | \ + RDPQ_COMBINER_2PASS) + + +/** + * @name Standard color combiners + * + * These macros offer some standard color combiner configuration that can be + * used to implement common render modes. + * + * @{ + */ +/** @brief Draw a flat color. + * Configure the color via #rdpq_set_prim_color. + */ +#define RDPQ_COMBINER_FLAT RDPQ_COMBINER1((0,0,0,PRIM), (0,0,0,PRIM)) +/** @brief Draw an interpolated color. 
+ * This can be used for solid, non-textured triangles with + * per-vertex lighting (gouraud shading). The colors must be + * specified on each vertex. Only triangles allow to specify + * a per-vertex color, so you cannot draw rectangles with this. + */ +#define RDPQ_COMBINER_SHADE RDPQ_COMBINER1((0,0,0,SHADE), (0,0,0,SHADE)) +/** + * @brief Draw with a texture. + * This is standard texture mapping, without any lights. + * It can be used for rectangles (#rdpq_texture_rectangle) + * or triangles (#rdpq_triangle). + */ +#define RDPQ_COMBINER_TEX RDPQ_COMBINER1((0,0,0,TEX0), (0,0,0,TEX0)) +/** + * @brief Draw with a texture modulated with a flat color. + * Configure the color via #rdpq_set_prim_color. + * + * Among other uses, this mode is the correct one to colorize a + * #FMT_IA8 and #FMT_IA4 texture with a fixed color. + */ +#define RDPQ_COMBINER_TEX_FLAT RDPQ_COMBINER1((TEX0,0,PRIM,0), (TEX0,0,PRIM,0)) +/** + * @brief Draw with a texture modulated with an interpolated color. + * This does texturing with gouraud shading, and can be used for textured triangles + * with per-vertex lighting. + * + * This mode makes sense only for triangles with per-vertex colors. It should + * not be used with rectangles. + */ +#define RDPQ_COMBINER_TEX_SHADE RDPQ_COMBINER1((TEX0,0,SHADE,0), (TEX0,0,SHADE,0)) +/** @} */ + +/** @name SET_OTHER_MODES bit macros + * + * These macros can be used to assemble a raw `SET_OTHER_MODES` command to send + * via #rdpq_set_other_modes_raw (or #rdpq_change_other_modes_raw). Assembling + * this command manually can be complex because of the different intertwined + * render modes that can be created. Beginners should look into the RDPQ + * mode API first (rdpq_mode.h). + * + * rdpq stores some special flags within unused bits of this register. These + * flags are defined using the prefix `SOMX_`.
+ */ +///@{ +#define SOMX_NUMLODS_MASK ((cast64(7))<<59) ///< Rdpq extension: number of LODs +#define SOMX_NUMLODS_SHIFT 59 ///< Rdpq extension: number of LODs shift + +#define SOM_ATOMIC_PRIM ((cast64(1))<<55) ///< Atomic: serialize command execution + +#define SOM_CYCLE_1 ((cast64(0))<<52) ///< Set cycle-type: 1cyc +#define SOM_CYCLE_2 ((cast64(1))<<52) ///< Set cycle-type: 2cyc +#define SOM_CYCLE_COPY ((cast64(2))<<52) ///< Set cycle-type: copy +#define SOM_CYCLE_FILL ((cast64(3))<<52) ///< Set cycle-type: fill +#define SOM_CYCLE_MASK ((cast64(3))<<52) ///< Cycle-type mask +#define SOM_CYCLE_SHIFT 52 ///< Cycle-type shift + +#define SOM_TEXTURE_PERSP (cast64(1)<<51) ///< Texture: enable perspective correction +#define SOM_TEXTURE_DETAIL (cast64(1)<<50) ///< Texture: enable "detail" +#define SOM_TEXTURE_SHARPEN (cast64(1)<<49) ///< Texture: enable "sharpen" +#define SOM_TEXTURE_LOD (cast64(1)<<48) ///< Texture: enable LODs. +#define SOM_TEXTURE_LOD_SHIFT 48 ///< Texture: LODs shift + +#define SOM_TLUT_NONE (cast64(0)<<46) ///< TLUT: no palettes +#define SOM_TLUT_RGBA16 (cast64(2)<<46) ///< TLUT: draw with palettes in format RGBA16 +#define SOM_TLUT_IA16 (cast64(3)<<46) ///< TLUT: draw with palettes in format IA16 +#define SOM_TLUT_MASK (cast64(3)<<46) ///< TLUT mask +#define SOM_TLUT_SHIFT 46 ///< TLUT mask shift + +#define SOM_SAMPLE_POINT (cast64(0)<<44) ///< Texture sampling: point sampling (1x1) +#define SOM_SAMPLE_BILINEAR (cast64(2)<<44) ///< Texture sampling: bilinear interpolation (2x2) +#define SOM_SAMPLE_MEDIAN (cast64(3)<<44) ///< Texture sampling: mid-texel average (2x2) +#define SOM_SAMPLE_MASK (cast64(3)<<44) ///< Texture sampling mask +#define SOM_SAMPLE_SHIFT 44 ///< Texture sampling mask shift + +#define SOM_TF0_RGB (cast64(1)<<43) ///< Texture Filter, cycle 0 (TEX0): standard fetching (for RGB) +#define SOM_TF0_YUV (cast64(0)<<43) ///< Texture Filter, cycle 0 (TEX0): fetch nearest and do first step of color conversion (for YUV) +#define
SOM_TF1_RGB (cast64(2)<<41) ///< Texture Filter, cycle 1 (TEX1): standard fetching (for RGB) +#define SOM_TF1_YUV (cast64(0)<<41) ///< Texture Filter, cycle 1 (TEX1): fetch nearest and do first step of color conversion (for YUV) +#define SOM_TF1_YUVTEX0 (cast64(1)<<41) ///< Texture Filter, cycle 1 (TEX1): don't fetch, and instead do color conversion on TEX0 (allows YUV with bilinear filtering) +#define SOM_TF_MASK (cast64(7)<<41) ///< Texture Filter mask +#define SOM_TF_SHIFT 41 ///< Texture filter mask shift + +#define SOM_RGBDITHER_SQUARE ((cast64(0))<<38) ///< RGB Dithering: square filter +#define SOM_RGBDITHER_BAYER ((cast64(1))<<38) ///< RGB Dithering: bayer filter +#define SOM_RGBDITHER_NOISE ((cast64(2))<<38) ///< RGB Dithering: noise +#define SOM_RGBDITHER_NONE ((cast64(3))<<38) ///< RGB Dithering: none +#define SOM_RGBDITHER_MASK ((cast64(3))<<38) ///< RGB Dithering mask +#define SOM_RGBDITHER_SHIFT 38 ///< RGB Dithering mask shift + +#define SOM_ALPHADITHER_SAME ((cast64(0))<<36) ///< Alpha Dithering: same as RGB +#define SOM_ALPHADITHER_INVERT ((cast64(1))<<36) ///< Alpha Dithering: invert pattern compared to RGB +#define SOM_ALPHADITHER_NOISE ((cast64(2))<<36) ///< Alpha Dithering: noise +#define SOM_ALPHADITHER_NONE ((cast64(3))<<36) ///< Alpha Dithering: none +#define SOM_ALPHADITHER_MASK ((cast64(3))<<36) ///< Alpha Dithering mask +#define SOM_ALPHADITHER_SHIFT 36 ///< Alpha Dithering mask shift + +#define SOMX_FOG ((cast64(1))<<32) ///< RDPQ special state: fogging is enabled +#define SOMX_UPDATE_FREEZE ((cast64(1))<<33) ///< RDPQ special state: render mode update is frozen (see #rdpq_mode_begin) +#define SOMX_AA_REDUCED ((cast64(1))<<34) ///< RDPQ special state: reduced antialiasing is enabled +#define SOMX_LOD_INTERPOLATE ((cast64(1))<<35) ///< RDPQ special state: mipmap interpolation (aka trilinear) requested + +#define SOM_BLEND0_MASK (cast64(0xCCCC0000) | SOM_BLENDING | SOM_READ_ENABLE | SOMX_BLEND_2PASS) ///< Blender: mask of settings related to 
pass 0 +#define SOM_BLEND1_MASK (cast64(0x33330000) | SOM_BLENDING | SOM_READ_ENABLE | SOMX_BLEND_2PASS) ///< Blender: mask of settings related to pass 1 +#define SOM_BLEND_MASK (SOM_BLEND0_MASK | SOM_BLEND1_MASK) ///< Blender: mask of all settings + +#define SOMX_BLEND_2PASS ((cast64(1))<<15) ///< RDPQ special state: record that the blender is made of 2 passes + +#define SOM_BLENDING ((cast64(1))<<14) ///< Activate blending for all pixels + +#define SOM_BLALPHA_CC ((cast64(0))<<12) ///< Blender IN_ALPHA is the output of the combiner (default) +#define SOM_BLALPHA_CVG ((cast64(2))<<12) ///< Blender IN_ALPHA is the coverage of the current pixel +#define SOM_BLALPHA_CVG_TIMES_CC ((cast64(3))<<12) ///< Blender IN_ALPHA is the product of the combiner output and the coverage +#define SOM_BLALPHA_MASK ((cast64(3))<<12) ///< Blender alpha configuration mask +#define SOM_BLALPHA_SHIFT 12 ///< Blender alpha configuration shift + +#define SOM_ZMODE_OPAQUE ((cast64(0))<<10) ///< Z-mode: opaque surface +#define SOM_ZMODE_INTERPENETRATING ((cast64(1))<<10) ///< Z-mode: interpenetrating surfaces +#define SOM_ZMODE_TRANSPARENT ((cast64(2))<<10) ///< Z-mode: transparent surface +#define SOM_ZMODE_DECAL ((cast64(3))<<10) ///< Z-mode: decal surface +#define SOM_ZMODE_MASK ((cast64(3))<<10) ///< Z-mode mask +#define SOM_ZMODE_SHIFT 10 ///< Z-mode mask shift + +#define SOM_Z_WRITE ((cast64(1))<<5) ///< Activate Z-buffer write +#define SOM_Z_WRITE_SHIFT 5 ///< Z-buffer write bit shift + +#define SOM_Z_COMPARE ((cast64(1))<<4) ///< Activate Z-buffer compare +#define SOM_Z_COMPARE_SHIFT 4 ///< Z-buffer compare bit shift + +#define SOM_ZSOURCE_PIXEL ((cast64(0))<<2) ///< Z-source: per-pixel Z +#define SOM_ZSOURCE_PRIM ((cast64(1))<<2) ///< Z-source: fixed value +#define SOM_ZSOURCE_MASK ((cast64(1))<<2) ///< Z-source mask +#define SOM_ZSOURCE_SHIFT 2 ///< Z-source mask shift + +#define SOM_ALPHACOMPARE_NONE ((cast64(0))<<0) ///< Alpha Compare: disable +#define 
SOM_ALPHACOMPARE_THRESHOLD ((cast64(1))<<0) ///< Alpha Compare: use blend alpha as threshold +#define SOM_ALPHACOMPARE_NOISE ((cast64(3))<<0) ///< Alpha Compare: use noise as threshold +#define SOM_ALPHACOMPARE_MASK ((cast64(3))<<0) ///< Alpha Compare mask +#define SOM_ALPHACOMPARE_SHIFT 0 ///< Alpha Compare mask shift + +#define SOM_READ_ENABLE ((cast64(1)) << 6) ///< Enable reads from framebuffer +#define SOM_AA_ENABLE ((cast64(1)) << 3) ///< Enable anti-alias + +#define SOM_COVERAGE_DEST_CLAMP ((cast64(0)) << 8) ///< Coverage: add and clamp to 7 (full) +#define SOM_COVERAGE_DEST_WRAP ((cast64(1)) << 8) ///< Coverage: add and wrap from 0 +#define SOM_COVERAGE_DEST_ZAP ((cast64(2)) << 8) ///< Coverage: force 7 (full) +#define SOM_COVERAGE_DEST_SAVE ((cast64(3)) << 8) ///< Coverage: save (don't write) +#define SOM_COVERAGE_DEST_MASK ((cast64(3)) << 8) ///< Coverage mask +#define SOM_COVERAGE_DEST_SHIFT 8 ///< Coverage mask shift + +#define SOM_COLOR_ON_CVG_OVERFLOW ((cast64(1)) << 7) ///< Update color buffer only on coverage overflow +///@} + +///@cond +// Helpers macros for RDPQ_BLENDER +#define _RDPQ_SOM_BLEND1_A_IN_RGB cast64(0) +#define _RDPQ_SOM_BLEND1_A_MEMORY_RGB cast64(1) +#define _RDPQ_SOM_BLEND1_A_BLEND_RGB cast64(2) +#define _RDPQ_SOM_BLEND1_A_FOG_RGB cast64(3) + +#define _RDPQ_SOM_BLEND1_B1_IN_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND1_B1_FOG_ALPHA cast64(1) +#define _RDPQ_SOM_BLEND1_B1_SHADE_ALPHA cast64(2) +#define _RDPQ_SOM_BLEND1_B1_ZERO cast64(3) +#define _RDPQ_SOM_BLEND1_B1_0 cast64(3) + +#define _RDPQ_SOM_BLEND1_B2_INV_MUX_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND1_B2_MEMORY_CVG cast64(1) +#define _RDPQ_SOM_BLEND1_B2_ONE cast64(2) +#define _RDPQ_SOM_BLEND1_B2_1 cast64(2) +#define _RDPQ_SOM_BLEND1_B2_ZERO cast64(3) +#define _RDPQ_SOM_BLEND1_B2_0 cast64(3) + +#define _RDPQ_SOM_BLEND2A_A_IN_RGB cast64(0) +#define _RDPQ_SOM_BLEND2A_A_BLEND_RGB cast64(2) +#define _RDPQ_SOM_BLEND2A_A_FOG_RGB cast64(3) + +#define _RDPQ_SOM_BLEND2A_B1_IN_ALPHA cast64(0) 
+#define _RDPQ_SOM_BLEND2A_B1_FOG_ALPHA cast64(1) +#define _RDPQ_SOM_BLEND2A_B1_SHADE_ALPHA cast64(2) +#define _RDPQ_SOM_BLEND2A_B1_ZERO cast64(3) +#define _RDPQ_SOM_BLEND2A_B1_0 cast64(3) + +#define _RDPQ_SOM_BLEND2A_B2_INV_MUX_ALPHA cast64(0) // only valid option is "1-b1" in the first pass + +#define _RDPQ_SOM_BLEND2B_A_CYCLE1_RGB cast64(0) +#define _RDPQ_SOM_BLEND2B_A_MEMORY_RGB cast64(1) +#define _RDPQ_SOM_BLEND2B_A_BLEND_RGB cast64(2) +#define _RDPQ_SOM_BLEND2B_A_FOG_RGB cast64(3) + +#define _RDPQ_SOM_BLEND2B_B1_IN_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND2B_B1_FOG_ALPHA cast64(1) +#define _RDPQ_SOM_BLEND2B_B1_SHADE_ALPHA cast64(2) +#define _RDPQ_SOM_BLEND2B_B1_ZERO cast64(3) +#define _RDPQ_SOM_BLEND2B_B1_0 cast64(3) + +#define _RDPQ_SOM_BLEND2B_B2_INV_MUX_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND2B_B2_MEMORY_CVG cast64(1) +#define _RDPQ_SOM_BLEND2B_B2_ONE cast64(2) +#define _RDPQ_SOM_BLEND2B_B2_1 cast64(2) +#define _RDPQ_SOM_BLEND2B_B2_ZERO cast64(3) +#define _RDPQ_SOM_BLEND2B_B2_0 cast64(3) + +#define _RDPQ_SOM_BLEND_EXTRA_A_IN_RGB cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_A_CYCLE1_RGB cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_A_MEMORY_RGB (SOM_READ_ENABLE) +#define _RDPQ_SOM_BLEND_EXTRA_A_BLEND_RGB cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_A_FOG_RGB cast64(0) + +#define _RDPQ_SOM_BLEND_EXTRA_B1_IN_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B1_FOG_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B1_SHADE_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B1_ZERO cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B1_0 cast64(0) + +#define _RDPQ_SOM_BLEND_EXTRA_B2_INV_MUX_ALPHA cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B2_MEMORY_CVG (SOM_READ_ENABLE) +#define _RDPQ_SOM_BLEND_EXTRA_B2_ONE cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B2_1 cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B2_ZERO cast64(0) +#define _RDPQ_SOM_BLEND_EXTRA_B2_0 cast64(0) + +#define __rdpq_blend(cyc, a1, b1, a2, b2, sa1, sb1, sa2, sb2) (\ + ((_RDPQ_SOM_BLEND ## cyc ## _A_ ## a1) << sa1) | \ + ((_RDPQ_SOM_BLEND ## 
cyc ## _B1_ ## b1) << sb1) | \ + ((_RDPQ_SOM_BLEND ## cyc ## _A_ ## a2) << sa2) | \ + ((_RDPQ_SOM_BLEND ## cyc ## _B2_ ## b2) << sb2) | \ + (_RDPQ_SOM_BLEND_EXTRA_A_ ## a1) | \ + (_RDPQ_SOM_BLEND_EXTRA_B1_ ## b1) | \ + (_RDPQ_SOM_BLEND_EXTRA_A_ ## a2) | \ + (_RDPQ_SOM_BLEND_EXTRA_B2_ ## b2) \ +) + +#define __rdpq_blend_1cyc_0(a1, b1, a2, b2) __rdpq_blend(1, a1, b1, a2, b2, 30, 26, 22, 18) +#define __rdpq_blend_1cyc_1(a1, b1, a2, b2) __rdpq_blend(1, a1, b1, a2, b2, 28, 24, 20, 16) +#define __rdpq_blend_2cyc_0(a1, b1, a2, b2) __rdpq_blend(2A, a1, b1, a2, b2, 30, 26, 22, 18) +#define __rdpq_blend_2cyc_1(a1, b1, a2, b2) __rdpq_blend(2B, a1, b1, a2, b2, 28, 24, 20, 16) +///@endcond + +/** + * @brief Build a 1-pass blender formula + * + * This macro allows to build a 1-pass blender formula. + * In general, the blender is able to execute the following + * per-pixel formula: + * + * (P * A) + (Q * B) + * + * where P and Q are usually pixel inputs, while A and B are + * blending factors. `P`, `Q`, `A`, `B` can be configured picking + * several possible inputs called "slots". + * + * The macro must be invoked as: + * + * RDPQ_BLENDER((P, A, Q, B)) + * + * where `P`, `A`, `Q`, `B` can be any of the values described below. + * Please notice the double parenthesis. + * + * For example, this macro: + * + * RDPQ_BLENDER((IN_RGB, IN_ALPHA, MEMORY_RGB, 1)) + * + * configures the formula: + * + * (IN_RGB * IN_ALPHA) + (MEMORY_RGB * 1.0) + * + * The value created is of type #rdpq_blender_t. They can be used + * in two different ways: + * + * * When using the higher-level mode API (rdpq_mode.h), the blender + * formula can be passed to either #rdpq_mode_fog or #rdpq_mode_blender. + * The blender unit is in fact capable of running up to two passes + * in sequence, so each function configures one different pass. 
+ * * When using the lower-level API (#rdpq_set_other_modes_raw), + * the value created by #RDPQ_BLENDER can be directly combined + * with other `SOM_*` macros to create the final value to + * pass to the function. If a two-pass blender must be configured, + * use #RDPQ_BLENDER2 instead. + * + * Pre-made formulas for common scenarios are available: see + * #RDPQ_BLENDER_MULTIPLY, #RDPQ_BLENDER_ADDITIVE, #RDPQ_FOG_STANDARD. + * + * These are all possible inputs for `P` and `Q`: + * + * * `IN_RGB`: The RGB channels of the pixel being drawn. This is + * actually the output of the color combiner (that can be + * configured via #rdpq_mode_combiner, #RDPQ_COMBINER1, + * and #RDPQ_COMBINER2). + * * `MEMORY_RGB`: Current contents of the framebuffer, where the + * current pixel will be drawn. Reading the framebuffer contents + * and using them in the formula allows to create the typical + * blending effect. + * * `BLEND_RGB`: A fixed RGB value programmed into the BLEND register. + * This can be configured via #rdpq_set_blend_color. + * * `FOG_RGB`: A fixed RGB value programmed into the FOG register. + * This can be configured via #rdpq_set_fog_color. + * + * These are all possible inputs for `A`: + * + * * `IN_ALPHA`: The alpha channel of the pixel being drawn. This is + * actually the output of the color combiner (that can be + * configured via #rdpq_mode_combiner, #RDPQ_COMBINER1, + * and #RDPQ_COMBINER2). + * * `FOG_ALPHA`: The alpha channel of the FOG register. + * This can be configured via #rdpq_set_fog_color. + * * `SHADE_ALPHA`: The alpha channel of the shade color. + * The shade component is the color optionally set on + * each vertex when drawing a triangle (see #rdpq_triangle). + * The RDP interpolates it on each pixel. + * * `0`: the constant value 0. + * + * These are all possible inputs for `B`: + * + * * `INV_MUX_ALPHA`: This value is the inverse of whatever input + * was selected for `A`. 
For instance, if `A` was configured + * as `FOG_ALPHA`, setting `B` to `INV_MUX_ALPHA` means using + * `1.0 - FOG_ALPHA` in the calculation. This basically allows + * to do a linear interpolation between `P` and `Q` where + * `A` is the interpolation factor. + * * `MEMORY_CVG`: This is the subpixel coverage value stored in + * the framebuffer at the position where the current pixel will + * be drawn. The coverage is normally stored as a value in the + * range 0-7, but the blender normalizes in the range 0.0-1.0. + * * `1`: the constant value 1. + * * `0`: the constant value 0. + * + * The blender uses the framebuffer precision for the RGB channels: + * when drawing to a 32-bit framebuffer, `P` and `Q` will have + * 8-bit precision per channel, whilst when drawing to a 16-bit + * framebuffer, `P` and `Q` will be 5-bit. You can add + * dithering if needed, via #rdpq_mode_dithering. + * + * On the other hand, `A` and `B` always have a reduced 5-bit + * precision, even on 32-bit framebuffers. This means that the + * alpha values will be quantized during the blending, possibly + * creating mach banding. Consider using dithering via + * #rdpq_mode_dithering to improve the quality of the picture. + * + * Notice that the blender formula only works on RGB channels. Alpha + * channels can be used as input (as multiplicative factor), but the + * blender does not produce an alpha channel as output. In fact, + * the RGB output will be written to the framebuffer after the blender, + * while the bits normally used for alpha in each framebuffer pixel + * will contain information about subpixel coverage (that will + * be then used by VI for doing antialiasing as a post-process filter + * -- see #rdpq_mode_antialias for a brief explanation). 
+ * + * @see #rdpq_mode_blender + * @see #rdpq_mode_fog + * @see #rdpq_mode_dithering + * @see #rdpq_set_fog_color + * @see #rdpq_set_blend_color + * @see #rdpq_set_other_modes_raw + * + * @hideinitializer + */ +#define RDPQ_BLENDER(bl) castbl(__rdpq_blend_1cyc_0 bl | __rdpq_blend_1cyc_1 bl) + +/** + * @brief Build a 2-pass blender formula + * + * This macro is similar to #RDPQ_BLENDER, but it can be used to build a + * two-pass blender formula. This formula can then be configured using the + * mode API via #rdpq_mode_blender, or using the lower-level API via + * #rdpq_change_other_modes_raw. + * + * Refer to #RDPQ_BLENDER for information on how to build a blender formula. + * + * In two-pass mode, there are a few differences and gotchas in the way the formula + * must be constructed: + * + * * In the first pass, `B` must be `INV_MUX_ALPHA` (any other value is invalid + * and will result in a compile-time error). + * * In the first pass, `MEMORY_RGB` is not available. + * * In the second pass, `IN_RGB` is not available, but you can + * instead use `CYCLE1_RGB` to refer to the output of the first cycle. + * `IN_ALPHA` is still available (as the blender does not produce an alpha + * output, so the input alpha is available also in the second pass). + * * In the second pass, because of a hardware bug, `SHADE_ALPHA` will actually + * refer to the alpha color of the *next* pixel in the scanline (the pixel + * to the right). On the last pixel of the triangle in each scanline, the + * value read as `SHADE_ALPHA` is mostly undefined. Given this hardware bug, + * avoid using `SHADE_ALPHA` in the second pass if possible. 
+ * + * @see #RDPQ_BLENDER + * @see #rdpq_mode_blender + * @see #rdpq_set_other_modes_raw + * + * @hideinitializer + */ +#define RDPQ_BLENDER2(bl0, bl1) castbl(__rdpq_blend_2cyc_0 bl0 | __rdpq_blend_2cyc_1 bl1 | SOMX_BLEND_2PASS) + +#endif diff --git a/include/rdpq_mode.h b/include/rdpq_mode.h new file mode 100644 index 0000000000..bf88e53b26 --- /dev/null +++ b/include/rdpq_mode.h @@ -0,0 +1,894 @@ +/** + * @file rdpq_mode.h + * @brief RDP Command queue: mode setting + * @ingroup rdp + * + * The mode API is a high level API to simplify mode setting with RDP. Configuring + * render modes is possibly the most complex task with RDP programming, as the RDP + * is full of hardware features that interact badly with each other or are in general + * non-orthogonal. The mode API tries to hide much of the complexity behind an API + * more similar to a modern graphic API like OpenGL. + * + * In general, mode setting with RDP is performed via two commands SET_COMBINE_MODE + * and SET_OTHER_MODES. These two commands are available as "raw" commands in the + * basic rdpq API as #rdpq_set_combiner_raw and #rdpq_set_other_modes_raw. These + * two functions set the specified configurations into the RDP hardware registers, + * and do nothing else, so they can always be used to do manual RDP programming. + * + * Instead, the mode API follows the following pattern: + * + * * First, one of the basic **render modes** must be set via one of + * the `rdpq_set_mode_*` functions. + * * Afterwards, it is possible to tweak the render mode by changing + * one or more **render states** via `rdpq_mode_*` functions. + * + * The rdpq mode API currently offers the following render modes: + * + * * **Standard** (#rdpq_set_mode_standard). This is the most basic and general + * render mode. It allows to use all RDP render states (that must be activated via the + * various `rdpq_mode_*` functions). + * * **Copy** (#rdpq_set_mode_copy). 
This is a fast (4x) mode in which the RDP + * can perform fast blitting of textured rectangles (aka sprites). All texture + * formats are supported, and color 0 can be masked for transparency. Textures + * can be scaled and rotated, but not mirrored. Blending is not supported. + * * **Fill** (#rdpq_set_mode_fill). This is a fast (4x) mode in which the RDP + * is able to quickly fill a rectangular portion of the target buffer with a + * fixed color. It can be used to clear the screen. Blending is not supported. + * * **YUV** (#rdpq_set_mode_yuv). This is a render mode that can be used to + * blit YUV textures, converting them to RGB. Support for YUV textures in RDP + * does in fact require a specific render mode (you cannot use YUV textures + * otherwise). It is possible to decide whether or not to activate bilinear + * filtering, as it makes the RDP 2x slower when used in this mode. + * + * After setting the render mode, you can configure the render states. An important + * implementation effort has been made to try and make the render states orthogonal, + * so that each one can be toggled separately without inter-dependence (a task + * which is particularly complex on the RDP hardware). Not all render states are + * available in all modes; refer to the documentation of each render state for + * further information. + * + * * Antialiasing (#rdpq_mode_antialias). Activate antialiasing on both internal + * and external edges. + * * Combiner (FIXME) + * * Blending (FIXME) + * * Fog (FIXME) + * * Dithering (#rdpq_mode_dithering). Activate dithering on either the RGB channels, + * the alpha channel, or both. + * * Alpha compare (#rdpq_mode_alphacompare). Activate alpha compare function using + * a fixed threshold. + * * Z-Override (#rdpq_mode_zoverride): Give a fixed Z value to a whole triangle or + * rectangle. + * * TLUT (#rdpq_mode_tlut): activate usage of palettes. + * * Filtering (#rdpq_mode_filter): activate bilinear filtering. 
+ * + * @note From a hardware perspective, rdpq handles automatically the "RDP cycle type". + * That is, it transparently switches from "1-cycle mode" to "2-cycle mode" + * whenever it is necessary. If you come from a RDP low-level programming + * background, it might be confusing at first because everything "just works" + * without needing to adjust settings any time you need to change a render state. + * + * + * ## Mode setting stack + * + * The mode API also keeps a small (4 entry) stack of mode configurations. This + * allows client code to temporarily switch render mode and then get back to + * the previous mode, which helps modularizing the code. + * + * To save the current render mode onto the stack, use #rdpq_mode_push. To restore + * the previous render mode from the stack, use #rdpq_mode_pop. + * + * Notice the mode settings being part of this stack are those which are configured + * via the mode API functions itself (`rdpq_set_mode_*` and `rdpq_mode_*`). Anything + * that doesn't go through the mode API is not saved/restored. For instance, + * activating blending via #rdpq_mode_blender is saved onto the stack, whilst + * changing the BLEND color register (via #rdpq_set_blend_color) is not, and you + * can tell by the fact that the function called to configure it is not part of + * the mode API. + * + */ +#ifndef LIBDRAGON_RDPQ_MODE_H +#define LIBDRAGON_RDPQ_MODE_H + +#include "rdpq.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +///@cond +// Internal helpers, not part of the public API +inline void __rdpq_mode_change_som(uint64_t mask, uint64_t val); +///@endcond + +/** + * @brief Push the current render mode into the stack + * + * This function allows to push the current render mode into an internal stack. + * It allows to temporarily modify the render mode, and later recover its value. + * + * This is effective on all render mode changes that can be modified via + * rdpq_mode_* function. 
It does not affect other RDP configurations such as + * the various colors. + * + * The stack has 4 slots (including the current one). + */ + +void rdpq_mode_push(void); + +/** + * @brief Pop the current render mode from the stack + * + * This function allows to pop a previously pushed render mode from the stack, + * setting it as current again. + */ + +void rdpq_mode_pop(void); + +/** + * @brief Texture filtering types + */ +typedef enum rdpq_filter_s { + FILTER_POINT = SOM_SAMPLE_POINT >> SOM_SAMPLE_SHIFT, ///< Point filtering (aka nearest) + FILTER_BILINEAR = SOM_SAMPLE_BILINEAR >> SOM_SAMPLE_SHIFT, ///< Bilinear filtering + FILTER_MEDIAN = SOM_SAMPLE_MEDIAN >> SOM_SAMPLE_SHIFT, ///< Median filtering +} rdpq_filter_t; + +/** + * @brief Dithering configuration + * + * RDP can optionally perform dithering on the RGB and alpha channels of the texture. + * The dithering is performed by the blender unit, which is also in charge of + * adapting the pixel color depth to that of the framebuffer. Dithering is + * a good way to reduce the mach banding effect created by color depth + * reduction. + * + * The blender in fact will reduce the RGB components of the pixel (coming + * from the color combiner) to 5-bit when the framebuffer is 16-bit. If the + * framebuffer is 32-bit, the blender formula will be calculated with 8-bit + * per channel, so no dithering is required. + * + * On the other hand, the alpha channels (used as multiplicative factors + * in the blender formulas) will always be reduced to 5-bit depth, even if + * the framebuffer is 32-bit. If you see banding artifacts in transparency levels + * of blended polygons, you may want to activate dithering on the alpha channel. + * + * It is important to notice that the VI can optionally run a "dither filter" + * on the final image, while sending it to the video output. 
This + * algorithm tries to recover color depth precision by averaging lower bits + * in neighboring pixels, and reducing the small noise created by dithering. + * #display_init currently activates it by default on all 16-bit display modes, + * if passed #FILTERS_DEDITHER or #FILTERS_RESAMPLE_ANTIALIAS_DEDITHER. + * + * If you are using an emulator, make sure it correctly emulates the VI + * dither filter to judge the quality of the final image. For instance, + * the RDP plugin parallel-RDP (based on Vulkan) emulates it very accurately, + * so emulators like Ares, dgb-n64 or simple64 will produce a picture closer to + * real hardware. + * + * The supported dither algorithms are: + * + * * `SQUARE` (aka "magic square"). This is a custom dithering + * algorithm, designed to work best with the VI dither filter. When + * using it, the VI will reconstruct a virtually perfect 32-bit image + * even though the framebuffer is only 16-bit. + * * `BAYER`: standard Bayer dithering. This algorithm looks + * better than the magic square when the VI dither filter is disabled, + * or in some specific scenarios like large blended polygons. Make + * sure to test it as well. + * * `INVSQUARE` and `INVBAYER`: these are the same algorithms, but using + * an inverse (symmetrical) pattern. They can be selected for alpha + * channels to avoid making transparency phase with color dithering, + * which is sometimes awkward. + * * `NOISE`: random noise dithering. The dithering is performed + * by perturbing the lower bit of each pixel with random noise. + * This will create a specific visual effect as it changes from frame to + * frame even on still images; it is especially apparent when used on + * the alpha channel as it can affect transparency. It is more commonly used + * as a graphic effect rather than as actual dithering. + * * `NONE`: disable dithering. 
+ * + * While the RDP hardware allows to configure different dither algorithms + * for RGB and Alpha channels, unfortunately not all combinations are + * available. This enumerator defines the available combinations. For + * instance, #DITHER_BAYER_NOISE selects the Bayer dithering for the + * RGB channels, and the noise dithering for alpha channel. + */ + +typedef enum rdpq_dither_s { + DITHER_SQUARE_SQUARE = (SOM_RGBDITHER_SQUARE | SOM_ALPHADITHER_SAME) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Square, Alpha=Square + DITHER_SQUARE_INVSQUARE = (SOM_RGBDITHER_SQUARE | SOM_ALPHADITHER_INVERT) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Square, Alpha=InvSquare + DITHER_SQUARE_NOISE = (SOM_RGBDITHER_SQUARE | SOM_ALPHADITHER_NOISE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Square, Alpha=Noise + DITHER_SQUARE_NONE = (SOM_RGBDITHER_SQUARE | SOM_ALPHADITHER_NONE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Square, Alpha=None + + DITHER_BAYER_BAYER = (SOM_RGBDITHER_BAYER | SOM_ALPHADITHER_SAME) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Bayer, Alpha=Bayer + DITHER_BAYER_INVBAYER = (SOM_RGBDITHER_BAYER | SOM_ALPHADITHER_INVERT) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Bayer, Alpha=InvBayer + DITHER_BAYER_NOISE = (SOM_RGBDITHER_BAYER | SOM_ALPHADITHER_NOISE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Bayer, Alpha=Noise + DITHER_BAYER_NONE = (SOM_RGBDITHER_BAYER | SOM_ALPHADITHER_NONE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Bayer, Alpha=None + + DITHER_NOISE_SQUARE = (SOM_RGBDITHER_NOISE | SOM_ALPHADITHER_SAME) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Noise, Alpha=Square + DITHER_NOISE_INVSQUARE = (SOM_RGBDITHER_NOISE | SOM_ALPHADITHER_INVERT) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Noise, Alpha=InvSquare + DITHER_NOISE_NOISE = (SOM_RGBDITHER_NOISE | SOM_ALPHADITHER_NOISE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=Noise, Alpha=Noise + DITHER_NOISE_NONE = (SOM_RGBDITHER_NOISE | SOM_ALPHADITHER_NONE) >> SOM_ALPHADITHER_SHIFT, ///< 
Dithering: RGB=Noise, Alpha=None + + DITHER_NONE_BAYER = (SOM_RGBDITHER_NONE | SOM_ALPHADITHER_SAME) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=None, Alpha=Bayer + DITHER_NONE_INVBAYER = (SOM_RGBDITHER_NONE | SOM_ALPHADITHER_INVERT) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=None, Alpha=InvBayer + DITHER_NONE_NOISE = (SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NOISE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=None, Alpha=Noise + DITHER_NONE_NONE = (SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE) >> SOM_ALPHADITHER_SHIFT, ///< Dithering: RGB=None, Alpha=None +} rdpq_dither_t; + +/** + * @brief Types of palettes supported by RDP + */ +typedef enum rdpq_tlut_s { + TLUT_NONE = 0, ///< No palette + TLUT_RGBA16 = 2, ///< Palette made of #FMT_RGBA16 colors + TLUT_IA16 = 3, ///< Palette made of #FMT_IA16 colors +} rdpq_tlut_t; + +/** + * @brief Converts the specified texture format to the TLUT mode that is needed to draw a texture of this format + */ +inline rdpq_tlut_t rdpq_tlut_from_format(tex_format_t format) { + switch (format) { + case FMT_CI4: + case FMT_CI8: + return TLUT_RGBA16; + default: + return TLUT_NONE; + } +} + +/** + * @brief Types of mipmap supported by RDP + */ +typedef enum rdpq_mipmap_s { + MIPMAP_NONE = 0, ///< Mipmap disabled + MIPMAP_NEAREST = SOM_TEXTURE_LOD >> 32, ///< Choose the nearest mipmap level + MIPMAP_INTERPOLATE = (SOM_TEXTURE_LOD | SOMX_LOD_INTERPOLATE) >> 32, ///< Interpolate between the two nearest mipmap levels (also known as "trilinear") + MIPMAP_INTERPOLATE_SHARPEN = (SOM_TEXTURE_LOD | SOMX_LOD_INTERPOLATE | SOM_TEXTURE_SHARPEN) >> 32, ///< Interpolate between the two nearest mipmap levels (also known as "trilinear") with sharpening enabled + MIPMAP_INTERPOLATE_DETAIL = (SOM_TEXTURE_LOD | SOMX_LOD_INTERPOLATE | SOM_TEXTURE_DETAIL) >> 32, ///< Interpolate between the two nearest mipmap levels (also known as "trilinear") with detail texture enabled +} rdpq_mipmap_t; + +/** + * @brief Types of antialiasing supported by RDP + */ +typedef 
enum rdpq_antialias_s { + AA_NONE = 0, ///< No antialiasing + AA_STANDARD = 1, ///< Standard antialiasing + AA_REDUCED = 2, ///< Reduced antialiasing +} rdpq_antialias_t; + + +/** + * @name Render modes + * + * These functions set a new render mode from scratch. Every render state is + * reset to some value (or default), so no previous state is kept valid. + * + * @{ + */ + +/** + * @brief Reset render mode to standard. + * + * This is the most basic and general mode reset function. It configures the RDP + * processor in a standard and very basic way: + * + * * Basic texturing (without shading) + * * No dithering, antialiasing, blending, etc. + * + * You can further configure the mode by calling one of the many functions + * in the mode API (`rdpq_mode_*`). + */ +void rdpq_set_mode_standard(void); + + +/** + * @brief Reset render mode to FILL type. + * + * This function sets the render mode type to FILL, which is used to quickly + * fill portions of the screens with a solid color. The specified color is + * configured via #rdpq_set_fill_color, and can be changed later. + * + * Notice that in FILL mode most of the RDP features are disabled, so all other + * render modes settings (rdpq_mode_* functions) do not work. + * + * @param[in] color The fill color to use + */ +inline void rdpq_set_mode_fill(color_t color) { + extern void __rdpq_set_mode_fill(void); + __rdpq_set_mode_fill(); + rdpq_set_fill_color(color); +} + +/** + * @brief Reset render mode to COPY type. + * + * This function sets the render mode type to COPY, which is used to quickly + * blit bitmaps. In COPY mode, only texture rectangles (aka "sprites") can be + * drawn and no advanced render mode features are working (rdpq_mode_* functions). + * + * The only available feature is transparency: pixels with alpha set to 0 can + * optionally be discarded during blit, so that the target buffer contents is + * not overwritten for those pixels. This is implemented using alpha compare. 
+ * + * The COPY mode is approximately 4 times faster at drawing than the standard + * mode, so make sure to enable it whenever it is possible. + * + * @note The COPY mode only works with 16-bpp framebuffers. It will trigger a + * hardware crash (!) on 32-bpp framebuffers, so avoid using it. The + * validator will warn you about this anyway. + * + * @param[in] transparency If true, pixels with alpha set to 0 are not drawn + * + * @see #rdpq_set_mode_standard + */ +void rdpq_set_mode_copy(bool transparency); + +/** + * @brief Reset render mode to YUV mode. + * + * This is a helper function to configure a render mode for YUV conversion. + * In addition of setting the render mode, this function also configures a + * combiner (given that YUV conversion happens also at the combiner level), + * and set standard YUV parameters (for BT.601 TV Range). + * + * After setting the YUV mode, you can load YUV textures to TMEM (using a + * surface with #FMT_YUV16), and then draw them on the screen as part of + * triangles or rectangles. + * + * @param[in] bilinear If true, YUV textures will also be filtered with + * bilinear interpolation (note: this will require + * 2-cycle mode so it will be twice as slow). + */ +void rdpq_set_mode_yuv(bool bilinear); + +/** @} */ + +/** + * @name Render states + * + * These functions allow to tweak individual render states. They should be called + * after one of the render mode reset functions to configure the render states. + * + * @{ + */ + + +/** + * @brief Activate antialiasing + * + * This function can be used to enable/disable antialias at the RDP level. + * There are two different kinds of antialias on N64: + * + * * Antialias on internal edges: this is fully performed by RDP. + * * Antialias on external edges: this is prepared by RDP but is actually + * performed as a post-processing filter by VI. 
+ * + * This function activates both kinds of antialias, but to display correctly + * the second type, make sure that you did pass #FILTERS_RESAMPLE_ANTIALIAS or + * #FILTERS_RESAMPLE_ANTIALIAS_DEDITHER to #display_init. + * + * On the other hand, if you want to make sure that no antialias is performed, + * disable antialias with `rdpq_mode_antialias(AA_NONE)` (which is the default + * for #rdpq_set_mode_standard), and that will make sure that the VI will not + * do anything to the image, even if #display_init was called with + * #FILTERS_RESAMPLE_ANTIALIAS or #FILTERS_RESAMPLE_ANTIALIAS_DEDITHER. + * + * @note Antialiasing internally uses the blender unit. If you already + * configured a formula via #rdpq_mode_blender, antialias will just + * rely on that one to correctly blend pixels with the framebuffer. It is + * thus important that a custom formula configured via #rdpq_mode_blender + * does blend with the background somehow. + * + * @param mode Antialiasing mode to use (or AA_NONE to disable) + */ +inline void rdpq_mode_antialias(rdpq_antialias_t mode) +{ + // Just enable/disable SOM_AA_ENABLE. The RSP will then update the render mode + // which would trigger different other bits in SOM depending on the current mode. + __rdpq_mode_change_som(SOM_AA_ENABLE | SOMX_AA_REDUCED, + (mode ? SOM_AA_ENABLE : 0) | (mode == AA_REDUCED ? SOMX_AA_REDUCED : 0)); +} + +/** + * @brief Configure the color combiner + * + * This function allows to configure the color combiner formula to be used. + * The color combiner is the internal RDP hardware unit that mixes inputs + * from textures, colors and other sources and produces a RGB/Alpha value, + * that is then sent to the blender unit. If the blender is disabled (eg: + * the polygon is solid), the value produced by the combiner is the one + * that will be written into the framebuffer. 
+ * + * For common use cases, rdpq offers ready-to-use macros that you can pass + * to #rdpq_mode_combiner: #RDPQ_COMBINER_FLAT, #RDPQ_COMBINER_SHADE, + * #RDPQ_COMBINER_TEX, #RDPQ_COMBINER_TEX_FLAT, #RDPQ_COMBINER_TEX_SHADE. + * + * For example, to draw a texture rectangle modulated with a flat color: + * + * @code{.c} + * // Reset to standard rendering mode. + * rdpq_set_mode_standard(); + * + * // Configure the combiner + * rdpq_mode_combiner(RDPQ_COMBINER_TEX_FLAT); + * + * // Configure the flat color that will modulate the texture + * rdpq_set_prim_color(RGBA32(192, 168, 74, 255)); + * + * // Upload a texture into TMEM (tile descriptor #4) + * rdpq_tex_upload(TILE4, &texture, 0); + * + * // Draw the rectangle + * rdpq_texture_rectangle(TILE4, + * 0, 0, 32, 16, // x0, y0, x1, y1 + * 0, 0, 1.0, 1.0f // s, t, ds, dt + * ); + * @endcode + * + * Alternatively, you can use your own combiner formulas, created with either + * #RDPQ_COMBINER1 (one pass) or #RDPQ_COMBINER2 (two passes). See the respective + * documentation for all the details on how to create a custom formula. + * + * When using a custom formula, you must take into account that some render states + * also rely on the combiner to work. Specifically: + * + * * Mipmap (#rdpq_mode_mipmap): when activating interpolated mipmapping + * (#MIPMAP_INTERPOLATE, also known as "trilinear filtering"), a dedicated + * color combiner pass is needed, so if you set a custom formula, it has to be + * a one-pass formula. Otherwise, an RSP assertion will trigger. + * * Fog (#rdpq_mode_fog): fogging is generally made by substituting the alpha + * component of the shade color with a depth value, which is then used in + * the blender formula (eg: #RDPQ_FOG_STANDARD). The only interaction with the + * color combiner is that the SHADE alpha component should not be used as + * a modulation factor in the combiner, otherwise you get wrong results + * (if you then use the alpha for blending). 
rdpq automatically adjusts + * standard combiners using shade (#RDPQ_COMBINER_SHADE and #RDPQ_COMBINER_TEX_SHADE) + * when fog is enabled, but for custom combiners it is up to the user to + * take care of that. + * + * @param comb The combiner formula to configure + * + * @see #RDPQ_COMBINER1 + * @see #RDPQ_COMBINER2 + * + * @note For programmers with previous RDP programming experience: this function + * makes sure that the current cycle type can work correctly with the + * specified combiner formula. Specifically, it switches automatically + * between 1-cycle and 2-cycle depending on the formula being set and the + * blender unit configuration, and also automatically adapts combiner + * formulas to the required cycle mode. See the documentation in rdpq.c + * for more information. + */ +inline void rdpq_mode_combiner(rdpq_combiner_t comb) { + extern void __rdpq_fixup_mode(uint32_t cmd_id, uint32_t w0, uint32_t w1); + extern void __rdpq_fixup_mode4(uint32_t cmd_id, uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3); + + if (comb & RDPQ_COMBINER_2PASS) + __rdpq_fixup_mode(RDPQ_CMD_SET_COMBINE_MODE_2PASS, + (comb >> 32) & 0x00FFFFFF, + comb & 0xFFFFFFFF); + else { + rdpq_combiner_t comb1_mask = RDPQ_COMB1_MASK; + if (((comb >> 0 ) & 7) == 1) comb1_mask ^= 1ull << 0; + if (((comb >> 3 ) & 7) == 1) comb1_mask ^= 1ull << 3; + if (((comb >> 6 ) & 7) == 1) comb1_mask ^= 1ull << 6; + if (((comb >> 18) & 7) == 1) comb1_mask ^= 1ull << 18; + if (((comb >> 21) & 7) == 1) comb1_mask ^= 1ull << 21; + if (((comb >> 24) & 7) == 1) comb1_mask ^= 1ull << 24; + if (((comb >> 32) & 31) == 1) comb1_mask ^= 1ull << 32; + if (((comb >> 37) & 15) == 1) comb1_mask ^= 1ull << 37; + + __rdpq_fixup_mode4(RDPQ_CMD_SET_COMBINE_MODE_1PASS, + (comb >> 32) & 0x00FFFFFF, + comb & 0xFFFFFFFF, + (comb1_mask >> 32) & 0x00FFFFFF, + comb1_mask & 0xFFFFFFFF); + } +} + +/** @brief Blending mode: multiplicative alpha. 
+ * + * This is standard multiplicative blending between the color being + * drawn and the framebuffer color. + * + * You can pass this macro to #rdpq_mode_blender. + */ +#define RDPQ_BLENDER_MULTIPLY RDPQ_BLENDER((IN_RGB, IN_ALPHA, MEMORY_RGB, INV_MUX_ALPHA)) + +/** @brief Blending mode: multiplicative alpha with a constant value. + * + * This is similar to #RDPQ_BLENDER_MULTIPLY, but instead of using the alpha + * value from the texture (or rather, the one coming out of the color combiner), + * it uses a constant value that must be programmed via #rdpq_set_fog_color. + * + * You can pass this macro to #rdpq_mode_blender: + * + * @code{.c} + * float alpha = 0.5f; + * rdpq_set_fog_color(RGBA32(0, 0, 0, alpha * 255)); + * rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY_CONST); + * @endcode + * + * Notice that the alpha value coming out of the combiner is ignored. This + * means that you can use this blender formula even for blending textures without + * alpha channel. + */ +#define RDPQ_BLENDER_MULTIPLY_CONST RDPQ_BLENDER((IN_RGB, FOG_ALPHA, MEMORY_RGB, INV_MUX_ALPHA)) + +/** @brief Blending mode: additive alpha. + * You can pass this macro to #rdpq_mode_blender. + * + * NOTE: additive blending is broken on RDP because it can overflow. Basically, + * if the result of the sum is larger than 1.5 (in scale 0..1), instead + * of being clamped to 1, it overflows back to 0, which makes the + * mode almost useless. It is defined only for completeness. + */ +#define RDPQ_BLENDER_ADDITIVE RDPQ_BLENDER((IN_RGB, IN_ALPHA, MEMORY_RGB, ONE)) + +/** + * @brief Configure the formula to use for blending. + * + * This function can be used to configure the formula used + * in the blender unit. + * + * The standard blending formulas are: + * + * * #RDPQ_BLENDER_MULTIPLY: multiplicative alpha blending + * * #RDPQ_BLENDER_ADDITIVE: additive alpha blending + * + * It is possible to also create custom formulas. The blender unit + * allows for up to two passes. 
Use #RDPQ_BLENDER to create a one-pass + * blending formula, or #RDPQ_BLENDER2 to create a two-pass formula. + * + * Please notice that two-pass formulas are not compatible with fogging + * (#rdpq_mode_fog). Also notice that rdpq_mode assumes that any formula + * that you set here (either one-pass or two-pass) does blend with the + * background. If you want to use a formula that does not blend with the + * background, set it via #rdpq_mode_fog, otherwise you might get incorrect + * results when using anti-alias (see #rdpq_mode_antialias). + * + * The following example shows how to draw a texture rectangle using + * a fixed blending value of 0.5 (ignoring the alpha channel of the + * texture): + * + * @code{.c} + * // Set standard mode + * rdpq_set_mode_standard(); + * + * // Configure the formula: + * // (IN_RGB * FOG_ALPHA) + (MEMORY_RGB * (1 - FOG_ALPHA)) + * // + * // where FOG_ALPHA is the fixed alpha value coming from the FOG register. + * // Notice that the FOG register is not necessarily about fogging... it is + * // just one of the two registers that can be used in blending formulas. + * rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, FOG_ALPHA, MEMORY_RGB, INV_MUX_ALPHA))); + * + * // Configure the FOG_ALPHA value to 128 (= 0.5). The RGB components are + * // not used. + * rdpq_set_fog_color(RGBA32(0,0,0, 128)); + * + * // Load a texture into TMEM + * rdpq_tex_upload(TILE0, texture, 0); + * + * // Draw it + * rdpq_texture_rectangle(TILE0, + * 0, 0, 64, 64, // x0,y0 - x1,y1 + * 0, 0, 1.0, 1.0 // s0,t0 - ds,dt + * ); + * @endcode + * + * @param blend Blending formula created with #RDPQ_BLENDER, + * or 0 to disable. 
+ * + * @see #rdpq_mode_fog + * @see #RDPQ_BLENDER + * @see #RDPQ_BLENDER_MULTIPLY + * @see #RDPQ_BLENDER_ADDITIVE + */ +inline void rdpq_mode_blender(rdpq_blender_t blend) { + extern void __rdpq_fixup_mode(uint32_t cmd_id, uint32_t w0, uint32_t w1); + if (blend) blend |= SOM_BLENDING; + __rdpq_fixup_mode(RDPQ_CMD_SET_BLENDING_MODE, 0, blend); +} + +/** @brief Fogging mode: standard. + * You can pass this macro to #rdpq_mode_fog. */ +#define RDPQ_FOG_STANDARD RDPQ_BLENDER((IN_RGB, SHADE_ALPHA, FOG_RGB, INV_MUX_ALPHA)) + +/** + * @brief Enable or disable fog + * + * This function enables fog on RDP. Fog on RDP is simulated in the + * following way: + * + * * The T&L pipeline must calculate a depth information for each + * vertex of the primitive and put it into the alpha channel of + * the per-vertex color. This is outside of the scope of rdpq, + * so rdpq assumes that this has already been done when + * #rdpq_mode_fog is called. + * * The RDP blender unit is programmed to modulate a "fog color" + * with the polygon pixel, using SHADE_ALPHA as interpolation + * factor. Since SHADE_ALPHA contains a depth information, the + * farther the object, the stronger it will assume the fog color. + * + * To enable fog, pass #RDPQ_FOG_STANDARD to this function, and + * call #rdpq_set_fog_color to configure the fog color. This is + * the standard fogging formula. + * + * If you want, you can instead build a custom fogging formula + * using #RDPQ_BLENDER. Notice that rdpq_mode assumes that the formula + * that you set with rdpq_mode_fog does not blend with the background; for + * that, use #rdpq_mode_blender. + * + * To disable fog, call #rdpq_mode_fog passing 0. + * + * @note Fogging uses one pass of the blender unit (the first), + * so this can coexist with a blending formula (#rdpq_mode_blender) + * as long as it's a single pass one (created via #RDPQ_BLENDER). + * If a two-pass blending formula (#RDPQ_BLENDER2) was set with + * #rdpq_mode_blender, fogging cannot be used. 
+ * + * @param fog Fog formula created with #RDPQ_BLENDER, + * or 0 to disable. + * + * @see #RDPQ_FOG_STANDARD + * @see #rdpq_set_fog_color + * @see #RDPQ_BLENDER + * @see #rdpq_mode_blender + */ +inline void rdpq_mode_fog(rdpq_blender_t fog) { + extern void __rdpq_fixup_mode(uint32_t cmd_id, uint32_t w0, uint32_t w1); + if (fog) fog |= SOM_BLENDING; + if (fog) assertf((fog & SOMX_BLEND_2PASS) == 0, "Fogging cannot be used with two-pass blending formulas"); + __rdpq_mode_change_som(SOMX_FOG, fog ? SOMX_FOG : 0); + __rdpq_fixup_mode(RDPQ_CMD_SET_FOG_MODE, 0, fog); +} + +/** + * @brief Change dithering mode + * + * This function allows to change the dithering algorithm performed by + * RDP on RGB and alpha channels. Note that by default, #rdpq_set_mode_standard + * disables any dithering. + * + * See #rdpq_dither_t for an explanation of how RDP applies dithering and + * how the different dithering algorithms work. + * + * @param dither Dithering to perform + * + * @see #rdpq_dither_t + */ +inline void rdpq_mode_dithering(rdpq_dither_t dither) { + __rdpq_mode_change_som( + SOM_RGBDITHER_MASK | SOM_ALPHADITHER_MASK, ((uint64_t)dither << SOM_ALPHADITHER_SHIFT)); +} + +/** + * @brief Activate alpha compare feature + * + * This function activates the alpha compare feature. It allows to do per-pixel + * rejection (masking) depending on the value of the alpha component of the pixel. + * The value output from the combiner is compared with a configured threshold + * and if the value is lower, the pixel is not written to the framebuffer. + * + * Moreover, RDP also support a random noise alpha compare mode, where the threshold + * value is calculated as a random number for each pixel. This can be used for special + * graphic effects. + * + * @note Alpha compare becomes more limited if antialiasing is enabled (both full and reduced, + * see #rdpq_mode_antialias). In that case, any threshold value not equal to 0 will + * internally be treated as if 255 was specified. 
This implies that noise-based + * alpha compare is not supported under this condition. + * + * @param threshold Threshold value. All pixels whose alpha is less than this threshold + * will not be drawn. Use 0 to disable. Use a negative value for + * activating the noise-based alpha compare. + */ +inline void rdpq_mode_alphacompare(int threshold) { + if (threshold == 0) { + __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, 0); + } else if (threshold > 0) { + __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, SOM_ALPHACOMPARE_THRESHOLD); + rdpq_set_blend_color(RGBA32(0,0,0,threshold)); + } else { + __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, SOM_ALPHACOMPARE_NOISE); + } +} + +/** + * @brief Activate z-buffer usage + * + * Activate usage of Z-buffer. The Z-buffer surface must be configured + * via #rdpq_set_z_image. + * + * It is possible to separately activate the depth comparison + * (*reading* from the Z-buffer) and the Z update (*writing* to + * the Z-buffer). + * + * @param compare True if per-pixel depth test must be performed + * @param update True if per-pixel depth write must be performed + * + * @see #rdpq_set_z_image + */ +inline void rdpq_mode_zbuf(bool compare, bool update) { + __rdpq_mode_change_som( + SOM_Z_COMPARE | SOM_Z_WRITE, + (compare ? SOM_Z_COMPARE : 0) | + (update ? SOM_Z_WRITE : 0) + ); +} + +/** + * @brief Set a fixed override of Z value + * + * This function activates a special mode in which RDP will use a fixed value + * of Z for the next drawn primitives. This works with both rectangles + * (#rdpq_fill_rectangle and #rdpq_texture_rectangle) and triangles + * (#rdpq_triangle). + * + * If a triangle is drawn with per-vertex Z while the Z-override is active, + * the per-vertex Z will be ignored. + * + * @param enable Enable/disable the Z-override mode + * @param z Z value to use (range 0..1) + * @param deltaz DeltaZ value to use. 
+ * + * @see #rdpq_set_prim_depth_raw + */ +inline void rdpq_mode_zoverride(bool enable, float z, int16_t deltaz) { + if (enable) rdpq_set_prim_depth_raw(z * 0x7FFF, deltaz); + __rdpq_mode_change_som( + SOM_ZSOURCE_PRIM, enable ? SOM_ZSOURCE_PRIM : 0 + ); +} + + +/** + * @brief Activate palette lookup during drawing + * + * This function allows to enable / disable palette lookup during + * drawing. To draw using a texture with palette, it is necessary + * to first load the texture into TMEM (eg: via #rdpq_tex_upload), + * then load the palette (eg: via #rdpq_tex_upload_tlut), + * and finally activate the palette drawing mode via #rdpq_mode_tlut. + * + * @param tlut Palette type, or #TLUT_NONE to disable. + * + * @see #rdpq_tex_upload + * @see #rdpq_tex_upload_tlut + * @see #rdpq_tlut_t + */ +inline void rdpq_mode_tlut(rdpq_tlut_t tlut) { + // This assert is useful to catch the common mistake of rdpq_mode_tlut(true) + assertf(tlut == TLUT_NONE || tlut == TLUT_RGBA16 || tlut == TLUT_IA16, "invalid TLUT type"); + __rdpq_mode_change_som(SOM_TLUT_MASK, (uint64_t)tlut << SOM_TLUT_SHIFT); +} + +/** + * @brief Activate texture filtering + * + * This function allows to configure the kind of texture filtering that will be used + * while sampling textures. + * + * Available in render modes: standard, copy. + * + * @param filt Texture filtering type + * + * @see #rdpq_filter_t + */ +inline void rdpq_mode_filter(rdpq_filter_t filt) { + __rdpq_mode_change_som(SOM_SAMPLE_MASK, (uint64_t)filt << SOM_SAMPLE_SHIFT); +} + +/** + * @brief Activate mip-mapping. + * + * This function can be used to turn on mip-mapping. + * + * TMEM must have been loaded with multiple levels of detail (LODs) of the texture + * (a task for which rdpq is currently missing a helper, so it has to be done manually). + * Also, multiple consecutive tile descriptors (one for each LOD) must have been configured. 
+ * + * If you call #rdpq_triangle when mipmap is active via #rdpq_mode_mipmap, pass 0 + * to the number of mipmaps in #rdpq_trifmt_t, as the number of levels set here + * will win over it. + * + * @param mode Mipmapping mode (use #MIPMAP_NONE to disable) + * @param num_levels Number of mipmap levels to use. Pass 0 when setting MIPMAP_NONE. + */ +inline void rdpq_mode_mipmap(rdpq_mipmap_t mode, int num_levels) { + if (mode == MIPMAP_NONE) + num_levels = 0; + if (num_levels) + num_levels -= 1; + __rdpq_mode_change_som(SOM_TEXTURE_LOD | SOMX_LOD_INTERPOLATE | SOMX_NUMLODS_MASK | SOM_TEXTURE_SHARPEN | SOM_TEXTURE_DETAIL, + ((uint64_t)mode << 32) | ((uint64_t)num_levels << SOMX_NUMLODS_SHIFT)); +} + +/** + * @brief Activate perspective correction for textures + * + * This function enables or disables the perspective correction for texturing. + * Perspective correction does not slow down rendering, and thus it is basically + * free. + * + * To be able to use perspective correction, make sure to pass the Z and W values + * in the triangle vertices. + * + * @param perspective True to activate perspective correction, false to disable it. + */ +inline void rdpq_mode_persp(bool perspective) +{ + __rdpq_mode_change_som(SOM_TEXTURE_PERSP, perspective ? SOM_TEXTURE_PERSP : 0); +} + +/** @} */ + +/** + * @brief Start a batch of RDP mode changes + * + * This function can be used as an optimization when changing render mode + * and/or multiple render states. It allows to batch the changes, so that + * RDP hardware registers are updated only once. + * + * To use it, put a call to #rdpq_mode_begin and #rdpq_mode_end around + * the mode functions that you would like to batch. 
For instance: + * + * @code{.c} + * rdpq_mode_begin(); + * rdpq_set_mode_standard(); + * rdpq_mode_mipmap(MIPMAP_INTERPOLATE, 2); + * rdpq_mode_dithering(DITHER_SQUARE_SQUARE); + * rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY); + * rdpq_mode_end(); + * @endcode + * + * The only effect of using #rdpq_mode_begin is more efficient RSP + * and RDP usage, there is no semantic change in the way RDP is + * programmed when #rdpq_mode_end is called. + * + * @note The functions affected by #rdpq_mode_begin / #rdpq_mode_end + * are just those that are part of the mode API (that is, + * `rdpq_set_mode_*` and `rdpq_mode_*`). Any other function + * is not batched and will be issued immediately. + */ +void rdpq_mode_begin(void); + +/** + * @brief Finish a batch of RDP mode changes + * + * This function completes a batch of changes started with #rdpq_mode_begin. + * + * @see #rdpq_mode_begin + */ +void rdpq_mode_end(void); + +/******************************************************************** + * Internal functions (not part of public API) + ********************************************************************/ + +///@cond +inline void __rdpq_mode_change_som(uint64_t mask, uint64_t val) +{ + // This is identical to #rdpq_change_other_modes_raw, but we also + // set bit 1<<15 in the first word. That flag tells the RSP code + // to recalculate the render mode, in addition to flipping the bits. + // #rdpq_change_other_modes_raw instead just changes the bits as + // you would expect from a raw API. 
+ extern void __rdpq_fixup_mode3(uint32_t cmd_id, uint32_t w0, uint32_t w1, uint32_t w2); + if (mask >> 32) + __rdpq_fixup_mode3(RDPQ_CMD_MODIFY_OTHER_MODES, 0 | (1<<15), ~(mask >> 32), val >> 32); + if ((uint32_t)mask) + __rdpq_fixup_mode3(RDPQ_CMD_MODIFY_OTHER_MODES, 4 | (1<<15), ~(uint32_t)mask, (uint32_t)val); +} +///@endcond + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rdpq_rect.h b/include/rdpq_rect.h new file mode 100644 index 0000000000..afc7d8f895 --- /dev/null +++ b/include/rdpq_rect.h @@ -0,0 +1,403 @@ +/** + * @file rdpq_rect.h + * @brief RDP Command queue + * @ingroup rdpq + */ + +#ifndef LIBDRAGON_RDPQ_RECT_H +#define LIBDRAGON_RDPQ_RECT_H + +#include "rdpq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Internal functions used for inline optimizations. Not part of the public API. +// Do not call directly +/// @cond +#define __UNLIKELY(x) __builtin_expect(!!(x), 0) + +__attribute__((always_inline)) +inline void __rdpq_fill_rectangle_inline(int32_t x0, int32_t y0, int32_t x1, int32_t y1) { + if (__UNLIKELY(x0 < 0)) x0 = 0; + if (__UNLIKELY(y0 < 0)) y0 = 0; + if (__UNLIKELY(x1 > 0xFFF)) x1 = 0xFFF; + if (__UNLIKELY(y1 > 0xFFF)) y1 = 0xFFF; + if (__UNLIKELY(x0 >= x1 || y0 >= y1)) return; + + extern void __rdpq_fill_rectangle(uint32_t w0, uint32_t w1); + __rdpq_fill_rectangle( + _carg(x1, 0xFFF, 12) | _carg(y1, 0xFFF, 0), + _carg(x0, 0xFFF, 12) | _carg(y0, 0xFFF, 0)); +} + +__attribute__((always_inline)) +inline void __rdpq_texture_rectangle_inline(rdpq_tile_t tile, + int32_t x0, int32_t y0, int32_t x1, int32_t y1, + int32_t s0, int32_t t0) +{ + if (__UNLIKELY(x1 == x0 || y1 == y0)) return; + int32_t dsdx = 1<<10, dtdy = 1<<10; + + if (__UNLIKELY(x0 > x1)) { + int32_t tmp = x0; x0 = x1; x1 = tmp; + x0 += 4; x1 += 4; + s0 += (x1 - x0 - 4) << 3; + dsdx = -dsdx; + } + if (__UNLIKELY(y0 > y1)) { + int32_t tmp = y0; y0 = y1; y1 = tmp; + y0 += 4; y1 += 4; + t0 += (y1 - y0 - 4) << 3; + dtdy = -dtdy; + } + if (__UNLIKELY(x0 < 0)) { + s0 
-= (x0 * dsdx) >> 7; + x0 = 0; + if (__UNLIKELY(x0 >= x1)) return; + } + if (__UNLIKELY(y0 < 0)) { + t0 -= (y0 * dtdy) >> 7; + y0 = 0; + if (__UNLIKELY(y0 >= y1)) return; + } + if (__UNLIKELY(x1 > 1024*4-1)) { + x1 = 1024*4-1; + if (__UNLIKELY(x0 >= x1)) return; + } + if (__UNLIKELY(y1 > 1024*4-1)) { + y1 = 1024*4-1; + if (__UNLIKELY(y0 >= y1)) return; + } + + extern void __rdpq_texture_rectangle(uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3); + __rdpq_texture_rectangle( + _carg(x1, 0xFFF, 12) | _carg(y1, 0xFFF, 0), + _carg(tile, 0x7, 24) | _carg(x0, 0xFFF, 12) | _carg(y0, 0xFFF, 0), + _carg(s0, 0xFFFF, 16) | _carg(t0, 0xFFFF, 0), + _carg(dsdx, 0xFFFF, 16) | _carg(dtdy, 0xFFFF, 0)); +} + +__attribute__((always_inline)) +inline void __rdpq_texture_rectangle_scaled_inline(rdpq_tile_t tile, + int32_t x0, int32_t y0, int32_t x1, int32_t y1, + int32_t s0, int32_t t0, int32_t s1, int32_t t1) +{ + if (__UNLIKELY(x1 == x0 || y1 == y0)) return; + int32_t dsdx = ((s1 - s0) << 7) / (x1 - x0), dtdy = ((t1 - t0) << 7) / (y1 - y0); + + if (__UNLIKELY(x0 > x1)) { + int32_t tmp = x0; x0 = x1; x1 = tmp; + s0 += ((x0 - x1 - 4) * dsdx) >> 7; + } + if (__UNLIKELY(y0 > y1)) { + int32_t tmp = y0; y0 = y1; y1 = tmp; + t0 += ((y0 - y1 - 4) * dtdy) >> 7; + } + if (__UNLIKELY(x0 < 0)) { + s0 -= (x0 * dsdx) >> 7; + x0 = 0; + if (__UNLIKELY(x0 >= x1)) return; + } + if (__UNLIKELY(y0 < 0)) { + t0 -= (y0 * dtdy) >> 7; + y0 = 0; + if (__UNLIKELY(y0 >= y1)) return; + } + if (__UNLIKELY(x1 > 1024*4-1)) { + s1 -= ((x1 - (1024*4-1)) * dsdx) >> 7; + x1 = 1024*4-1; + if (__UNLIKELY(x0 >= x1)) return; + } + if (__UNLIKELY(y1 > 1024*4-1)) { + t1 -= ((y1 - (1024*4-1)) * dtdy) >> 7; + y1 = 1024*4-1; + if (__UNLIKELY(y0 >= y1)) return; + } + + extern void __rdpq_texture_rectangle(uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3); + __rdpq_texture_rectangle( + _carg(x1, 0xFFF, 12) | _carg(y1, 0xFFF, 0), + _carg(tile, 0x7, 24) | _carg(x0, 0xFFF, 12) | _carg(y0, 0xFFF, 0), + _carg(s0, 0xFFFF, 16) | 
_carg(t0, 0xFFFF, 0), + _carg(dsdx, 0xFFFF, 16) | _carg(dtdy, 0xFFFF, 0)); +} + +inline void __rdpq_fill_rectangle_fx(int32_t x0, int32_t y0, int32_t x1, int32_t y1) +{ + if (__builtin_constant_p(x0) && __builtin_constant_p(y0) && __builtin_constant_p(x1) && __builtin_constant_p(y1)) { + __rdpq_fill_rectangle_inline(x0, y0, x1, y1); + } else { + extern void __rdpq_fill_rectangle_offline(int32_t x0, int32_t y0, int32_t x1, int32_t y1); + __rdpq_fill_rectangle_offline(x0, y0, x1, y1); + } +} + +inline void __rdpq_texture_rectangle_fx(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s, int32_t t) +{ + if (__builtin_constant_p(x0) && __builtin_constant_p(y0) && __builtin_constant_p(x1) && __builtin_constant_p(y1)) { + __rdpq_texture_rectangle_inline(tile, x0, y0, x1, y1, s, t); + } else { + extern void __rdpq_texture_rectangle_offline(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0); + __rdpq_texture_rectangle_offline(tile, x0, y0, x1, y1, s, t); + } +} + +inline void __rdpq_texture_rectangle_scaled_fx(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0, int32_t s1, int32_t t1) +{ + if (__builtin_constant_p(x0) && __builtin_constant_p(y0) && __builtin_constant_p(x1) && __builtin_constant_p(y1)) { + __rdpq_texture_rectangle_scaled_inline(tile, x0, y0, x1, y1, s0, t0, s1, t1); + } else { + extern void __rdpq_texture_rectangle_scaled_offline(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0, int32_t s1, int32_t t1); + __rdpq_texture_rectangle_scaled_offline(tile, x0, y0, x1, y1, s0, t0, s1, t1); + } +} + +inline void __rdpq_texture_rectangle_raw_fx(rdpq_tile_t tile, uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint16_t s0, uint16_t t0, int16_t dsdx, int16_t dtdy) +{ + extern void __rdpq_texture_rectangle(uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3); + __rdpq_texture_rectangle( + _carg(x1, 0xFFF, 12) | _carg(y1, 0xFFF, 
0), + _carg(tile, 0x7, 24) | _carg(x0, 0xFFF, 12) | _carg(y0, 0xFFF, 0), + _carg(s0, 0xFFFF, 16) | _carg(t0, 0xFFFF, 0), + _carg(dsdx, 0xFFFF, 16) | _carg(dtdy, 0xFFFF, 0)); +} + +inline void __rdpq_texture_rectangle_flip_raw_fx(rdpq_tile_t tile, uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, int16_t s, int16_t t, int16_t dsdy, int16_t dtdx) +{ + extern void __rdpq_write16_syncuse(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); + + // Note that this command is broken in copy mode, so it doesn't + // require any fixup. The RSP will trigger an assert if this + // is called in such a mode. + __rdpq_write16_syncuse(RDPQ_CMD_TEXTURE_RECTANGLE_FLIP, + _carg(x1, 0xFFF, 12) | _carg(y1, 0xFFF, 0), + _carg(tile, 0x7, 24) | _carg(x0, 0xFFF, 12) | _carg(y0, 0xFFF, 0), + _carg(s, 0xFFFF, 16) | _carg(t, 0xFFFF, 0), + _carg(dsdy, 0xFFFF, 16) | _carg(dtdx, 0xFFFF, 0), + AUTOSYNC_PIPE | AUTOSYNC_TILE(tile) | AUTOSYNC_TMEM(0)); +} +#undef __UNLIKELY +/// @endcond + +/** + * @name Standard rectangle functions + * + * These functions can be used to directly draw filled and/or textured rectangles + * on the screen. While a rectangle can always be drawn via two triangles, + * directly invoking the rectangle functions when possible is more efficient on + * both the CPU and the RDP. + * + * The functions are defined as macros so that they can efficiently accept either + * integers or floating point values. Usage of fractional values is required for + * subpixel precision. + * + * \{ + */ + +/** + * @brief Draw a filled rectangle (RDP command: FILL_RECTANGLE) + * + * This command is used to render a rectangle filled with a solid color. + * The color must have been configured via #rdpq_set_fill_color, and the + * render mode should be set to FILL via #rdpq_set_mode_fill. + * + * The rectangle must be defined using exclusive bottom-right bounds, so for + * instance `rdpq_fill_rectangle(10,10,30,30)` will draw a square of exactly + * 20x20 pixels. 
+ * + * Fractional values can be used, and will create a semi-transparent edge. For + * instance, `rdpq_fill_rectangle(9.75, 9.75, 30.25, 30.25)` will create a 22x22 pixel + * square, with the most external pixel rows and columns having an alpha of 25%. + * This obviously makes more sense in RGBA32 mode where there is enough alpha + * bitdepth to appreciate the result. Make sure to configure the blender via + * #rdpq_mode_blender (part of the mode API) or via the lower-level #rdpq_set_other_modes_raw, + * to decide the blending formula. + * + * @code{.c} + * // Fill the screen with red color. + * rdpq_set_mode_fill(RGBA32(255, 0, 0, 0)); + * rdpq_fill_rectangle(0, 0, 320, 240); + * @endcode + * + * + * @param[in] x0 Top-left X coordinate of the rectangle (integer or float) + * @param[in] y0 Top-left Y coordinate of the rectangle (integer or float) + * @param[in] x1 Bottom-right *exclusive* X coordinate of the rectangle (integer or float) + * @param[in] y1 Bottom-right *exclusive* Y coordinate of the rectangle (integer or float) + * + * @see rdpq_set_fill_color + * @see rdpq_set_fill_color_stripes + * + * @hideinitializer + */ +#define rdpq_fill_rectangle(x0, y0, x1, y1) ({ \ + __rdpq_fill_rectangle_fx((x0)*4, (y0)*4, (x1)*4, (y1)*4); \ +}) + +/** + * @brief Draw a textured rectangle (RDP command: TEXTURE_RECTANGLE) + * + * This function enqueues a RDP TEXTURE_RECTANGLE command, that allows to draw a + * textured rectangle onto the framebuffer (similar to a sprite). + * + * The texture must have been already loaded into TMEM via #rdpq_load_tile or + * #rdpq_load_block, and a tile descriptor referring to it must be passed to this + * function. + * + * Input X and Y coordinates are automatically clipped to the screen boundaries (and + * then scissoring also takes effect), so there is no specific range + * limit to them. On the contrary, S and T coordinates have a specific range + * (-1024..1024). 
+ * + * When x0 > x1 or y0 > y1, the rectangle is drawn flipped (mirrored) on either + * axis (or both, which basically rotates it by 180° instead). + * + * Before calling this function, make sure to also configure an appropriate + * render mode. It is possible to use the fast copy mode (#rdpq_set_mode_copy) with + * this function, assuming that advanced blending or color combiner capabilities + * are not needed. The copy mode can in fact just blit the pixels from the texture + * unmodified, applying only a per-pixel rejection to mask out transparent pixels + * (via alpha compare). See #rdpq_set_mode_copy for more information. + * + * Alternatively, it is possible to use this command also in standard render mode + * (#rdpq_set_mode_standard), with all the per-pixel blending / combining features. + * + * Normally, rectangles are drawn without any respect for the z-buffer (if any is + * configured). The only option here is to provide a single Z value valid for the + * whole rectangle by using #rdpq_mode_zoverride in the mode API + * (or manually calling #rdpq_set_prim_depth_raw). In fact, it is not possible + * to specify a per-vertex Z value. + * + * Similarly, it is not possible to specify a per-vertex color/shade value, but + * instead it is possible to setup a combiner that applies a fixed color to the + * pixels of the rectangle (eg: #RDPQ_COMBINER_TEX_FLAT). + * + * If you need a full Z-buffering or shading support, an alternative is to + * call #rdpq_triangle instead, and thus draw the rectangles as two triangles. + * This will however incur in more overhead on the CPU to setup the primitives. 
+ * + * @param[in] tile Tile descriptor referring to the texture in TMEM to use for drawing + * @param[in] x0 Top-left X coordinate of the rectangle + * @param[in] y0 Top-left Y coordinate of the rectangle + * @param[in] x1 Bottom-right *exclusive* X coordinate of the rectangle + * @param[in] y1 Bottom-right *exclusive* Y coordinate of the rectangle + * @param[in] s S coordinate of the texture at the top-left corner (range: -1024..1024) + * @param[in] t T coordinate of the texture at the top-left corner (range: -1024..1024) + * + * @hideinitializer + */ +// NOTE: we use a macro here to support both integer and float inputs without ever forcing +// a useless additional conversion. +#define rdpq_texture_rectangle(tile, x0, y0, x1, y1, s, t) \ + __rdpq_texture_rectangle_fx((tile), (x0)*4, (y0)*4, (x1)*4, (y1)*4, (s)*32, (t)*32) + +/** + * @brief Draw a textured rectangle with scaling (RDP command: TEXTURE_RECTANGLE) + * + * This function is similar to #rdpq_texture_rectangle but allows the rectangle + * to be scaled horizontally and/or vertically, by specifying both the source + * rectangle in the texture, and the rectangle on the screen. + * + * Refer to #rdpq_texture_rectangle for more details on how this command works. 
+ * + * @param[in] tile Tile descriptor referring to the texture in TMEM to use for drawing + * @param[in] x0 Top-left X coordinate of the rectangle + * @param[in] y0 Top-left Y coordinate of the rectangle + * @param[in] x1 Bottom-right *exclusive* X coordinate of the rectangle + * @param[in] y1 Bottom-right *exclusive* Y coordinate of the rectangle + * @param[in] s0 S coordinate of the texture at the top-left corner (range: -1024..1024) + * @param[in] t0 T coordinate of the texture at the top-left corner (range: -1024..1024) + * @param[in] s1 S coordinate of the texture at the bottom-right corner (exclusive) (range: -1024..1024) + * @param[in] t1 T coordinate of the texture at the bottom-right corner (exclusive) (range: -1024..1024) + * + * @hideinitializer + */ +#define rdpq_texture_rectangle_scaled(tile, x0, y0, x1, y1, s0, t0, s1, t1) \ + __rdpq_texture_rectangle_scaled_fx((tile), (x0)*4, (y0)*4, (x1)*4, (y1)*4, (s0)*32, (t0)*32, (s1)*32, (t1)*32) + + +/** + * \} + * + * @name Raw rectangle functions + * + * These functions are similar to the above ones, but they closely match the hardware + * commands to be sent to RDP. They are exposed for completeness, but most users + * should use the standard ones, as they provide a easier and more consistent API. + * + * The main differences are that these functions accept only positive integers (so clipping + * on negative numbers should be performed by the caller, if needed), and the textured + * functions need the per-pixel horizontal and vertical increments. + * + * \{ + */ + +/** + * @brief Draw a textured rectangle with scaling -- raw version (RDP command: TEXTURE_RECTANGLE) + * + * This function is similar to #rdpq_texture_rectangle but it does not perform any + * preprocessing on the input coordinates. Most users should use #rdpq_texture_rectangle + * or #rdpq_texture_rectangle_scaled instead. + * + * Refer to #rdpq_texture_rectangle for more details on how this command works. 
+ * + * @param tile Tile descriptor referring to the texture in TMEM to use for drawing + * @param x0 Top-left X coordinate of the rectangle (range: 0..1024) + * @param y0 Top-left Y coordinate of the rectangle (range: 0..1024) + * @param x1 Bottom-right *exclusive* X coordinate of the rectangle (range: 0..1024) + * @param y1 Bottom-right *exclusive* Y coordinate of the rectangle (range: 0..1024) + * @param s0 S coordinate of the texture at the top-left corner (range: -1024..1024) + * @param t0 T coordinate of the texture at the top-left corner (range: -1024..1024) + * @param dsdx Horizontal increment of S coordinate per pixel (range: -32..32) + * @param dtdy Vertical increment of T coordinate per pixel (range: -32..32) + * + * @see #rdpq_texture_rectangle + * @see #rdpq_texture_rectangle_scaled + * + * @hideinitializer + */ +#define rdpq_texture_rectangle_raw(tile, x0, y0, x1, y1, s0, t0, dsdx, dtdy) \ + __rdpq_texture_rectangle_raw_fx(tile, (x0)*4, (y0)*4, (x1)*4, (y1)*4, (s0)*32, (t0)*32, (dsdx)*1024, (dtdy)*1024) + + +/** + * @brief Draw a textured flipped rectangle (RDP command: TEXTURE_RECTANGLE_FLIP) + * + * The RDP command TEXTURE_RECTANGLE_FLIP is similar to TEXTURE_RECTANGLE, but the + * texture S coordinate is incremented over the Y axis, while the texture T coordinate + * is incremented over the X axis. The graphical effect is similar to a 90° degree + * rotation plus a mirroring of the texture. + * + * Notice that this command cannot work in COPY mode, so the standard render mode + * must be activated (via #rdpq_set_mode_standard). + * + * Refer to #rdpq_texture_rectangle_raw for further information. 
+ * + * @param[in] tile Tile descriptor referring to the texture in TMEM to use for drawing + * @param[in] x0 Top-left X coordinate of the rectangle + * @param[in] y0 Top-left Y coordinate of the rectangle + * @param[in] x1 Bottom-right *exclusive* X coordinate of the rectangle + * @param[in] y1 Bottom-right *exclusive* Y coordinate of the rectangle + * @param[in] s S coordinate of the texture at the top-left corner + * @param[in] t T coordinate of the texture at the top-left corner + * @param[in] dsdy Signed increment of S coordinate for each vertical pixel. + * @param[in] dtdx Signed increment of T coordinate for each horizontal pixel. + * + * @hideinitializer + */ +#define rdpq_texture_rectangle_flip_raw(tile, x0, y0, x1, y1, s, t, dsdy, dtdx) ({ \ + __rdpq_texture_rectangle_flip_raw_fx((tile), (x0)*4, (y0)*4, (x1)*4, (y1)*4, (s)*32, (t)*32, (dsdy)*1024, (dtdx)*1024); \ +}) + + +/** + * \} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rdpq_sprite.h b/include/rdpq_sprite.h new file mode 100644 index 0000000000..812072cb5e --- /dev/null +++ b/include/rdpq_sprite.h @@ -0,0 +1,127 @@ +/** + * @file rdpq_sprite.h + * @brief RDP Command queue: high-level sprite loading and blitting + * @ingroup rdpq + * + * This file contains high-level functions for uploading and drawing sprites. + * They are similar in nature to the functions in rdpq_tex.h, but they should + * be preferred when manipulating sprites as they can benefit from advanced + * functionality such as optimized sprites, mipmapping, palette configuration, etc. 
+ */ + +#ifndef LIBDRAGON_RDPQ_SPRITE_H +#define LIBDRAGON_RDPQ_SPRITE_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +///@cond +typedef struct sprite_s sprite_t; +typedef struct rdpq_texparms_s rdpq_texparms_t; +typedef struct rdpq_blitparms_s rdpq_blitparms_t; +///@endcond + +/** + * @brief Upload a sprite to TMEM, making it ready for drawing + * + * This function will upload a sprite to TMEM, making it ready for drawing. + * It is similar to #rdpq_tex_upload which can be used for any surface, but + * it builds upon it with sprite-specific features: + * + * * If the sprite contains mipmaps, the whole mipmap chain is uploaded to TMEM + * as well. Moreover, mipmaps are automatically enabled in the render mode + * (via #rdpq_mode_mipmap). + * * If the sprite contains a palette, it is uploaded to TMEM as well, and the + * palette is also activated in the render mode (via #rdpq_mode_tlut). + * * If the sprite is optimized (via mksprite --optimize), the upload function + * will be faster. + * + * After calling this function, the specified tile descriptor will be ready + * to be used in drawing primitives like #rdpq_triangle or #rdpq_texture_rectangle. + * + * This function is meant for sprites that can be loaded in full into TMEM; it + * will assert if the sprite does not fit TMEM. For larger sprites, either + * use #rdpq_sprite_blit to directly draw then (handling partial uploads transparently), + * or use #rdpq_tex_upload_sub to manually upload a smaller portion of the sprite. + * + * To load multiple sprites in TMEM at once (for instance, for multitexturing), + * you can manually specify the @p parms->tmem_addr for the second sprite, or + * call #rdpq_tex_multi_begin / #rdpq_tex_multi_end around multiple calls to + * #rdpq_sprite_upload. For instance: + * + * @code{.c} + * // Load multiple sprites in TMEM, with auto-TMEM allocation. 
+ * rdpq_tex_multi_begin(); + * rdpq_sprite_upload(TILE0, sprite0, NULL); + * rdpq_sprite_upload(TILE1, sprite1, NULL); + * rdpq_tex_multi_end(); + * @endcode + * + * To speed up loading of a sprite, you can record the loading sequence in + * a rspq block and replay it any time later. For instance: + * + * @code{.c} + * sprite_t *hero = sprite_load("rom:/hero.sprite"); + * + * // Record the loading sequence in a rspq block + * rspq_block_begin(); + * rdpq_sprite_upload(TILE0, hero, NULL); + * rspq_block_t *hero_load = rspq_block_end(); + * + * // Later, load the sprite + * rspq_block_run(hero_load); + * + * // Remember to free the block when you don't need it anymore + * rspq_wait(); // wait until RSP is idle + * rspq_block_free(hero_load); + * sprite_free(hero); + * @endcode + * + * @param tile Tile descriptor that will be initialized with this sprite + * @param sprite Sprite to upload + * @param parms Texture upload parameters to use + * @return Number of bytes used in TMEM for this sprite (excluding palette) + * + * @see #rdpq_tex_upload + * @see #rdpq_tex_upload_sub + * @see #rdpq_sprite_blit + */ +int rdpq_sprite_upload(rdpq_tile_t tile, sprite_t *sprite, const rdpq_texparms_t *parms); + + +/** + * @brief Blit a sprite to the active framebuffer + * + * This function will perform a blit of a sprite to the active framebuffer, + * with several features like source rectangle selection, scaling, rotation, etc. + * + * The function is similar to #rdpq_tex_blit, but it works on a sprite rather than + * a generic surface. In addition to the standard features of #rdpq_tex_blit, + * it will also handle sprite-specific features: + * + * * If the sprite contains a palette, it is uploaded to TMEM as well, and the + * palette is also activated in the render mode (via #rdpq_mode_tlut). + * * If the sprite is optimized (via mksprite --optimize), the upload function + * will be faster. 
+ * + * Just like #rdpq_tex_blit, this function is designed to work with sprites of + * arbitrary sizes; those that won't fit in TMEM will be automatically split + * in multiple chunks to perform the requested operation. + * + * Please refer to #rdpq_tex_blit for a full overview of the features. + * + * @param sprite Sprite to blit + * @param x0 X coordinate on the framebuffer where to draw the surface + * @param y0 Y coordinate on the framebuffer where to draw the surface + * @param parms Parameters for the blit operation (or NULL for default) + */ +void rdpq_sprite_blit(sprite_t *sprite, float x0, float y0, const rdpq_blitparms_t *parms); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rdpq_tex.h b/include/rdpq_tex.h new file mode 100644 index 0000000000..13fd673ac0 --- /dev/null +++ b/include/rdpq_tex.h @@ -0,0 +1,421 @@ +/** + * @file rdpq_tex.h + * @brief RDP Command queue: high-level texture/surface loading and blitting + * @ingroup rdpq + */ + +#ifndef LIBDRAGON_RDPQ_TEX_H +#define LIBDRAGON_RDPQ_TEX_H + +#include "rdpq.h" +#include + +///@cond +typedef struct surface_s surface_t; +///@endcond + +#ifdef __cplusplus +extern "C" { +#endif + + +/// Enable mirroring when wrapping the texture, used in #rdpq_texparms_t +#define MIRROR_REPEAT true +/// Disable mirroring when wrapping the texture, used in #rdpq_texparms_t +#define MIRROR_NONE false +/// Enable infinite repeat for the texture, used in #rdpq_texparms_t +#define REPEAT_INFINITE 2048 + +/** + * @brief Texture sampling parameters for #rdpq_tex_upload. + * + * This structure contains all possible parameters for #rdpq_tex_upload. + * All fields have been made so that the 0 value is always the most + * reasonable default. This means that you can simply initialize the structure + * to 0 and then change only the fields you need (for instance, through a + * compound literal). 
+ * + */ +typedef struct rdpq_texparms_s { + int tmem_addr; ///< TMEM address where to load the texture (default: 0) + int palette; ///< Palette number where TLUT is stored (used only for CI4 textures) + + struct { + float translate; ///< Translation of the texture (in pixels) + int scale_log; ///< Power of 2 scale modifier of the texture (default: 0). Eg: -2 = make the texture 4 times smaller + + float repeats; ///< Number of repetitions before the texture clamps (default: 1). Use #REPEAT_INFINITE for infinite repetitions (wrapping) + bool mirror; ///< Repetition mode (default: MIRROR_NONE). If true (MIRROR_REPEAT), the texture mirrors at each repetition + } s, t; // S/T directions of texture parameters + +} rdpq_texparms_t; + +// Multi-pass optimized texture loader +// Not part of the public API yet +///@cond +enum tex_load_mode { + TEX_LOAD_UNKNOWN, + TEX_LOAD_TILE, + TEX_LOAD_BLOCK, +}; + +typedef struct tex_loader_s { + const surface_t *tex; + rdpq_tile_t tile; + const rdpq_texparms_t *texparms; + rdpq_tileparms_t tileparms; + struct { + int width, height; + int num_texels, tmem_pitch; + int block_max_lines; + bool can_load_block; + int s0fx, t0fx, s1fx, t1fx; + } rect; + int tmem_addr; + enum tex_load_mode load_mode; + void (*load_block)(struct tex_loader_s *tload, int s0, int t0, int s1, int t1); + void (*load_tile)(struct tex_loader_s *tload, int s0, int t0, int s1, int t1); +} tex_loader_t; +tex_loader_t tex_loader_init(rdpq_tile_t tile, const surface_t *tex); +int tex_loader_load(tex_loader_t *tload, int s0, int t0, int s1, int t1); +void tex_loader_set_tmem_addr(tex_loader_t *tload, int tmem_addr); +int tex_loader_calc_max_height(tex_loader_t *tload, int width); +///@endcond + + +/** + * @brief Load a texture into TMEM + * + * This function helps loading a texture into TMEM, which normally involves: + * + * * Configuring a tile descriptor (via #rdpq_set_tile) + * * Setting the source texture image (via #rdpq_set_texture_image) + * * Loading the texture 
(via #rdpq_load_tile or #rdpq_load_block) + * + * After calling this function, the specified tile descriptor will be ready + * to be used in drawing primitives like #rdpq_triangle or #rdpq_texture_rectangle. + * + * If the texture uses a palette (#FMT_CI8 or #FMT_CI4), the tile descriptor + * will be by default pointing to palette 0. In the case of #FMT_CI4, this + * might not be the correct palette; to specify a different palette number, + * add .palette = X to the tex parms. Before drawing a texture with palette, + * remember to call #rdpq_mode_tlut to activate palette mode. + * + * If you want to load a portion of a texture rather than the full texture, + * use #rdpq_tex_upload_sub, or alternatively create a sub-surface using + * #surface_make_sub and pass it to #rdpq_tex_upload. See #rdpq_tex_upload_sub + * for an example of both techniques. + * + * @param tile Tile descriptor that will be initialized with this texture + * @param tex Surface containing the texture to load + * @param parms All optional parameters on where to load the texture and how to sample it. Refer to #rdpq_texparms_t for more information. + * @return Number of bytes used in TMEM for this texture + * + * @see #rdpq_tex_upload_sub + * @see #surface_make_sub + */ +int rdpq_tex_upload(rdpq_tile_t tile, const surface_t *tex, const rdpq_texparms_t *parms); + +/** + * @brief Load a portion of texture into TMEM + * + * This function is similar to #rdpq_tex_upload, but only loads a portion of a texture + * in TMEM. The portion is specified as a rectangle (with exclusive bounds) that must + * be contained within the original texture. + * + * Notice that, after calling this function, you must draw the polygon using texture + * coordinates that are contained within the loaded ones. For instance: + * + * @code{.c} + * // Load a 32x32 sprite starting at position (100,100) in the + * // "spritemap" surface. + * rdpq_tex_upload_sub(TILE2, spritemap, 0, 100, 100, 132, 132); + * + * // Draw the sprite. 
Notice that we must refer to it using the + * // original texture coordinates, even if just that portion is in TMEM. + * rdpq_texture_rectangle(TILE2, + * pos_x, pos_y, pos_x+32, pos_y+32, // screen coordinates of the sprite + * 100, 100 // texture coordinates + * ); // no increments to pass: the rectangle is drawn unscaled + * @endcode + * + * An alternative to this function is to call #surface_make_sub on the texture + * to create a sub-surface, and then call rdpq_tex_upload on the sub-surface. + * The same data will be loaded into TMEM but this time the RDP ignores that + * you are loading a portion of a larger texture: + * + * @code{.c} + * // Create a sub-surface of spritemap texture. No memory allocations + * // or pixel copies are performed, this is just a rectangular "window" + * // into the original texture. + * surface_t hero = surface_make_sub(spritemap, 100, 100, 32, 32); + * + * // Load the sub-surface. Notice that the RDP is unaware that it is + * // a sub-surface; it will think that it is a whole texture. + * rdpq_tex_upload(TILE2, &hero, 0); + * + * // Draw the sprite. Notice that we must refer to it using + * // texture coordinates (0,0). + * rdpq_texture_rectangle(TILE2, + * pos_x, pos_y, pos_x+32, pos_y+32, // screen coordinates of the sprite + * 0, 0 // texture coordinates + * ); // no increments to pass: the rectangle is drawn unscaled + * @endcode + * + * The only limit of this second solution is that the sub-surface pointer must + * be 8-byte aligned (like all RDP textures), so it can only be used if the + * rectangle that needs to be loaded respects such constraint as well. + * + * + * @param tile Tile descriptor that will be initialized with this texture + * @param tex Surface containing the texture to load + * @param parms All optional parameters on where to load the texture and how to sample it. Refer to #rdpq_texparms_t for more information. 
+ * @param s0 Top-left X coordinate of the rectangle to load + * @param t0 Top-left Y coordinate of the rectangle to load + * @param s1 Bottom-right *exclusive* X coordinate of the rectangle + * @param t1 Bottom-right *exclusive* Y coordinate of the rectangle + * @return int Number of bytes used in TMEM for this texture + * + * @see #rdpq_tex_upload + * @see #surface_make_sub + */ +int rdpq_tex_upload_sub(rdpq_tile_t tile, const surface_t *tex, const rdpq_texparms_t *parms, int s0, int t0, int s1, int t1); + +/** + * @brief Load one or more palettes into TMEM + * + * This function allows to load one or more palettes into TMEM. + * + * When using palettes, the upper half of TMEM is allocated to them. There is room + * for 256 colors in total, which allows for one palette for a CI8 texture, or up + * to 16 palettes for CI4 textures. + * + * @param tlut Pointer to the color entries to load + * @param color_idx First color entry in TMEM that will be written to (0-255) + * @param num_colors Number of color entries to load (1-256) + */ +void rdpq_tex_upload_tlut(uint16_t *tlut, int color_idx, int num_colors); + +/** + * @brief Reuse a portion of the previously uploaded texture to TMEM + * + * When a texture has been uploaded, it's possible to reuse it for multiple tiles + * without increasing TMEM usage. This function provides a way to achieve this while also + * configuring your own texture parameters for the reused texture. + * + * This sub-variant also allows to specify what part of the uploaded texture must be reused. + * For example, after uploading a 64x64 texture (or a 64x64 sub texture of a larger surface), + * you can reuse an existing portion of it, like (16,16)-(48,48) or (0,0)-(8,32). + * The restrictions of rdpq_texparms_t apply when reusing a texture just as they do when uploading one. + * + * Sub-rectangle must be within the bounds of the texture reused and be 8-byte aligned; + * not all starting positions are valid for different formats. 
+ * + * Starting horizontal position s0 must be 8-byte aligned, meaning for different image formats + * you can use TEX_FORMAT_BYTES2PIX(fmt, bytes) with bytes being in multiples of 8. + * Starting vertical position t0 must be in multiples of 2 pixels due to TMEM arrangement. + * + * Leaving parms to NULL will copy the previous texture's texparms. + * Note: This function must be executed in a multi-upload block right after the reused texture has been + * uploaded. + * + * @param tile Tile descriptor that will be initialized with reused texture + * @param parms All optional parameters on how to sample reused texture. Refer to #rdpq_texparms_t for more information. + * @param s0 Top-left X coordinate of the rectangle to reuse + * @param t0 Top-left Y coordinate of the rectangle to reuse + * @param s1 Bottom-right *exclusive* X coordinate of the rectangle + * @param t1 Bottom-right *exclusive* Y coordinate of the rectangle + * @return int Number of bytes used in TMEM for this texture (always 0) + */ +int rdpq_tex_reuse_sub(rdpq_tile_t tile, const rdpq_texparms_t *parms, int s0, int t0, int s1, int t1); + +/** + * @brief Reuse the previously uploaded texture to TMEM + * + * When a texture has been uploaded, it's possible to reuse it for multiple tiles + * without increasing TMEM usage. This function provides a way to achieve this while also + * configuring your own texture parameters for the reused texture. + * + * This full-variant will use the whole texture that was previously uploaded. + * Leaving parms to NULL will copy the previous texture's texparms. + * + * Note: This function must be executed in a multi-upload block right after the reused texture has been + * uploaded. + * + * @param tile Tile descriptor that will be initialized with reused texture + * @param parms All optional parameters on how to sample reused texture. Refer to #rdpq_texparms_t for more information. 
+ * @return int Number of bytes used in TMEM for this texture (always 0) + */ +int rdpq_tex_reuse(rdpq_tile_t tile, const rdpq_texparms_t *parms); + +/** + * @brief Begin a multi-texture upload + * + * This function begins a multi-texture upload, with automatic TMEM layout. + * There are two main cases where you may want to squeeze multiple textures + * within TMEM: when loading mipmaps, and when using multi-texturing. + * + * After calling #rdpq_tex_multi_begin, you can call #rdpq_tex_upload multiple + * times in sequence, without manually specifying a TMEM address. The functions + * will start filling TMEM from the beginning, in sequence. + * + * If the TMEM becomes full and is unable to fulfill a load, an assertion + * will be issued. + * + * @note When calling #rdpq_tex_upload or #rdpq_tex_upload_sub in this mode, + * do not specify a TMEM address in the parms structure, as the actual + * address is automatically calculated. + * + * @see #rdpq_tex_upload + * @see #rdpq_tex_upload_sub + * @see #rdpq_tex_multi_end + */ +void rdpq_tex_multi_begin(void); + + +/** + * @brief Finish a multi-texture upload + * + * This function finishes a multi-texture upload. See #rdpq_tex_multi_begin + * for more information. + * + * @returns The number of bytes used in TMEM for this multi-texture upload + * + * @see #rdpq_tex_multi_begin + */ +int rdpq_tex_multi_end(void); + + +/** + * @brief Blitting parameters for #rdpq_tex_blit. + * + * This structure contains all possible parameters for #rdpq_tex_blit. + * The various fields have been designed so that the 0 value is always the most + * reasonable default. This means that you can simply initialize the structure + * to 0 and then change only the fields you need (for instance, through a + * compound literal). + * + * See #rdpq_tex_blit for several examples. 
+ */ +typedef struct rdpq_blitparms_s { + rdpq_tile_t tile; ///< Base tile descriptor to use (default: TILE0); notice that two tiles will often be used to do the upload (tile and tile+1). + int s0; ///< Source sub-rect top-left X coordinate + int t0; ///< Source sub-rect top-left Y coordinate + int width; ///< Source sub-rect width. If 0, the width of the surface is used + int height; ///< Source sub-rect height. If 0, the height of the surface is used + bool flip_x; ///< Flip horizontally. If true, the source sub-rect is treated as horizontally flipped (so flipping is performed before all other transformations) + bool flip_y; ///< Flip vertically. If true, the source sub-rect is treated as vertically flipped (so flipping is performed before all other transformations) + + int cx; ///< Transformation center (aka "hotspot") X coordinate, relative to (s0, t0). Used for all transformations + int cy; ///< Transformation center (aka "hotspot") Y coordinate, relative to (s0, t0). Used for all transformations + float scale_x; ///< Horizontal scale factor to apply to the surface. If 0, no scaling is performed (the same as 1.0f) + float scale_y; ///< Vertical scale factor to apply to the surface. If 0, no scaling is performed (the same as 1.0f) + float theta; ///< Rotation angle in radians + + // FIXME: replace this with CPU tracking of filtering mode? + bool filtering; ///< True if texture filtering is enabled (activates workaround for filtering artifacts when splitting textures in chunks) + + // FIXME: remove this? + int nx; ///< Texture horizontal repeat count. If 0, no repetition is performed (the same as 1) + int ny; ///< Texture vertical repeat count. If 0, no repetition is performed (the same as 1) +} rdpq_blitparms_t; + +/** + * @brief Blit a surface to the active framebuffer + * + * This is the highest level function for drawing an arbitrary-sized surface + * to the screen, possibly scaling and rotating it. 
+ * + * It handles all the required steps to blit the entire contents of a surface + * to the framebuffer, that is: + * + * * Logically split the surface in chunks that fit the TMEM + * * Calculate an appropriate scaling factor for each chunk + * * Load each chunk into TMEM (via #rdpq_tex_upload) + * * Draw each chunk to the framebuffer (via #rdpq_texture_rectangle or #rdpq_triangle) + * + * Note that this function only performs the actual blits, it does not + * configure the rendering mode or handle palettes. Before calling this + * function, make sure to configure the render mode via + * #rdpq_set_mode_standard (or #rdpq_set_mode_copy if no scaling and pixel + * format conversion is required). If the surface uses a palette, you also + * need to load the palette using #rdpq_tex_upload_tlut. + * + * This function is able to perform many different complex transformations. The + * implementation has been tuned to try to be as fast as possible for simple + * blits, but it scales up nicely for more complex operations. + * + * The parameters that describe the transformations to perform are passed in + * the @p parms structure. The structure contains a lot of fields, but it has + * been designed so that most of them can be simply initalized to zero to + * disable advanced behaviors (and thus simply left unmentioned in an inline + * initialization). + * + * For instance, this blits a large image to the screen, aligning it to the + * top-left corner (eg: a splashscreen). + * + * @code{.c} + * rdpq_tex_blit(splashscreen, 0, 0, NULL); + * @endcode + * + * This is the same, but the image will be centered on the screen. 
To do this, + * we specify the center of the screen as position, and then we set the hotspot + * of the image ("cx" and "cy" fields) to its center: + * + * @code{.c} + * rdpq_tex_blit(splashscreen, 320/2, 240/2, &(rdpq_blitparms_t){ + * .cx = splashscreen->width / 2, + * .cy = splashscreen->height / 2, + * }); + * @endcode + * + * This example scales a 64x64 image to 256x256, putting its center near the + * top-left of the screen (so part of the resulting image will be offscreen): + * + * @code{.c} + * rdpq_tex_blit(splashscreen, 20, 20, &(rdpq_blitparms_t){ + * .cx = splashscreen->width / 2, .cy = splashscreen->height / 2, + * .scale_x = 4.0f, .scale_y = 4.0f, + * }); + * @endcode + * + * This example assumes that the surface is a spritemap with frames of size + * 32x32. It selects the sprite at row 4, column 2, and draws it centered + * at position 100,100 on the screen applying a rotation of 45 degrees around its center: + * + * @code{.c} + * rdpq_tex_blit(splashscreen, 100, 100, &(rdpq_blitparms_t){ + * .s0 = 32*2, .t0 = 32*4, + * .width = 32, .height = 32, + * .cx = 16, .cy = 16, + * .theta = M_PI/4, + * }); + * @endcode + * + * @param surf Surface to draw + * @param x0 X coordinate on the framebuffer where to draw the surface + * @param y0 Y coordinate on the framebuffer where to draw the surface + * @param parms Parameters for the blit operation (or NULL for default) + */ +void rdpq_tex_blit(const surface_t *surf, float x0, float y0, const rdpq_blitparms_t *parms); + +///@cond +__attribute__((deprecated("use rdpq_tex_upload instead"))) +static inline int rdpq_tex_load(rdpq_tile_t tile, surface_t *tex, const rdpq_texparms_t *parms) { + return rdpq_tex_upload(tile, tex, parms); +} +__attribute__((deprecated("use rdpq_tex_upload_sub instead"))) +static inline int rdpq_tex_load_sub(rdpq_tile_t tile, surface_t *tex, const rdpq_texparms_t *parms, int s0, int t0, int s1, int t1) { + return rdpq_tex_upload_sub(tile, tex, parms, s0, t0, s1, t1); +} 
+__attribute__((deprecated("use rdpq_tex_upload_tlut instead"))) +static inline void rdpq_tex_load_tlut(uint16_t *tlut, int color_idx, int num_colors) { + return rdpq_tex_upload_tlut(tlut, color_idx, num_colors); +} +///@endcond + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rdpq_tri.h b/include/rdpq_tri.h new file mode 100644 index 0000000000..caf16564be --- /dev/null +++ b/include/rdpq_tri.h @@ -0,0 +1,247 @@ +/** + * @file rdpq_tri.h + * @brief RDP Command queue: triangle drawing + * @ingroup rdpq + * + */ + +#ifndef LIBDRAGON_RDPQ_TRI_H +#define LIBDRAGON_RDPQ_TRI_H + +#include "rdpq.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Format descriptor of a triangle + * + * This structure holds the parameters required to draw triangles. + * It contains both a description of the vertex format, and some + * configuration parameters for the triangle rasterizer. + * + * This library provides a few predefined formats (such as #TRIFMT_FILL, + * #TRIFMT_TEX, etc.) but you are free to define your own format. + * + * There is no overhead in using a custom format or even switching + * format from one triangle to another (besides the required mode changes), + * so feel free to define as many formats as are required for your application. + * + * Refer to #rdpq_triangle for a description of the different vertex + * components. + */ +typedef struct rdpq_trifmt_s { + /** + * @brief Index of the position component within the vertex arrays. + * + * For instance, if `pos_offset == 4`, `v1[4]` and `v1[5]` must be the X and Y + * coordinates of the first vertex. + */ + int pos_offset; + + /** + * @brief Index of the shade component within the vertex arrays. + * + * For instance, if `shade_offset == 4`, `v1[4]`, `v1[5]`, `v1[6]`, `v1[7]` must be + * the R, G, B, A values associated to the first vertex. If shade_offset is less + * than 0, no shade component will be used to draw the triangle.
+ */ + int shade_offset; + + /** + * @brief If true, draw the triangle with flat shading (instead of gouraud shading). + * + * This parameter is ignored if the shade component does not exist (`shade_offset < 0`). + * Normally, gouraud shading is used to draw triangles, which means that the shading + * of each vertex is interpolated across the triangle. If flat shading is enabled, the + * shading of the first vertex is used for the whole triangle. + */ + bool shade_flat; + + /** + * @brief Index of the texture component within the vertex arrays. + * + * For instance, if `tex_offset == 4`, `v1[4]`, `v1[5]`, `v1[6]` must be the S, T, W + * values associated to the first vertex. If tex_offset is less than 0, no texture + * component will be used to draw the triangle. + */ + int tex_offset; + + /** + * @brief RDP tile descriptor that describes the texture (0-7). + * + * This parameter is ignored if the texture component does not exist (`tex_offset < 0`). + * In case of multi-texturing, `tile + 1` will be used for the second texture. + * Notice that the tile descriptor must be configured before drawing the triangle. + */ + rdpq_tile_t tex_tile; + + /** + * @brief Number of mipmaps to use for the texture. + * + * This parameter is ignored if the texture component does not exist (`tex_offset < 0`), + * or if mipmapping has not been configured. + * + * Notice that when using the mode API (#rdpq_mode_mipmap), the number of mipmaps + * is specified there, so this parameter should be left to zero. + */ + int tex_mipmaps; + + /** + * @brief Index of the depth component within the vertex array. + * + * For instance, if `z_offset == 4`, `v1[4]` must be the Z coordinate of the first + * vertex. If z_offset is less than 0, no depth component will be used to + * draw the triangle. + */ + int z_offset; +} rdpq_trifmt_t; + +/** + * @brief Format descriptor for a solid-filled triangle. 
+ * + * Vertex array format: `(float){X, Y}` (2 floats) + * + * Given that only position is provided, the triangle is drawn with a solid color, + * which is the output of the color combiner. See #rdpq_mode_combiner for more + * information. + * + * A common choice for a combiner formula is #RDPQ_COMBINER_FLAT, that will + * simply output whatever color is configured via #rdpq_set_prim_color. + */ +extern const rdpq_trifmt_t TRIFMT_FILL; + +/** + * @brief Format descriptor for a shaded triangle. + * + * Vertex array format: `(float){X, Y, R, G, B, A}` (6 floats) + */ +extern const rdpq_trifmt_t TRIFMT_SHADE; + +/** + * @brief Format descriptor for a textured triangle. + * + * Vertex array format: `(float){X, Y, S, T, INV_W}` (5 floats) + */ +extern const rdpq_trifmt_t TRIFMT_TEX; + +/** + * @brief Format descriptor for a shaded, textured triangle. + * + * Vertex array format: `(float){X, Y, R, G, B, A, S, T, INV_W}` (9 floats) + */ +extern const rdpq_trifmt_t TRIFMT_SHADE_TEX; + +/** + * @brief Format descriptor for a solid-filled, z-buffered triangle. + * + * Vertex array format: `(float){X, Y, Z}` (3 floats) + */ +extern const rdpq_trifmt_t TRIFMT_ZBUF; + +/** + * @brief Format descriptor for a z-buffered, shaded triangle. + * + * Vertex array format: `(float){X, Y, Z, R, G, B, A}` (7 floats) + */ +extern const rdpq_trifmt_t TRIFMT_ZBUF_SHADE; + +/** + * @brief Format descriptor for a z-buffered, textured triangle. + * + * Vertex array format: `(float){X, Y, Z, S, T, INV_W}` (6 floats) + */ +extern const rdpq_trifmt_t TRIFMT_ZBUF_TEX; + +/** + * @brief Format descriptor for a z-buffered, shaded, textured triangle. + * + * Vertex array format: `(float){X, Y, Z, R, G, B, A, S, T, INV_W}` (10 floats) + */ +extern const rdpq_trifmt_t TRIFMT_ZBUF_SHADE_TEX; + +/** + * @brief Draw a triangle (RDP command: TRI_*) + * + * This function allows to draw a triangle into the framebuffer using RDP, in screen coordinates. 
+ * RDP does not handle transform and lighting, so it only reasons in terms of screen-level coordinates. + * + * Each vertex of a triangle is made of up to 4 components: + * + * * Position. 2 values: X, Y. The values must be in screen coordinates, that is they refer + * to the framebuffer pixels. Fractional values allow for subpixel precision. Supported + * range is [-4096..4095] (numbers outside that range will be clamped). + * * Depth. 1 value: Z. Supported range is [0..1]. + * * Shade. 4 values: R, G, B, A. The values must be in the 0..1 range. + * * Texturing. 3 values: S, T, INV_W. The values S,T address the texture specified by the tile + * descriptor. INV_W is the inverse of the W vertex coordinate in clip space (after + * projection), a value commonly used to do the final perspective division. This value is + * required to do perspective-corrected texturing. + * + * Only the position is mandatory, all other components are optional, depending on the kind of + * triangle that needs to be drawn. For instance, specifying only position and shade will allow + * you to draw a gouraud-shaded triangle with no texturing and no z-buffer usage. + * + * The vertex components must be provided via arrays of floating point values. The order of + * the components within the array is flexible, and can be specified at call time via the + * #rdpq_trifmt_t structure. + * + * Notice that it is important to configure the correct render modes before calling this function. + * Specifically: + * + * * To use the depth component, you must activate the z-buffer via #rdpq_mode_zbuf. + * * To use the shade component, you must configure a color combiner formula via #rdpq_mode_combiner. + * The formula must use the SHADE slot, to specify the exact pixel formula that will combine the + * per-pixel color value with other components, like the texture.
+ * * To use the texturing component, you must configure a color combiner formula via #rdpq_mode_combiner + * that uses the TEX0 (and/or TEX1) slot, such as #RDPQ_COMBINER_TEX or #RDPQ_COMBINER_TEX_SHADE, + * to specify the exact pixel formula that will combine the per-pixel color value with other + * components, like the shade. Moreover, you can activate perspective texturing via #rdpq_mode_persp. + * + * If you fail to activate a specific render mode for a provided component, the component will be ignored + * by RDP. For instance, if you provide S,T,W but do not configure a combiner formula that accesses + * TEX0, the texture will not be rendered. On the contrary, if you activate a specific render mode + * but then fail to provide the component (eg: activate z buffering but then fail to provide a depth + * component), RDP will fall into undefined behavior that can vary from nothing being rendered, garbage + * on the screen or even a freeze. The rdpq validator will do its best to help you catch these mistakes, + * so remember to activate it via #rdpq_debug_start whenever you get a surprising result. + * + * For instance, this code snippet will draw a filled triangle, with a flat green color: + * + * @code + * // Reset to standard rendering mode. + * rdpq_set_mode_standard(); + * + * // Configure the combiner for flat-color rendering + * rdpq_mode_combiner(RDPQ_COMBINER_FLAT); + * + * // Configure the flat color + * rdpq_set_prim_color(RGBA32(0, 255, 0, 255)); + * + * // Draw the triangle + * float v1[] = { 100, 100 }; + * float v2[] = { 200, 200 }; + * float v3[] = { 100, 200 }; + * rdpq_triangle(&TRIFMT_FILL, v1, v2, v3); + * @endcode + * + * The three vertices (v1, v2, v3) can be provided in any order (clockwise or counter-clockwise). The + * function will render the triangle in any case (so back-face culling must be handled before calling + * it). + * + * @param fmt Format of the triangle being drawn.
This structure specifies the order of the + * components within the vertex arrays, and also some additional rasterization + * parameters. You can pass one of the predefined formats (#TRIFMT_FILL, + * #TRIFMT_TEX, etc.), or a custom one. + * @param v1 Array of components for vertex 1 + * @param v2 Array of components for vertex 2 + * @param v3 Array of components for vertex 3 + */ +void rdpq_triangle(const rdpq_trifmt_t *fmt, const float *v1, const float *v2, const float *v3); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/rsp_queue.inc b/include/rsp_queue.inc index 80fd584286..768a03adb9 100644 --- a/include/rsp_queue.inc +++ b/include/rsp_queue.inc @@ -8,10 +8,12 @@ #include #include +#include ######################################################## # # HOW TO WRITE AN OVERLAY: +# # 1. Put `#include ` before any text or data # segments in your file. # 2. Define the overlay header using RSPQ_BeginOverlayHeader @@ -109,7 +111,7 @@ _RSPQ_OVERLAY_COMMAND_TABLE: # its size must not be zero. ######################################################## .macro RSPQ_BeginSavedState - .align 3 + .align 4 _RSPQ_SAVED_STATE_START: .endm @@ -228,22 +230,23 @@ _RSPQ_SAVED_STATE_END: # We also define direct access to small constants as they can be useful in some # calculations. 
-#define K1 vshift,e(7) -#define K2 vshift,e(6) -#define K4 vshift,e(5) -#define K8 vshift,e(4) -#define K16 vshift,e(3) -#define K32 vshift,e(2) -#define K64 vshift,e(1) -#define K128 vshift,e(0) -#define K256 vshift8,e(7) -#define K512 vshift8,e(6) -#define K1024 vshift8,e(5) -#define K2048 vshift8,e(4) -#define K4096 vshift8,e(3) -#define K8192 vshift8,e(2) -#define K16384 vshift8,e(1) -#define KM32768 vshift8,e(0) // note: this is actually -32768 for most arithmetic operations +#define K1 vshift.e7 +#define K2 vshift.e6 +#define K4 vshift.e5 +#define K8 vshift.e4 +#define K16 vshift.e3 +#define K32 vshift.e2 +#define K64 vshift.e1 +#define K128 vshift.e0 +#define K256 vshift8.e7 +#define K512 vshift8.e6 +#define K1024 vshift8.e5 +#define K2048 vshift8.e4 +#define K4096 vshift8.e3 +#define K8192 vshift8.e2 +#define K16384 vshift8.e1 +#define K32768 vshift8.e0 +#define KM32768 vshift8.e0 // note: this is actually -32768 for most arithmetic operations ######################################################## @@ -281,6 +284,58 @@ RSPQ_POINTER_STACK: .ds.l (RSPQ_MAX_BLOCK_NESTING_LEVEL+2) # RDRAM address of the current command list. RSPQ_RDRAM_PTR: .long 0 +################################################################ +# RDPQ shared state +# +# The next variables are state managed by rsp_rdpq.inc. +# Not all overlays need rsp_rdpq.inc, but those that do, need +# this state to be shared among all of them. We don't have +# a way to do that currently, so the best workaround is to +# keep this shared state in the rspq state itself, which is +# always loaded in DMEM. This wastes some DMEM for overlays +# that don't need to generate RDP commands, but it's the least +# worse solution we can currently come up with. +################################################################ + +# Pointer to the end of the current RDP output buffer +RDPQ_SENTINEL: .long 0 + +# RDP MODE (32 bytes). 
NOTE: This must be 16-byte aligned (because we use lqv in push/pop), +# but we can't use .align 4 here, otherwise it's not easy to keep this structure layout +# in sync with the C side (rdpq_state_t in rdpq.c). +RDPQ_MODE: + # Combiner formula set by the application (1pass or 2pass) + RDPQ_COMBINER: .quad 0 + RDPQ_COMBINER_MIPMAPMASK: .quad 0 + # Blender settings: up to two steps. Either of them + # is already in a format valid for both 1cyc and 2cyc mode. + RDPQ_MODE_BLENDER_STEPS: .word 0,0 + # Current state of "Set Other Modes" RDP mode command, plus our own extension + # fields (see SOMX_* in rdpq_macros.h). Notice that the top byte also contains + # extension fields, so when sending this command to RDP, the top byte must be + # changed with the SOM command ID (0xEF) in the RDP output buffer. + RDPQ_OTHER_MODES: .quad 0 +RDPQ_MODE_END: + +# Current scissor rectangle (in RDP command format) +RDPQ_SCISSOR_RECT: .quad 0 +# Two RDP output buffers (to alternate between) +RDPQ_DYNAMIC_BUFFERS: .long 0, 0 +# Current RDP write pointer (8 MSB are garbage) +RDPQ_CURRENT: .long 0 +# Current fill color +RDPQ_FILL_COLOR: .word 0 +# Current target buffer color depth +RDPQ_TARGET_BITDEPTH: .byte 0 +# True if there is a SYNC_FULL command in the current RDP output buffer +RDPQ_SYNCFULL_ONGOING: .byte 0 +# True if the rdpq debug mode is active (validator). TODO: hide this bit elsewhere to save one byte +RDPQ_DEBUG: .byte 0 + +################################################################ +# End of RDPQ shared state +################################################################ + # Index (not ID!)
of the current overlay, as byte offset in the descriptor array RSPQ_CURRENT_OVL: .half 0 @@ -299,15 +354,14 @@ RSPQ_DefineCommand RSPQCmd_Dma, 16 # 0x05 RSPQ_DefineCommand RSPQCmd_WriteStatus, 4 # 0x06 -- must be even (bit 24 must be 0) RSPQ_DefineCommand RSPQCmd_SwapBuffers, 12 # 0x07 RSPQ_DefineCommand RSPQCmd_TestWriteStatus, 8 # 0x08 -- must be even (bit 24 must be 0) +RSPQ_DefineCommand RSPQCmd_RdpWaitIdle, 4 # 0x09 +RSPQ_DefineCommand RSPQCmd_RdpSetBuffer, 12 # 0x0A +RSPQ_DefineCommand RSPQCmd_RdpAppendBuffer, 4 # 0x0B -#if RSPQ_DEBUG .align 3 -RSPQ_LOG_IDX: .long 0 -RSPQ_LOG: .ds.l 16 -RSPQ_LOG_END: .long RSPQ_DEBUG_MARKER +#if RSPQ_DEBUG + .long 0, RSPQ_DEBUG_MARKER #endif - - .align 3 RSPQ_DMEM_BUFFER: .ds.b RSPQ_DMEM_BUFFER_SIZE @@ -384,14 +438,6 @@ RSPQ_Loop: # Read first word lw a0, %lo(RSPQ_DMEM_BUFFER) + 0x0 (rspq_dmem_buf_ptr) -#if RSPQ_DEBUG - lw t0, %lo(RSPQ_LOG_IDX) - sw a0, %lo(RSPQ_LOG)(t0) - addi t0, 4 - andi t0, 16*4-1 - sw t0, %lo(RSPQ_LOG_IDX) -#endif - # Index into overlay table srl t0, a0, 28 @@ -654,6 +700,120 @@ RSPQCmd_Dma: move t2, a3 .endfunc + ############################################################# + # RSPQCmd_RdpSetBuffer + # + # Configure a new output buffer in RDRAM for RDP. This can be + # used to either send a buffer of commands to RDP, or to setup + # a buffer for generation of new RDP commands. + # In fact, a1/a0 will be written to DP_START/DP_END, so if + # DP_END > DP_START, the commands contained in the buffer will + # be immediately processed by RDP. If DP_START==DP_END, the buffer + # is basically "stored" in the RDP registers but nothing happens: + # subsequent calls to RSPQ_RdpSend will append commands to it, + # until the sentinel (a2) is hit, which means that the buffer is + # full. At this point, RSPQ_RdpSend will get back to the + # standard buffers (RDPQ_DYNAMIC_BUFFERS). 
+ # + # ARGS: + # a0: New end pointer (to write to DP_END) + # a1: New start buffer (to write to DP_START) + # a2: New sentinel (end of total capacity of the buffer) + # + # NOTE: if the sentinel is 0, the command will force a switch + # to the RDPQ dynamic buffer (the next one). This will happen + # as soon as next RDPQ_Send is run. + ############################################################# + .func RSPQCmd_RdpSetBuffer +RSPQCmd_RdpSetBuffer: + # Update the sentinel. If zero, there's nothing more to do: next + # RDPQ_Send will switch to the next dynamic buffer. + sw a2, %lo(RDPQ_SENTINEL) + beqz a2, JrRa + move ra2, ra + + # Wait for RDP DMA FIFO to be not full. If there's another + # pending buffer, we cannot do anything but wait. + lbu t3, %lo(RDPQ_SYNCFULL_ONGOING) + jal RSPQ_RdpWait + ori t3, DP_STATUS_END_VALID + + # In case there was a SYNC_FULL pending, before switching to next buffer, we need + # to actually write the current one to RDP. Notice that the following two instructions + # are actually nops in case there was no SYNC_FULL (as DP_END == RDPQ_CURRENT in that case). + sb zero, %lo(RDPQ_SYNCFULL_ONGOING) + lw t0, %lo(RDPQ_CURRENT) +#ifdef NDEBUG + mtc0 t0, COP0_DP_END +#else + # Check if the RDPQ debug mode is active (validator) + lbu t2, %lo(RDPQ_DEBUG) + beqz t2, 1f + mtc0 t0, COP0_DP_END + + # Generate a RSP interrupt to tell the CPU to fetch the new DP_START / DP_END. + # Then also wait until the CPU has acknowledged fetching the buffer (via SIG0) + # so that we absolutely don't lose the buffer. + jal SpStatusWait + li t2, SP_STATUS_SIG0 + li t0, SP_WSTATUS_SET_SIG0 | SP_WSTATUS_SET_INTR + mtc0 t0, COP0_SP_STATUS + jal SpStatusWait + li t2, SP_STATUS_SIG0 +#endif +1: move ra, ra2 + + # Write new start buffer pointer, and fallthrough to + # RSPQCmd_RdpAppendBuffer to write the new end pointer + mtc0 a1, COP0_DP_START + # fallthrough! 
+ .endfunc + + + ############################################################# + # RSPQCmd_RdpAppendBuffer + # + # Tell RDP to run more commands that were appended to the end + # of the current RDP output buffer. + # + # ARGS: + # a0: New end pointer (to write to DP_END) + ############################################################# + .func RSPQCmd_RdpAppendBuffer +RSPQCmd_RdpAppendBuffer: + lbu t0, %lo(RDPQ_SYNCFULL_ONGOING) + bnez t0, JrRa + sw a0, %lo(RDPQ_CURRENT) + jr ra + mtc0 a0, COP0_DP_END + .endfunc + + ############################################################# + # RSPQCmd_RdpWaitIdle + # + # Wait until RDP is idle. + # + # *NOTE*: a SYNC_FULL command *must* have been already written + # to the RDP output buffer, otherwise this function will stall + # forever. In fact, once the RDP is running, it never gets back + # to idle state until a SYNC_FULL is found, even if it has no + # more commands to run. + ############################################################# + .func RSPQCmd_RdpWaitIdle +RSPQCmd_RdpWaitIdle: + li t3, DP_STATUS_BUSY + +RSPQ_RdpWait: + mfc0 t2, COP0_DP_STATUS +1: + # Wait for selected RDP status bits to become 0. + and t1, t2, t3 + bnez t1, 1b + mfc0 t2, COP0_DP_STATUS + jr ra + nop + .endfunc + #include #include diff --git a/include/rsp_rdpq.inc b/include/rsp_rdpq.inc new file mode 100644 index 0000000000..efcee10bd8 --- /dev/null +++ b/include/rsp_rdpq.inc @@ -0,0 +1,1340 @@ + +############################################################################## +# RDPQ library +############################################################################## +# +# This library allows rspq overlays to emit RDP commands. +# +# If you want to write a rspq overlay that emits RDP commands, do the following: +# +# * Include rsp_rdpq.inc (this file) at the *end* of your overlay source +# code. 
+# * In your code, prepare the RDP command in a0/a1 (+ a2/a3 for 16 bytes +# commands) and then call RDPQ_Write8 or RDPQ_Write16 to store it into +# a temporary DMEM buffer (RDPQ_CMD_STAGING). You can do this as many times +# as you need. +# * Call RDPQ_Finalize to send the commands to RDP for drawing. This must +# currently be the last thing your command does, as that function +# doesn't return but goes back to processing the next command (RSPQ_Loop). +# +############################################################################## + + +#include "rdpq_macros.h" +#include "rdpq_constants.h" + +######################################################################### +# +# RDPQ_Send: send commands from DMEM to RDP +# +########################################################################## + + .section .data.rdpq_send + +# TODO: get rid of the constant offset +RDPQ_CMD_PTR: .word RDPQ_CMD_STAGING - 0xA4000000 + + .section .bss.rdpq_send + + + .align 4 + # Enough for a full triangle command +RDPQ_CMD_STAGING: .ds.b 0xB0 + + .section .text.rdpq_send + + ############################################################# + # RDPQ_Send + # + # Internal API for overlays that generate RDP commands. It + # runs a DMA transfer from DMEM to the RDP output buffer in RDRAM + # to copy some new RDP commands, and tells RDP to run them.
+ # + # ARGS: + # s4: buffer in DMEM containing RDP commands to send to RDP + # s3: pointer to the end of the buffer in DMEM (s3-s4 = size) + ############################################################# + .func RDPQ_Send +RDPQ_Send: + #define rdram_cur s0 + #define rdram_next a0 + #define sentinel a2 + #define buf_idx t4 + #define next_func t5 + + # Calculate buffer size and DMA transfer length + #ifndef NDEBUG + andi s3, 0xFFF + andi s4, 0xFFF + assert_ge s3, s4, RDPQ_ASSERT_SEND_INVALID_SIZE + #endif + sub rspq_cmd_size, s3, s4 + beqz rspq_cmd_size, JrRa # Exit if s3==s4 (0 byte transfer) + move ra2, ra + + # Fetch current pointer in RDRAM where to write the RDP command + # This is normally the same value that's in DP_END (unless we + # are holding writes because there is a SYNC_FULL pending). + lw rdram_cur, %lo(RDPQ_CURRENT) + and rdram_cur, 0xFFFFFF + + # Fetch the sentinel (end of buffer). Check whether there is + # enough room to add the new command. If so, run the DMA transfer, + # and then call RSPQCmd_RdpAppendBuffer to update DP_END to include + # the new commands. + lw sentinel, %lo(RDPQ_SENTINEL) + sub sentinel, rspq_cmd_size + bge sentinel, rdram_cur, do_dma + li next_func, RSPQCmd_RdpAppendBuffer + + # There is not enough space in the current buffer, so we will need to switch + # to the next. Since the RDP DMA can hold two buffers in total, and we have two + # buffers, we need to make sure that we are not overwriting the buffer that + # is currently playing. To do so, wait for the END_VALID to become 0, which means + # that only one buffer might be pending. + # Also, in case there is a SYNC_FULL ongoing, we need to wait for it to finish before + # enqueuing a new buffer. RDPQ_SYNCFULL_ONGOING is set to DP_STATUS_BUSY in this case, + # so using that bit in the RSPQ_RdpWait wait mask will make sure we wait for the RDP + # to be idle.
+ lbu t3, %lo(RDPQ_SYNCFULL_ONGOING) + jal RSPQ_RdpWait + ori t3, DP_STATUS_END_VALID + + # Switch to the next dynamic buffer. + # Since there are two of them, also switch between + # them so next time we will pick the other one. + lw rdram_cur, %lo(RDPQ_DYNAMIC_BUFFERS) + 4 + lw t1, %lo(RDPQ_DYNAMIC_BUFFERS) + 0 + sw rdram_cur, %lo(RDPQ_DYNAMIC_BUFFERS) + 0 + sw t1, %lo(RDPQ_DYNAMIC_BUFFERS) + 4 + + # Calculate new sentinel (end of buffer) + add sentinel, rdram_cur, RDPQ_DYNAMIC_BUFFER_SIZE + + # Run the DMA transfer now, and after that, run RSPQCmd_RdpSetBuffer via + # tailcall. Prepare a1 for it, containing the pointer to the new buffer, + # which will be written into DP_START. + move a1, rdram_cur + li next_func, RSPQCmd_RdpSetBuffer + +do_dma: + # Start the transfer. Will tail-call to either RSPQCmd_RdpSetBuffer or + # RSPQCmd_RdpAppendBuffer (see above). For both, we need to prepare + # rdram_next (aka a0) with the updated pointer to RDRAM that will be + # written to DP_END to run the newly written commands. + add t0, rspq_cmd_size, -1 + jal DMAOut + add rdram_next, rdram_cur, rspq_cmd_size + + # Jump to continuation function (either RSPQCmd_RdpSetBuffer or + # RSPQCmd_RdpAppendBuffer), after recovering original return address. + jr next_func + move ra, ra2 + .endfunc + + #undef rdram_cur + #undef rdram_next + #undef sentinel + #undef buf_idx + #undef next_func + + .section .text.rdpq_send_helpers + + ############################################################# + # RDPQ_Finalize + # + # Submits the RDP command(s) in RDPQ_CMD_STAGING to the RDP. + ############################################################# + .func RDPQ_Finalize +RDPQ_Finalize: + li s4, %lo(RDPQ_CMD_STAGING) + lw s3, %lo(RDPQ_CMD_PTR) + sw s4, %lo(RDPQ_CMD_PTR) + jal_and_j RDPQ_Send, RSPQ_Loop + .endfunc + + ############################################################# + # RDPQ_Write8 + # + # Appends 8 bytes from a0-a1 to the staging area (RDPQ_CMD_STAGING). 
+ ############################################################# + .func RDPQ_Write8 +RDPQ_Write8: + lw s0, %lo(RDPQ_CMD_PTR) + sw a0, 0x00(s0) + sw a1, 0x04(s0) + addi s0, 8 + jr ra + sw s0, %lo(RDPQ_CMD_PTR) + .endfunc + + ############################################################# + # RDPQ_Write16 + # + # Appends 16 bytes from a0-a3 to the staging area (RDPQ_CMD_STAGING). + ############################################################# + .func RDPQ_Write16 +RDPQ_Write16: + lw s0, %lo(RDPQ_CMD_PTR) + sw a0, 0x00(s0) + sw a1, 0x04(s0) + sw a2, 0x08(s0) + sw a3, 0x0C(s0) + addi s0, 16 + jr ra + sw s0, %lo(RDPQ_CMD_PTR) + .endfunc + + +######################################################################### +######################################################################### +# +# RDPQ Mode API: smart, assisted render-mode changes +# +# These functions implement the mode API. They can be useful +# for overlays that want to change RDP render mode, using the +# mode API for simplicity and interoperability. 
+# +########################################################################## +######################################################################### + + + .section .data.rdpq_mode_api + +AA_BLEND_MASK: + # MASK + .word SOM_COVERAGE_DEST_MASK | SOM_BLEND_MASK | SOM_BLALPHA_MASK | SOM_COLOR_ON_CVG_OVERFLOW +AA_BLEND_TABLE: + # AA=0 / BLEND=0 + .word SOM_COVERAGE_DEST_ZAP + # AA=0 / BLEND=1 + .word SOM_COVERAGE_DEST_ZAP + # AA=1 / BLEND=0 + .word SOM_BLALPHA_CVG | SOM_COVERAGE_DEST_CLAMP + # AA=1 / BLEND=1 + .word SOM_COLOR_ON_CVG_OVERFLOW | SOM_COVERAGE_DEST_WRAP + +AA_BLEND_DEFAULT_FORMULA: + .word RDPQ_BLENDER((IN_RGB, IN_ALPHA, MEMORY_RGB, MEMORY_CVG)) # Standard AA + .word RDPQ_BLENDER((IN_RGB, IN_ALPHA, MEMORY_RGB, MEMORY_CVG)) & ~SOM_READ_ENABLE # Reduced AA + +#define RDPQ_COMB_MIPMAP2 RDPQ_COMBINER2((TEX1, TEX0, LOD_FRAC, TEX0), (TEX1, TEX0, LOD_FRAC, TEX0), (0,0,0,0), (0,0,0,0)) +#define RDPQ_COMB_SHADE_FOG RDPQ_COMBINER1((0,0,0,SHADE), (0,0,0,1)) +#define RDPQ_COMB_TEX_SHADE_FOG RDPQ_COMBINER1((TEX0,0,SHADE,0), (0,0,0,TEX0)) + +COMB0_MASK: .quad RDPQ_COMB0_MASK +COMBINER_SHADE: .quad RDPQ_COMBINER_SHADE +COMBINER_SHADE_FOG: .quad RDPQ_COMB_SHADE_FOG +COMBINER_TEX_SHADE: .quad RDPQ_COMBINER_TEX_SHADE +COMBINER_TEX_SHADE_FOG: .quad RDPQ_COMB_TEX_SHADE_FOG + +COMBINER_MIPMAP2: .quad (RDPQ_COMB_MIPMAP2 & RDPQ_COMB0_MASK) | RDPQ_COMBINER_2PASS + + .section .bss.rdpq_mode_api + +# Temporary combiner memory location for RDPQ_UpdateRenderMode +RDPQ_MODE_COMBINER_1CYC: .quad 0 +RDPQ_MODE_COMBINER_2CYC: .quad 0 + + + .section .text.rdpq_mode_api + + .func RDPQ_SetBlendingMode +RDPQ_SetBlendingMode: + # Set the blending mode formula in the second step. Then: + # * If the formula is empty, clear also the first step if it was + # part of a previous two-step blending (SOMX_BLEND_2PASS). + # * If the formula is not empty and it is two-steps (SOMX_BLEND_2PASS), + # put it also in the first step. 
+ # We cover both conditions in one single codeflow by putting into t0 + # the value to check against SOMX_BLEND_2PASS. + lw t0, %lo(RDPQ_MODE_BLENDER_STEPS) + 0 + beqz a1, setblending_check + sw a1, %lo(RDPQ_MODE_BLENDER_STEPS) + 4 + move t0, a1 +setblending_check: + andi t0, SOMX_BLEND_2PASS + beqz t0, RDPQ_UpdateRenderMode + nop + # fallthrough! + .endfunc + + .func RDPQCmd_SetFogMode +RDPQ_SetFogMode: + # Set the fog mode formula in the first step + j RDPQ_UpdateRenderMode + sw a1, %lo(RDPQ_MODE_BLENDER_STEPS) + 0 + .endfunc + + .func RDPQ_SetCombineMode_1Pass +RDPQ_SetCombineMode_1Pass: + # Turn off RDPQ_COMBINER_2PASS (bit 63). This is set by default + # because the overlay is registered in slots 0xC0-0xF0. + # We need to remember that this combiner does not require 2 passes + xor a0, RDPQ_COMBINER_2PASS >> 32 + sw a2, %lo(RDPQ_COMBINER_MIPMAPMASK) + 0 + sw a3, %lo(RDPQ_COMBINER_MIPMAPMASK) + 4 + # fallthrough! + .endfunc + + .func RDPQ_SetCombineMode_2Pass +RDPQ_SetCombineMode_2Pass: + # Set correct SET_COMBINE opcode (0xFC). The opcode can be anything of + # the other 4 (1pass/2pass dynamic/static). + or a0, 0x7F000000 + xor a0, 0x7F000000 ^ 0x7C000000 + # Save the input combiner + sw a0, %lo(RDPQ_COMBINER) + 0 + sw a1, %lo(RDPQ_COMBINER) + 4 + # fallthrough! + .endfunc + + ########################################################### + # RDPQ_UpdateRenderMode + # + # This function is the core of the rdpq mode API. + # + # It performs several calculations and finally emits a + # new render mode (with SET_COMBINE_MODE + SET_OTHER_MODES). + # + # It handles: + # + # * If fog is enabled, tweak standard combiners to avoid + # passing SHADE_ALPHA to the blender as IN_ALPHA. + # * If interpolated mipmap is enabled, modify the color + # combiner adding the mipmap formula. + # * Merge the two blender steps (fogging / blending), taking + # care of adjustments if either is active or not.
+ # * Decide whether to use 1cycle or 2cycle mode, depending + # on color combiner, blender and mipmapping. + # * Adjust coverage modes depending on antialias and + # blending settings. + # + ########################################################### + + .func RDPQ_UpdateRenderMode +RDPQ_UpdateRenderMode: + #define comb_hi a0 + #define comb_lo a1 + #define som_hi a2 + #define som_lo a3 + #define comb_hi_noid t5 + #define blend_1cyc v0 + #define blend_2cyc v1 + #define blend_final v1 + #define passthrough t7 + #define cycle_type t6 + #define bkg_blending t8 + + # If updates are frozen, do nothing + lw som_hi, %lo(RDPQ_OTHER_MODES) + 0 + andi t0, som_hi, SOMX_UPDATE_FREEZE >> 32 + bnez t0, RSPQ_Loop + lw som_lo, %lo(RDPQ_OTHER_MODES) + 4 + + # If we are in fill/copy mode, we just need to emit SOM + sll t0, som_hi, 63 - (SOM_CYCLE_SHIFT+1) + bltz t0, rdpq_update_fillcopy + + # If the input combiner is 1-pass, proceed working on it + lw comb_hi, %lo(RDPQ_COMBINER) + 0 + bgez comb_hi, calc_comb_1cyc + lw comb_lo, %lo(RDPQ_COMBINER) + 4 + + # This is a 2-pass combiner. It is not compatible with interpolated mipmaps. + and t0, som_hi, SOMX_LOD_INTERPOLATE >> 32 + assert_eq t0, zero, RDPQ_ASSERT_MIPMAP_COMB2 + j store_comb_2cyc + +calc_comb_1cyc: + # Check if fogging is active + andi t0, som_hi, SOMX_FOG >> 32 + beqz t0, check_mipmap_interp + + # Create a copy of comb_hi without the cmd ID in the top MSB. + # The ID is kept sort of "random" for the whole computation, + # because it is reset to SET_COMBINE_MODE just at the end. + # So we cannot use it for the next comparisons. + sll comb_hi_noid, comb_hi, 8 + srl comb_hi_noid, 8 + + # When fogging is active, we need to adapt the standard color combiners to avoid + # using SHADE alpha, because it contains depth. We currently have two of them: + # COMBINER_TEX_SHADE and COMBINER_SHADE. 
+check_fog_tex_shade: + lw t0, %lo(COMBINER_TEX_SHADE) + 0 + bne t0, comb_hi_noid, check_fog_shade + lw t0, %lo(COMBINER_TEX_SHADE) + 4 + beq t0, comb_lo, fog_change + li s0, %lo(COMBINER_TEX_SHADE_FOG) + +check_fog_shade: + lw t0, %lo(COMBINER_SHADE) + 0 + bne t0, comb_hi_noid, check_mipmap_interp + lw t0, %lo(COMBINER_SHADE) + 4 + bne t0, comb_lo, check_mipmap_interp + li s0, %lo(COMBINER_SHADE_FOG) + +fog_change: + lw comb_hi, 0(s0) + lw comb_lo, 4(s0) + +check_mipmap_interp: + and t0, som_hi, SOMX_LOD_INTERPOLATE >> 32 + beqz t0, store_comb_1cyc + + # Interpolated mipmapping is active. We want to add RDPQ_COMB_MIPMAP as step0 + # and use only step 1 of the incoming formula. Unfortunately, this + # also means that all TEX0 slots must be converted into COMBINED slots. + # We do this by applying the mask stored in RDPQ_COMBINER_MIPMAPMASK (loaded into t0/t1) + lw t0, %lo(RDPQ_COMBINER_MIPMAPMASK) + 0 + lw t1, %lo(RDPQ_COMBINER_MIPMAPMASK) + 4 + and comb_hi, t0 + and comb_lo, t1 + # Since this combiner now requires two-cycle mode, we can simply store in the + # 2-cycle mode slot. No need to touch the 1-cycle mode slot as it will not + # be used anyway. + lw t0, %lo(COMBINER_MIPMAP2) + 0 + lw t1, %lo(COMBINER_MIPMAP2) + 4 + or comb_hi, t0 + j store_comb_2cyc + or comb_lo, t1 + +store_comb_1cyc: + # The combiner setting is 1 pass. Store it as-is for 1cycle mode. + sw comb_hi, %lo(RDPQ_MODE_COMBINER_1CYC) + 0 + sw comb_lo, %lo(RDPQ_MODE_COMBINER_1CYC) + 4 + + # For 2 cycle mode, we need to adjust it changing the second pass + # to be a pure passthrough. We can do this by simply setting to 0 + # all fields of the second pass, as that corresponds to: + # (COMBINED - COMBINED) * COMBINED + COMBINED = COMBINED + lw t0, %lo(COMB0_MASK) + 0 + lw t1, %lo(COMB0_MASK) + 4 + and comb_hi, t0 + and comb_lo, t1 + + # Normally at this point we don't need to set the 2PASS flag, as this + # combiner does not require 2cycles.
The only exception is nearest mipmapping: + # in this case, we must force 2-cycle mode otherwise the RDP will not switch LOD. + srl t0, som_hi, SOM_TEXTURE_LOD_SHIFT - 32 + sll t0, 31 + or comb_hi, t0 + +store_comb_2cyc: + sw comb_hi, %lo(RDPQ_MODE_COMBINER_2CYC) + 0 + sw comb_lo, %lo(RDPQ_MODE_COMBINER_2CYC) + 4 + + ###################################### + # + # BLENDER STEPS + # + ###################################### + # + # Let's recap the meaning of SOM blending-related flags: + # + # SOM_BLENDING: process all pixels of the triangle through the blending unit + # SOM_AA_ENABLE: process edge pixels of the triangle through the blending unit + # + # So in general SOM_BLENDING is a superset of SOM_AA_ENABLE. + # Also notice that in 2cyc mode, SOM_BLENDING/SOM_AA only gate the + # *second cycle*, as the first cycle is always run for all pixels(!). + # + # This is the expected configuration for each combination of blending, + # fog and AA. Notice that in any case where SOM_BLENDING is set, setting + # SOM_AA_ENABLE is redundant, but it doesn't hurt. + # + # Blending | 1cyc | SOM_BLENDING + # Fog | 1cyc | SOM_BLENDING + # AA | 1cyc | SOM_AA_ENABLE + # Fog+Blending | 2cyc | SOM_BLENDING + # Fog+AA | 2cyc | SOM_AA_ENABLE + # Blending+AA | 1cyc | SOM_BLENDING (same BL config of "Blending") + # Fog+Blend+AA | 2cyc | SOM_BLENDING (same BL config of "Fog+Blending") + # + # Our input data: + # * RDPQ_MODE_BLENDER_STEPS+0: fog configuration if any, or 0. + # * RDPQ_MODE_BLENDER_STEPS+4: blender configuration if any, or 0. + # * SOM_AA_ENABLE: turned on if the user requested AA. + # + # Notice that the blender steps always include the SOM_BLENDING flag, if + # they are not zero. + + lw t0, %lo(RDPQ_MODE_BLENDER_STEPS) + 0 # Load step0 + lw t1, %lo(RDPQ_MODE_BLENDER_STEPS) + 4 # Load step1 + + # Check if step 1 contains a blending formula (before antialias). + sne bkg_blending, t1, zero + + # If step 1 is empty, check if antialias is active.
If so, we need + # to merge in a default formula. Moreover, in this case, we don't want + # or need the SOM_BLENDING anymore (see the table above). + bnez t1, blender_check_merge + andi t2, som_lo, SOM_AA_ENABLE + beqz t2, blender_check_merge + nop + #if (SOMX_AA_REDUCED >> 32) != 4 + #error Adjust this if SOMX_AA_REDUCED changes + #endif + andi t1, som_hi, SOMX_AA_REDUCED >> 32 + lw t1, %lo(AA_BLEND_DEFAULT_FORMULA)(t1) + and t0, ~SOM_BLENDING + + # Merge the two blender steps (fogging + blending). If either + # is not set (0), we just configure the other one as follows: + # + # 1cyc: we turn off the second step (and'ing with SOM_BLEND0_MASK). + # This is strictly not necessary as the second step is ignored. + # 2cyc: we change the first step into a passthrough (all values 0), + # and keep the formula in the second step. + # + # If both steps are configured, we need to merge them: we keep fogging + # in the first step, and blending in the second. We also set SOMX_BLEND_2PASS + # to remember that we must force 2cycle mode. + # + # We also set the bkg_blending flag to 1 if the step1 formula is configured. + # This is an assumption documented in rdpq_mode.h: we assume that any step1 + # formula is a background blending formula. This assumption will be used + # later to configure the antialias, if requested. 
+ + #define blend0_mask t2 + #define blend1_mask t3 +blender_check_merge: + li blend0_mask, SOM_BLEND0_MASK + li blend1_mask, SOM_BLEND1_MASK + + beqz t0, blender_1pass + move blend_1cyc, t1 + + beqz t1, blender_1pass + move blend_1cyc, t0 + +blender_2pass: + and t0, blend0_mask + and t1, blend1_mask + or blend_2cyc, t0, t1 + j 1f + or blend_2cyc, SOMX_BLEND_2PASS +blender_1pass: + and blend_2cyc, blend_1cyc, blend1_mask + and blend_1cyc, blend0_mask +1: + #undef blend0_mask + #undef blend1_mask + + ###################################### + # + # 1 CYCLE / 2 CYCLE MODE SELECTION + # + ###################################### + + # Automatic configuration of 1cycle / 2cycle. + # + # Check if either the current blender and combiner configuration require + # 2cycle mode: + # * Blender: bit 15 is set if 2cyc mode is required (SOMX_BLEND_2PASS) + # * Combiner: bit 63 is set if 2cyc mode is required (RDPQ_COMBINER_2PASS) + # + # First, we align both bits in bit 31 and we OR them together. + sll t2, blend_2cyc, 16 + lw t1, %lo(RDPQ_MODE_COMBINER_2CYC) # Fetch high word + or t1, t2 + # Point to either the 2cyc or 1cyc configuration, depending on what we need + # to load. + li s0, %lo(RDPQ_MODE_COMBINER_2CYC) + bltz t1, set_cycle_type + li cycle_type, ((SOM_CYCLE_MASK ^ SOM_CYCLE_2) >> 32) | 0x10000000 +set_1cyc: + li s0, %lo(RDPQ_MODE_COMBINER_1CYC) + move blend_final, blend_1cyc + li cycle_type, ((SOM_CYCLE_MASK ^ SOM_CYCLE_1) >> 32) | 0x10000000 +set_cycle_type: + # Set cycle type bits in other modes high word. Also put the correct + # command (0xEF) in the top byte: we achieve this by first setting the + # top byte to 0xFF, and then xoring with 0x10 (which is included in + # cycle_type). 
+ or som_hi, (SOM_CYCLE_MASK >> 32) | 0xFF000000 + xor som_hi, cycle_type + + + ###################################### + # + # ANTI_ALIASING & COVERAGE CONFIGURATION + # + ###################################### + + # We need to configure the various mode bits depending + # on the AA (SOM_AA_ENABLE) and blender-to-background settings (bkg_blending). + # The bits to set are written in the AA_BLEND_TABLE. + # + # bkg_blending is set to 1 iff the blender step1 formula is configured. This + # is an assumption documented in rdpq_mode: in fact, we need bkg_blending=0 + # when just fogging is enabled (as that doesn't count as background blending), + # and in that case we need to force a second blender step to do the antialiasing. + and t0, som_lo, SOM_AA_ENABLE # Bit 3 + sll t1, bkg_blending, 2 # Bit 2 + or t0, t1 + lw t0, %lo(AA_BLEND_TABLE)(t0) # Load values to set + lw t1, %lo(AA_BLEND_MASK) # Load mask + or t0, blend_final # Merge blend_final formula into the coverage bits + + # Apply changes to SOM lower bits. These changes in t0 are the combination + # of blender settings and coverage bits. + and t0, t1 + not t1, t1 + and som_lo, t1 + or som_lo, t0 + + ###################################### + # + # AA + ALPHA COMPARE TWEAKING + # + ###################################### + + # If we use both AA and alpha compare, AA is ineffective because it uses + # the pixel coverage as blend factor (SOM_BLALPHA_CVG), but that works only + # on polygon edges (where coverage is not 1.0). + # With alpha compare, we would like to smooth on the alpha compare edges, + # not the polygon edges. So we should instead switch to SOM_BLALPHA_CVG_TIMES_CC, + # so that we modulate the coverage with the actual pixel alpha. + # Additionally we need to disable the actual alpha compare feature since it would + # compare the threshold with the alpha multiplied by coverage in this case, which would + # lead to visible seams at polygon edges. 
+ li t0, SOM_ALPHACOMPARE_THRESHOLD | SOM_BLALPHA_CVG + and t1, som_lo, t0 + bne t0, t1, rdpq_update_finish + nop + or som_lo, SOM_BLALPHA_CVG_TIMES_CC + and som_lo, ~SOM_ALPHACOMPARE_MASK + + ###################################### + # + # SAVE SETTINGS & APPLY TO RDP + # + ###################################### + +rdpq_update_finish: + lw comb_hi, 0(s0) + lw comb_lo, 4(s0) + + # Set correct SET_COMBINE opcode (0xFC). The opcode can be anything of + # the other 4 (1pass/2pass dynamic/static). + or comb_hi, 0xFF000000 + xor comb_hi, 0xFF000000 ^ 0xFC000000 + + # Store calculated SOM into RDPQ_OTHER_MODES for debugging purposes + # (to implement rdpq_get_other_modes_raw). Notice that we don't + # overwrite the MSB with 0xEF: it contains extended flags that we don't + # want to lose + lbu t0, %lo(RDPQ_OTHER_MODES) + 0 + sw som_hi, %lo(RDPQ_OTHER_MODES) + 0 + sw som_lo, %lo(RDPQ_OTHER_MODES) + 4 + sb t0, %lo(RDPQ_OTHER_MODES) + 0 + + jal_and_j RDPQ_Write16, RDPQ_Finalize + +rdpq_update_fillcopy: + # We are in copy/fill mode. It is sufficient to emit a SET_OTHER_MODES + # to configure it. + or a0, som_hi, 0xFF000000 + xor a0, 0xFF000000 ^ 0xEF000000 + move a1, som_lo + jal_and_j RDPQ_Write8, RDPQ_Finalize + + .endfunc + + #undef comb_hi + #undef comb_lo + #undef som_hi + #undef som_lo + #undef comb_hi_noid + #undef blend_1cyc + #undef blend_2cyc + #undef blend_final + #undef passthrough + #undef cycle_type + + + .section .text.rdpq_scissor + + ############################################################# + # RDPQ_WriteSetScissor + # + # Given a SET_SCISSOR command in a0/a1, writes it into RDPQ_SCISSOR_RECT + # as-is (exclusive), and then sends it to RDP after optionally adjusting + # the extents to match the current SOM cycle type.
+ ############################################################# + .func RDPQ_WriteSetScissor +RDPQ_WriteSetScissor: + sw a0, %lo(RDPQ_SCISSOR_RECT) + 0x0 + lb t6, %lo(RDPQ_OTHER_MODES) + 0x1 + # Bit 21 of the first word is set if FILL or COPY mode is active + andi t6, 0x1 << 5 + # Leave unchanged when not in FILL or COPY mode + beqz t6, 1f + sw a1, %lo(RDPQ_SCISSOR_RECT) + 0x4 # (delay slot) always store the unadjusted a1 + + # Subtract 1 subpixel from XL (bits 23:12, as 10.2 fixed point) + addiu a1, -(1 << 12) + +1: + j RDPQ_Write8 + nop + .endfunc + + + .section .text.rdpq_fillcolor + + ############################################################# + # RDPQ_WriteSetFillColor + # + # Given a 32-bit RGBA color in a1, writes it into RDPQ_FILL_COLOR + # as-is, and then sends it to RDP after optionally converting it + # into 16-bit, depending on the current target bitdepth. + ############################################################# + .func RDPQ_WriteSetFillColor +RDPQ_WriteSetFillColor: + sw a1, %lo(RDPQ_FILL_COLOR) + lbu t0, %lo(RDPQ_TARGET_BITDEPTH) + beq t0, 3, RDPQ_Write8 + lui a0, 0xF700 # SET_FILL_COLOR (delay slot: a0 is set on both paths) + # Branch not taken: pack the 8-bit channels into 16-bit RGBA 5551 + # (5 bits each at bits 11, 6 and 1, plus 1 bit of alpha at bit 0) + srl t0, a1, 24 + (8-5) - 11 + srl t1, a1, 16 + (8-5) - 6 + srl t2, a1, 8 + (8-5) - 1 + srl t3, a1, 0 + (8-1) - 0 + andi t0, 0x1F << 11 + andi t1, 0x1F << 6 + andi t2, 0x1F << 1 + andi t3, 0x01 << 0 + or t4, t0, t1 + or t5, t2, t3 + or a1, t4, t5 + # Replicate the 16-bit color in both halves of the 32-bit word + sll t0, a1, 16 + j RDPQ_Write8 + or a1, t0 + .endfunc + + +######################################################################### +# +# RDPQ_Triangle: assemble an RDP triangle command +# +########################################################################## + + .section .data.rdpq_triangle + .align 4 +TRICONST1: .half 0xFFFF,0,0,0,0x200,0x200,0x200,0x200 + + + .section .text.rdpq_triangle + + ##################################################################### + # RDPQ_Triangle + # + # INPUT: + # * a0: high 32-bit word of the triangle command. This will be + # completed with the left/right flag and the mipmap level.
+ # * a1,a2,a3: pointer to the triangle structures in DMEM + # * v0: 0=cull front, 1=cull back, any other value = culling disabled + # * s3: output buffer pointer + ##################################################################### + + # Implementation limits of the RSP version. These are all edge cases that are probably + # not necessary to get 100% right as they are really degenerate situations. Notice that + # most extreme/degenerated/saturated cases are still handled correctly, as verified + # by the fuzzing performed by test_rdpq_triangle; these are just the three leftovers. + # + # * Texture coordinates are accepted in s10.5 format, but a subtraction between two of them + # must not overflow a 16-bit number. This is a limit of the attribute calculation where the + # edges MA/HA are calculated with 16-bit numbers. It looks like it's not a real problem as + # it would mean having a triangle where either S or T spans more than 1024 texels within it. + # Fixing it would require changing MA/HA into 32-bit numbers, which has other annoying fallouts. + # * In case of quasi-degenerate triangles (lines), when the Y difference between two vertices + # is just 0.25 (0x00000001), the correct normal reciprocal would be 1.0, but it is calculated + # as 0x7FFF8000 which is 0.5 (because it's basically saturating s15.16). This means that the calculated + # edge is twice as big. Again, it doesn't matter as it can't really be seen within a 0.25 slope. + # test_rdpq_triangle has a triangle that triggers this, commented out. + # * In some cases, Z/W-related derivatives (DwDx, DwDy, DzDx, DzDy) can saturate during calculation. + # In this case, the dependent D*De derivatives will be wrong (how much it will depend on how far + # the real result is from the saturated number). In any case, much better than an overflow. + # test_rdpq_triangle checks if there's a saturation and skips checks for known-wrong values.
+ + .func RDPQ_Triangle +RDPQ_Triangle: + #define tricmd a0 + #define vtx1 a1 + #define vtx2 a2 + #define vtx3 a3 + #define cull v0 + + #define y1 t4 + #define y2 t5 + #define y3 t6 + #define x1 t7 + #define x2 t8 + #define x3 v0 + + # r, g, b, a, s, t, w, z + #define vfinal_i $v01 + #define vfinal_f $v02 + #define vdx_i $v03 + #define vdx_f $v04 + #define vde_i $v05 + #define vde_f $v06 + #define vdy_i $v07 + #define vdy_f $v08 + + #define vattr1 $v09 + #define vattr2 $v10 + #define vattr3 $v11 + #define vma $v12 + #define vha $v13 + + #define vinvw_i $v14 + #define vinvw_f $v15 + + #define vedges_i $v16 + #define vedges_f $v17 + #define vnz_i $v18 + #define vnz_f $v19 + #define vslope_i $v20 + #define vslope_f $v21 + #define vxy32 $v22 + #define vxy21 $v23 + #define vhml $v24 + #define vfy_i $v25 + #define vfy_f $v26 + + #define vtmp $v28 + #define v__ $v29 + #define invn_i $v31.e4 + #define invn_f $v31.e5 + #define invsh_i $v31.e6 + #define invsh_f $v31.e7 + + #define VTX_ATTR_X 0 + #define VTX_ATTR_Y 2 + #define VTX_ATTR_Z 4 + #define VTX_ATTR_RGBA 8 + #define VTX_ATTR_S 12 + #define VTX_ATTR_T 14 + #define VTX_ATTR_W 16 + #define VTX_ATTR_INVWi 20 + #define VTX_ATTR_INVWf 22 + + j half_swap + li t0, 1 + +swap_loop: + lh y2, VTX_ATTR_Y(vtx2) + lh y3, VTX_ATTR_Y(vtx3) + blt y2, y3, half_swap + move t1, vtx2 + move vtx2, vtx3 + move vtx3, t1 + xor cull, 1 + +half_swap: + lh y1, VTX_ATTR_Y(vtx1) + lh y2, VTX_ATTR_Y(vtx2) + blt y1, y2, swap_end + move t1, vtx1 + move vtx1, vtx2 + move vtx2, t1 + xor cull, 1 + +swap_end: + bnez t0, swap_loop + addi t0, -1 + + # We want to build this layout + # vxy32 = X3 X2 X3 -- Y3 Y2 Y3 -- + # vxy21 = X1 -- X2 -- Y1 -- Y2 -- + + lsv vxy32.e0, VTX_ATTR_X,vtx3 + lsv vxy32.e4, VTX_ATTR_Y,vtx3 + vor vxy32, vzero, vxy32.h0 + lsv vxy32.e1, VTX_ATTR_X,vtx2 + lsv vxy32.e5, VTX_ATTR_Y,vtx2 + + lsv vxy21.e0, VTX_ATTR_X,vtx1 + lsv vxy21.e2, VTX_ATTR_X,vtx2 + lsv vxy21.e4, VTX_ATTR_Y,vtx1 + lsv vxy21.e6, VTX_ATTR_Y,vtx2 + + # Store Y values 
in output + ssv vxy21.e4, 6,s3 # y1 + ssv vxy32.e5, 4,s3 # y2 + ssv vxy32.e4, 2,s3 # y3 + + # Now calculate: + # vxy32 = X3 X2 X3 -- Y3 Y2 Y3 -- + # - + # vxy21.q0 = X1 X1 X2 X2 Y1 Y1 Y2 Y2 + # = + # vhml = HX MX LX -- HY MY LY -- + vsubc vhml, vxy32, vxy21.q0 + #define hx vhml.e0 + #define mx vhml.e1 + #define lx vhml.e2 + #define my1 vhml.e3 + #define hy vhml.e4 + #define my vhml.e5 + #define ly vhml.e6 + #define mx1 vhml.e7 + + # Duplicate MX and MY into the two empty lanes. + # vhml = HX MX LX MY HY MY LY MX + vmov mx1, mx + vmov my1, my + + # Calculate normal: compute 32-bit cross product: + # + # vhml = HX MX LX MY HY MY LY MX + # * + # vhml.h3 = MY MY MY MY MX MX MX MX + # = + # nz = HX*MY -- -- -- HY*MX -- -- -- + vmudh vnz_f, vhml, vhml.h3 + vsar vnz_i, COP2_ACC_HI + vsar vnz_f, COP2_ACC_MD + + # Compute HY*MX - HX*MY. Result in e4. + vsubc vnz_f, vnz_f.e0 + vsub vnz_i, vnz_i.e0 + + # Extract left flag from the sign of NZ. + # Since we calculated -NZ, we need to reverse the sign + mfc2 t0, vnz_i.e4 + sge t0, t0, zero + beq t0, cull, JrRa + sll t0, 7 + or tricmd, t0 + + # Add num mipmap levels + lbu t1, %lo(RDPQ_OTHER_MODES) + 0 + andi t1, 0x38 # Isolate bits 3-5 (aka 59-61 of SOM) + or tricmd, t1 + + # Calculate reciprocal of normal + vrcph vnz_i.e0, vnz_i.e4 + vrcpl vnz_f.e0, vnz_f.e4 + vrcph vnz_i.e0, v__.e0 + #define inz_f vnz_f.e0 + #define inz_i vnz_i.e0 + + # Compute SLOPE vector + # slope = -- -- -- -- 1/HY 1/MY 1/LY 1/NZ + + # Compute ISL (L slope). 1/LY (s14.1) + vrcp vslope_f.e6, vhml.e6 + vrcph vslope_i.e6, vhml.e6 + # Compute ISM (M slope). 1/MY (s14.1) + vrcp vslope_f.e5, vhml.e5 + vrcph vslope_i.e5, vhml.e5 + # Compute ISH (H slope).
1/HY (s14.1) + vrcp vslope_f.e4, vhml.e4 + vrcph vslope_i.e4, vhml.e4 + + ################## + # 1 NR pass + ################## + vmov vslope_f.e7, inz_f + vmov vslope_i.e7, inz_i + + # Adjust multiplying by 2 (required after reciprocal) + #vmudn vslope_f, vslope_f, K2 + #vmadh vslope_i, vslope_i, K2 + vaddc vslope_f, vslope_f + vadd vslope_i, vslope_i + + # Prepare 32-bit number containing the source of the reciprocal + # Notice that we're calculating NR over 1 32-bit input (NZ) and + # 3 16-bit inputs (HY, MY, LY), for which we provide 0 in the lower + # part. + # vhml = HX MX LX MY HY MY LY NZf + # v__ = 0 0 0 0 0 0 0 NZi + vxor v__, v__ + vmov v__.e7, vnz_i.e4 + vmov vhml.e7, vnz_f.e4 + + #define vtmp_f vattr1 + #define vtmp_i vattr2 + #define vk2 vattr3 + + # NR: R*X + vmudl vtmp_f, vslope_f, vhml + vmadm vtmp_f, vslope_i, vhml + vmadn vtmp_f, vslope_f, v__ + vmadh vtmp_i, vslope_i, v__ + + # NR: 2 - R*X + vor vk2, vzero, K2 + vsubc vtmp_f, vzero, vtmp_f + vsub vtmp_i, vk2, vtmp_i + + # NR: X * (2 - R*X) + vmudl vk2, vtmp_f, vslope_f + vmadm vk2, vtmp_i, vslope_f + vmadn vslope_f, vtmp_f, vslope_i + vmadh vslope_i, vtmp_i, vslope_i + #vmadn vslope_f, vzero, vzero # re-read vslope_f in case of overflow + + # vhml = HX MX LX MY HY MY LY NZf + # v__ = 0 0 0 0 0 0 0 NZi + # slope = -- -- -- -- 1/HY 1/MY 1/LY 1/NZ + + vmov vnz_f.e0, vslope_f.e7 + vmov vnz_i.e0, vslope_i.e7 + + # Rotate slope + # slope = 1/HY 1/MY 1/LY 1/NZ -- -- -- -- + sqv vslope_f.e4, 0x10,s3 + lqv vslope_f.e0 0x10,s3 + sqv vslope_i.e4, 0x10,s3 + lqv vslope_i.e0 0x10,s3 + + # Shift left NZ (that contains INVNZ) by 2, to align with the fixed point precision + # that will be required later. + vmudn vnz_f, vnz_f, K4 + vmadh vnz_i, vnz_i, K4 + + # FY.e4 = fy (s15.16) + vsll8 vfy_f, vxy21, 14 + vsra vfy_i, vxy21, 2 + # FY.e4 = floorf(y1) - y1 + # TODO: this is always a negative fraction, so fy_i is always 0xFFFF (or fy_i=fy_f=0). + # See if we can take advantage of this somehow to simplify later. 
+ vsubc vfy_f, vzero, vfy_f + vsub vfy_i, vfy_i + + # Finalize slope divisions by multiplying by the reciprocal. + # vhml = HX MX LX 1 HY MY LY MX + # * + # slope = 1/HY 1/MY 1/LY 1/NZ -- -- -- -- + # = + # slope = HX/HY MX/MY LX/LY -- -- -- -- -- + vmudn v__, vslope_f, vhml + vmadh v__, vslope_i, vhml + vsar vslope_f, COP2_ACC_MD + vsar vslope_i, COP2_ACC_HI + + #define ish_f vslope_f.e0 + #define ish_i vslope_i.e0 + #define ism_f vslope_f.e1 + #define ism_i vslope_i.e1 + #define isl_f vslope_f.e2 + #define isl_i vslope_i.e2 + + # Store slopes + ssv isl_f, 14,s3 + ssv isl_i, 12,s3 + ssv ism_f, 30,s3 + ssv ism_i, 28,s3 + ssv ish_f, 22,s3 + ssv ish_i, 20,s3 + + # vxy21 = X1 -- X2 -- Y1 -- Y2 -- + # slope = HX/HY MX/MY LX/LY -- -- -- -- -- + + # FINAL = X1/X2 in 16.16 precision + # TODO: maybe fold into the next MAC sequence? + vsra vfinal_i, vxy21, 2 + vsll8 vfinal_f, vxy21, 14 + + # Store X2 value in output (as XL) + ssv vfinal_f.e2, 10,s3 # XL_F + ssv vfinal_i.e2, 8,s3 # Xl_I + + # Compute XH/XM + # TODO: fy_i is always 0xFFFFFFFF here. See if we can benefit from this. + vmudl v__, vslope_f, vfy_f.e4 + vmadm v__, vslope_i, vfy_f.e4 + vmadn vedges_f, vslope_f, vfy_i.e4 + vmadh vedges_i, vslope_i, vfy_i.e4 + + vaddc vedges_f, vfinal_f.q0 + vadd vedges_i, vfinal_i.q0 + + ssv vedges_f.e1, 26,s3 # XM_F + ssv vedges_i.e1, 24,s3 # XM_I + ssv vedges_f.e0, 18,s3 # XH_F + ssv vedges_i.e0, 16,s3 # XH_I + + sh tricmd, 0(s3) + add s3, 32 + + # Load attributes into ATTR registers. + # TODO: we can interleave these in all the code above, and at that point + # it's useless to test for tricmd to save loads. Just load them all. 
+ + #define attr1_r vattr1.e0 + #define attr2_r vattr2.e0 + #define attr3_r vattr3.e0 + #define attr1_s vattr1.e4 + #define attr2_s vattr2.e4 + #define attr3_s vattr3.e4 + #define attr1_invw vattr1.e6 + #define attr2_invw vattr2.e6 + #define attr3_invw vattr3.e6 + #define attr1_z vattr1.e7 + #define attr2_z vattr2.e7 + #define attr3_z vattr3.e7 + luv attr1_r, VTX_ATTR_RGBA,vtx1 # RGBA + luv attr2_r, VTX_ATTR_RGBA,vtx2 + luv attr3_r, VTX_ATTR_RGBA,vtx3 + + llv attr1_s, VTX_ATTR_S,vtx1 # S & T + llv attr2_s, VTX_ATTR_S,vtx2 + llv attr3_s, VTX_ATTR_S,vtx3 + + # We need to normalize INV_W in [0..1], by dividing them by the maximum INV_W. + # We will multiply by W instead, and thus we search for the minimum W. + lw t0, VTX_ATTR_W(vtx1) + lw t1, VTX_ATTR_W(vtx2) + blt t0, t1, 1f + lw t2, VTX_ATTR_W(vtx3) + move t0, t1 +1: + blt t0, t2, 1f + nop + move t0, t2 +1: + mtc2 t0, vinvw_f.e0 + srl t0, 16 + mtc2 t0, vinvw_i.e0 + + lsv vinvw_i.e4, VTX_ATTR_INVWi,vtx1 + lsv vinvw_i.e5, VTX_ATTR_INVWi,vtx2 + lsv vinvw_i.e6, VTX_ATTR_INVWi,vtx3 + + lsv vinvw_f.e4, VTX_ATTR_INVWf,vtx1 + lsv vinvw_f.e5, VTX_ATTR_INVWf,vtx2 + lsv vinvw_f.e6, VTX_ATTR_INVWf,vtx3 + + #define K_FFFF vtmp.e0 + + li s0, %lo(TRICONST1)+8 + lsv K_FFFF, -8,s0 + + # invw: minw -- -- -- invw1 invw2 invw3 -- + # + # We need to multiply minw with the three invw. All numbers are positive s16.16, + # and the result is known to fit 0..1. By doing a standard 32-bit multiplication + # on RSP, we end up with a positive s16.16 number, where the integer word is zero. + # In fact, in theory W * 1/W = 1, but both numbers are likely missing enough bits + # of precision that the result will always be slightly lower than 1 (and thus the + # integer part will be 0). + vmudl v__, vinvw_f, vinvw_f.e0 + vmadm v__, vinvw_i, vinvw_f.e0 + vmadn vinvw_f, vinvw_f, vinvw_i.e0 + vmadh vinvw_i, vinvw_i, vinvw_i.e0 + + # So now vinvw_i should be 0 (in lanes 4..6). It turns out there is one exception: + # minw == invw == 1.0. 
In that case, the result will be exactly 1, and thus + # vinvw_i will be 1. Since we want to simplify further calculations and avoid + # taking vinvw_i into account, we want to replace 0x1_0000 with 0x0_FFFF. + # Do a manual saturation: vinvw_f = (vinvw_i == 0 ? vinvw_f : 0xFFFF) + veq vinvw_i, vzero + vmrg vinvw_f, K_FFFF + + # Load 0x200 in the first 4 lanes of the vector, using a misaligned lqv. + # 0x200 is the constant that can be used to >>7, which will be used for + # the RGBA components. + # + # invw: 0x200 0x200 0x200 0x200 invw1 invw2 invw3 -- + lqv vinvw_f, 0,s0 + + vmudm vattr1, vinvw_f.h0 + vmudm vattr2, vinvw_f.h1 + vmudm vattr3, vinvw_f.h2 + + # Change inv_w from 0.16 to s0.15 by shifting by one + vsrl vinvw_f, vinvw_f, 1 + + # Copy inv_w components into ATTRn + vmov vattr1.e6, vinvw_f.e4 + vmov vattr2.e6, vinvw_f.e5 + vmov vattr3.e6, vinvw_f.e6 + + lsv attr1_z, VTX_ATTR_Z,vtx1 # Load Z + lsv attr2_z, VTX_ATTR_Z,vtx2 + lsv attr3_z, VTX_ATTR_Z,vtx3 + + ######################################################## + # ATTRIBUTES + ######################################################## +calc_attrs: + # MA = A2 - A1 + # HA = A3 - A1 + # NOTE: S/T coordinates are kept as s10.5, so they can overflow here. + # The subtraction is saturated so the error is minimized, but it is + # indeed there. To fix this, we would have to produce a 32-bit result here + # and then change the DX/DY calculations to use 32-bit numbers as well. + # Note also that we need "vsubc zero,zero" to clear the VCC (carry) bit + # which vsub reads as input. + vsubc vzero, vzero + vsub vma, vattr2, vattr1 + vsub vha, vattr3, vattr1 + + # vhml = HX MX LX MY1 HY MY LY MX1 + + # TODO: find other strategies to negate MY and HX? + # Or maybe this is good as we can probably interleave it, being scalar ops. + # TODO: or we could also compute -MA / -HA. But that's even more vector ops.
+ mfc2 t0, my + mfc2 t1, hx + neg t0 + neg t1 + mtc2 t0, my + mtc2 t1, hx + + # DX = MA * HY - HA * MY + vmudh vdx_f, vma, hy + vmadh vdx_f, vha, my + vsar vdx_f, COP2_ACC_MD + vsar vdx_i, COP2_ACC_HI + + # DY = HA * MX - MA * HX + vmudh vdy_f, vha, mx + vmadh vdy_f, vma, hx + vsar vdy_f, COP2_ACC_MD + vsar vdy_i, COP2_ACC_HI + + # DX * 1/N (TODO: check if we can pre-multiply edges to avoid this) + vmudl v__, vdx_f, inz_f + vmadm v__, vdx_i, inz_f + vmadn vdx_f, vdx_f, inz_i + vmadh vdx_i, vdx_i, inz_i + + # DY * 1/N (TODO: check if we can pre-multiply edges to avoid this) + vmudl v__, vdy_f, inz_f + vmadm v__, vdy_i, inz_f + vmadn vdy_f, vdy_f, inz_i + vmadh vdy_i, vdy_i, inz_i + + # DE = DX * invsh + DY + vmadl v__, vdx_f, ish_f + vmadm v__, vdx_i, ish_f + vmadn vde_f, vdx_f, ish_i + vmadh vde_i, vdx_i, ish_i + + # FINAL = vATTR1 + DE * FY + # TODO: fy_i is always 0xFFFFFFFF here. See if we can benefit from this. + # TODO: actually, it can also be fy_i = fy_f = 0. + vmudl v__, vde_f, vfy_f.e4 + vmadm v__, vde_i, vfy_f.e4 + vmadn vfinal_f, vde_f, vfy_i.e4 + vmadh vfinal_i, vde_i, vfy_i.e4 + vmadh vfinal_i, vattr1, K1 + + andi t0, tricmd, 0x400 + beqz t0, no_color + + # Store color + sdv vfinal_i.e0, 0x00,s3 + sdv vdx_i.e0, 0x08,s3 + sdv vfinal_f.e0, 0x10,s3 + sdv vdx_f.e0, 0x18,s3 + sdv vde_i.e0, 0x20,s3 + sdv vdy_i.e0, 0x28,s3 + sdv vde_f.e0, 0x30,s3 + sdv vdy_f.e0, 0x38,s3 + addi s3, 0x40 + +no_color: + andi t0, tricmd, 0x200 + beqz t0, no_texture + + # Store texture + sdv vfinal_i.e4, 0x00,s3 + sdv vdx_i.e4, 0x08,s3 + sdv vfinal_f.e4, 0x10,s3 + sdv vdx_f.e4, 0x18,s3 + sdv vde_i.e4, 0x20,s3 + sdv vdy_i.e4, 0x28,s3 + sdv vde_f.e4, 0x30,s3 + sdv vdy_f.e4, 0x38,s3 + addi s3, 0x40 + +no_texture: + andi t0, tricmd, 0x100 + beqz t0, JrRa + + # Store z + ssv vfinal_i.e7, 0x00,s3 + ssv vfinal_f.e7, 0x02,s3 + ssv vdx_i.e7, 0x04,s3 + ssv vdx_f.e7, 0x06,s3 + ssv vde_i.e7, 0x08,s3 + ssv vde_f.e7, 0x0A,s3 + ssv vdy_i.e7, 0x0C,s3 + ssv vdy_f.e7, 0x0E,s3 + jr ra + addi s3, 
0x10 + + #undef tricmd + #undef vtx1 + #undef vtx2 + #undef vtx3 + #undef cull + + #undef y1 + #undef y2 + #undef y3 + #undef x1 + #undef x2 + #undef x3 + + # r, g, b, a, s, t, w, z + #undef vfinal_i + #undef vfinal_f + #undef vdx_i + #undef vdx_f + #undef vde_i + #undef vde_f + #undef vdy_i + #undef vdy_f + + #undef vattr1 + #undef vattr2 + #undef vattr3 + #undef vma + #undef vha + + #undef vinvw_i + #undef vinvw_f + + #undef vedges_i + #undef vedges_f + #undef vnz_i + #undef vnz_f + #undef vslope_i + #undef vslope_f + #undef vxy32 + #undef vxy21 + #undef vhml + #undef vfy_i + #undef vfy_f + + #undef vtmp + #undef v__ + #undef invn_i + #undef invn_f + #undef invsh_i + #undef invsh_f + + #undef VTX_ATTR_X + #undef VTX_ATTR_Y + #undef VTX_ATTR_Z + #undef VTX_ATTR_RGBA + #undef VTX_ATTR_S + #undef VTX_ATTR_T + #undef VTX_ATTR_W + #undef VTX_ATTR_INVWi + #undef VTX_ATTR_INVWf + + .endfunc + + + diff --git a/include/rspq.h b/include/rspq.h index f3bc633aff..18702765be 100644 --- a/include/rspq.h +++ b/include/rspq.h @@ -137,6 +137,30 @@ * This feature should normally not be used by end-users, but by libraries * in which a very low latency of RSP execution is paramount to their workings. * + * ## RDP support + * + * RSPQ contains basic support for sending commands to RDP. It is meant + * to collaborate with the RDPQ module for full RDP usage (see rdpq.h), + * but it does provide some barebone support on its own. + * + * In particular, it allocates and handles two buffers (used with double + * buffering) that hold RDP commands generated by RSPQ overlays, where + * commands are stored to be sent to RDP via DMA. + * + * Overlays that generate RDP commands as part of their duty can call + * the assembly API RSPQ_RdpSend that will take care of sending the + * RDP commands via DMA into the RDRAM buffers (possibly swapping them + * when they are full) and also tell the RDP to run them.
+ * + * Notice that, while the RSP would allow also to send commands to RDP + * directly via DMEM, this is deemed as inefficient in the grand picture: + * DMEM in general is too small and would thus cause frequent stalls + * (RSP waiting for the RDP to run the commands and buffers to flush); + * at the same time, it is also hard to efficiently mix and match + * RDP buffers in DMEM and RDRAM, as that again can cause excessive + * stalling. So for the time being, this mode of working is unsupported + * by RSPQ. + * */ #ifndef __LIBDRAGON_RSPQ_H @@ -144,6 +168,8 @@ #include #include +#include +#include #include #ifdef __cplusplus @@ -292,10 +318,15 @@ void rspq_overlay_unregister(uint32_t overlay_id); * This function returns a pointer to the state area in RDRAM (not DMEM). It is * meant to modify the state on the CPU side while the overlay is not loaded. * The layout of the state and its size should be known to the caller. + * + * To avoid race conditions between overlay state access by CPU and RSP, this + * function first calls #rspq_wait to force a full sync and make sure the RSP is + * idle. As such, it should be treated as a debugging function. * * @param overlay_ucode The ucode overlay for which the state pointer will be returned. * - * @return Pointer to the overlay state (in RDRAM) + * @return Pointer to the overlay state (in RDRAM). The pointer is returned in + * the cached segment, so make sure to handle cache coherency appropriately. */ void* rspq_overlay_get_state(rsp_ucode_t *overlay_ucode); @@ -310,7 +341,7 @@ void* rspq_overlay_get_state(rsp_ucode_t *overlay_ucode); * @code{.c} * // This example adds to the queue a command called CMD_SPRITE with * // index 0xA, with its arguments, for a total of three words. The overlay - * // was previously registered via #rspq_register_overlay. + * // was previously registered via #rspq_overlay_register. 
* * #define CMD_SPRITE 0xA * @@ -571,7 +602,8 @@ void rspq_flush(void); * @brief Wait until all commands in the queue have been executed by RSP. * * This function blocks until all commands present in the queue have - * been executed by the RSP and the RSP is idle. + * been executed by the RSP and the RSP is idle. If the queue contained also + * RDP commands, it also waits for those commands to finish drawing. * * This function exists mostly for debugging purposes. Calling this function * is not necessary, as the CPU can continue adding commands to the queue @@ -579,9 +611,7 @@ void rspq_flush(void); * (eg: to access data that was processed by RSP) prefer using #rspq_syncpoint_new / * #rspq_syncpoint_wait which allows for more granular synchronization. */ -#define rspq_wait() ({ \ - rspq_syncpoint_wait(rspq_syncpoint_new()); \ -}) +void rspq_wait(void); /** * @brief Create a syncpoint in the queue. diff --git a/include/rspq_constants.h b/include/rspq_constants.h index 49eff8d1ce..d613f8700c 100644 --- a/include/rspq_constants.h +++ b/include/rspq_constants.h @@ -24,6 +24,11 @@ #define RSPQ_LOWPRI_CALL_SLOT (RSPQ_MAX_BLOCK_NESTING_LEVEL+0) ///< Special slot used to store the current lowpri pointer #define RSPQ_HIGHPRI_CALL_SLOT (RSPQ_MAX_BLOCK_NESTING_LEVEL+1) ///< Special slot used to store the current highpri pointer +/** Signal used by RDP SYNC_FULL command to notify that an interrupt is pending */ +#define SP_STATUS_SIG_RDPSYNCFULL SP_STATUS_SIG1 +#define SP_WSTATUS_SET_SIG_RDPSYNCFULL SP_WSTATUS_SET_SIG1 +#define SP_WSTATUS_CLEAR_SIG_RDPSYNCFULL SP_WSTATUS_CLEAR_SIG1 + /** Signal used by RSP to notify that a syncpoint was reached */ #define SP_STATUS_SIG_SYNCPOINT SP_STATUS_SIG2 #define SP_WSTATUS_SET_SIG_SYNCPOINT SP_WSTATUS_SET_SIG2 diff --git a/include/sprite.h b/include/sprite.h index 5a897bac61..52f1e05af7 100644 --- a/include/sprite.h +++ b/include/sprite.h @@ -59,6 +59,30 @@ typedef struct sprite_s uint32_t data[0]; } sprite_t; +/** + * @brief Sprite 
detail texture information structure. + * + * A "detail texture" is a 2D image with metadata attached to it + * to increase the perceived resolution of the main sprite when rendering + * with little to no additional TMEM usage. + * + * If the sprite uses a detail texture, its information can be retrieved + * using the #sprite_get_detail_pixels function. + * + * To include a detail texture in libdragon's sprite format, use + * the mksprite tool with the --detail argument. + * + * #rdpq_sprite_upload automatically uploads detail textures associated with + * the sprite. + */ +typedef struct sprite_detail_s +{ + /** @brief Is the detail texture the same as the main surface of the sprite, used for fractal detailing */ + bool use_main_tex; + /** @brief Blend factor of the detail texture in range of 0 to 1 */ + float blend_factor; +} sprite_detail_t; + #define SPRITE_FLAGS_TEXFORMAT 0x1F ///< Pixel format of the sprite #define SPRITE_FLAGS_OWNEDBUFFER 0x20 ///< Flag specifying that the sprite buffer must be freed by sprite_free #define SPRITE_FLAGS_EXT 0x80 ///< Sprite contains extended information (new format) @@ -145,6 +169,28 @@ surface_t sprite_get_pixels(sprite_t *sprite); */ surface_t sprite_get_lod_pixels(sprite_t *sprite, int num_level); +/** + * @brief Create a surface_t pointing to the contents of a detail texture. + * + * This function can be used to access the detail texture within a sprite file. + * It is useful for sprites created by mksprite containing one. + * + * If there isn't a detail texture, the returned surface is 0. + * + * Additional detail information such as factor or texparms are accessible + * through the filled sprite_detail_t and rdpq_texparms_t structure. + * If you don't wish to use this information, pass NULL to the info argument(s). + * + * Notice that no memory allocations or copies are performed: + * the returned surface will point to the sprite contents.
+ * + * @param sprite The sprite to access + * @param info The detail information struct to fill if needed + * @param infoparms The detail texture sampling struct to fill if needed + * @return surface_t The surface containing the data. + */ +surface_t sprite_get_detail_pixels(sprite_t *sprite, sprite_detail_t *info, rdpq_texparms_t *infoparms); + /** * @brief Return a surface_t pointing to a specific tile of the spritemap. * @@ -176,6 +222,20 @@ surface_t sprite_get_tile(sprite_t *sprite, int h, int v); */ uint16_t* sprite_get_palette(sprite_t *sprite); +/** + * @brief Get a copy of the RDP texparms, optionally stored within the sprite. + * + * This function allows to obtain the RDP texparms structure stored within the + * sprite, if any. This structure is used by the RDP to set texture properties + * such as wrapping, mirroring, etc. It can be added to the sprite via + * the mksprite tool, using the `--texparms` option. + * + * @param sprite The sprite to access + * @param parms The texparms structure to fill + * @return true if the sprite contains RDP texparms, false otherwise + */ +bool sprite_get_texparms(sprite_t *sprite, rdpq_texparms_t *parms); + /** * @brief Return the number of LOD levels stored within the sprite (including the main image). * @@ -195,7 +255,7 @@ int sprite_get_lod_count(sprite_t *sprite); * textures should fit in TMEM. * * In case of 2D graphics, it is more common to have images of arbitrary size. - * They can be drawn with #rdp_draw_sprite (accelerated) or #graphics_draw_sprite + * They can be drawn with #rdpq_sprite_blit (accelerated) or #graphics_draw_sprite * (CPU) without specific limits (the RDP accelerated * version does internally need to split the sprite in multiple parts, but * that is indeed possible).
diff --git a/include/surface.h b/include/surface.h index 79f316c1fd..d5a127ff1e 100644 --- a/include/surface.h +++ b/include/surface.h @@ -37,11 +37,11 @@ * a portion of the original surface: * * @code{.c} - * surface_t *fb = display_get(); // wait for a framebuffer to be ready + * surface_t *fb = display_get(); // wait for a framebuffer to be ready * * // Attach the RDP to the top 40 rows of the framebuffer * surface_t fbtop = surface_make_sub(fb, 0, 0, 320, 40); - * rdp_attach(&fbtop); + * rdpq_attach(&fbtop); * @endcode * * Surfaces created by #surface_make_sub don't need to be freed as they @@ -94,9 +94,11 @@ extern "C" { * This enum defines the pixel formats that can be used for #surface_t buffers. * The list corresponds to the pixel formats that the RDP can use as textures. * - * Notice that only some of those can be used by RDP as framebuffer (specifically, - * #FMT_RGBA16, #FMT_RGBA32 and #FMT_CI8). Moreover, the CPU-based graphics library - * graphics.h only accepts surfaces in either #FMT_RGBA16 or #FMT_RGBA32 as target buffers. + * @note Some of these formats can be used by RDP as framebuffer (specifically, + * #FMT_RGBA16, #FMT_RGBA32 and #FMT_CI8). + * @warning the CPU-based graphics library + * graphics.h only accepts surfaces in either #FMT_RGBA16 or #FMT_RGBA32 as + * target buffers, and does not assert. 
*/ typedef enum { FMT_NONE = 0, ///< Placeholder for no format defined @@ -116,8 +118,9 @@ typedef enum { /** @brief Return the name of the texture format as a string (for debugging purposes) */ const char* tex_format_name(tex_format_t fmt); -#define SURFACE_FLAGS_TEXFORMAT 0x1F ///< Pixel format of the surface -#define SURFACE_FLAGS_OWNEDBUFFER 0x20 ///< Set if the buffer must be freed +#define SURFACE_FLAGS_TEXFORMAT 0x001F ///< Pixel format of the surface +#define SURFACE_FLAGS_OWNEDBUFFER 0x0020 ///< Set if the buffer must be freed +#define SURFACE_FLAGS_TEXINDEX 0x0F00 ///< Placeholder for rdpq lookup table /** * @brief A surface buffer for graphics @@ -151,7 +154,7 @@ typedef struct surface_s * to the caller to handle its lifetime. * * If you plan to use this format as RDP framebuffer, make sure that the provided buffer - * respects the required alignment of 64 bytes, otherwise #rdp_attach will fail. + * respects the required alignment of 64 bytes, otherwise #rdpq_attach will fail. * * @param[in] buffer Pointer to the memory buffer * @param[in] format Pixel format @@ -201,7 +204,7 @@ inline surface_t surface_make_linear(void *buffer, tex_format_t format, uint32_t * not needed anymore. * * A surface allocated via #surface_alloc can be used as a RDP frame buffer - * (passed to #rdp_attach) because it is guaranteed to have the required + * (passed to #rdpq_attach) because it is guaranteed to have the required * alignment of 64 bytes, provided it is using one of the formats supported by * RDP as a framebuffer target (`FMT_RGBA32`, `FMT_RGBA16` or `FMT_I8`). * @@ -264,6 +267,74 @@ inline bool surface_has_owned_buffer(const surface_t *surface) { return surface->buffer != NULL && surface->flags & SURFACE_FLAGS_OWNEDBUFFER; } + +/** + * @brief Create a placeholder surface, that can be used during rdpq block recording. 
+ * + * When recording a rspq block (via #rspq_block_begin / #rspq_block_end) it might + * be useful sometimes to issue draw commands that refer to a surface, but + * allowing the actual surface to change later at any time. + * + * See #rdpq_set_lookup_address for more information. + * + * @note A placeholder surface holds a NULL pointer to the actual bytes. Make sure + * not to use it anywhere else but with rdpq. + * + * @param index Index that will be used to lookup the surface at playback time + * @param format Pixel format + * @param width Width of the surface in pixels + * @param height Height of the surface in pixels + * @param stride Stride of the surface in bytes + * @return surface_t The initialized placeholder surface + * + * @see #surface_make_placeholder_linear + * @see #rdpq_set_lookup_address + */ +inline surface_t surface_make_placeholder(int index, tex_format_t format, uint32_t width, uint32_t height, uint32_t stride) { + return (surface_t){ + .flags = format | (index << 8), + .width = width, + .height = height, + .stride = stride, + .buffer = NULL, + }; +} + +/** + * @brief Create a linear placeholder surface, that can be used during rdpq block recording. + * + * This function is similar to #surface_make_placeholder, but it creates + * a surface that is linearly mapped with no per-line padding or extraneous data. + * (so the stride is automatically deduced from the width). 
+ * + * @param index Index that will be used to lookup the surface at playback time + * @param format Pixel format + * @param width Width of the surface in pixels + * @param height Height of the surface in pixels + * @return surface_t The initialized placeholder surface + * + * @see #surface_make_placeholder + */ +inline surface_t surface_make_placeholder_linear(int index, tex_format_t format, uint32_t width, uint32_t height) { + return surface_make_placeholder(index, format, width, height, TEX_FORMAT_PIX2BYTES(format, width)); +} + +/** + * @brief Returns the lookup index of a placeholder surface + * + * If this surface is a placeholder, this function returns the associated lookup + * index that will be used to retrieve the actual surface at playback time. + * Otherwise, if it is a normal surface, this function will return 0. + * + * @param surface Placeholder surface + * @return int The lookup index of the placeholder surface, or 0 if it is a normal surface + */ +inline int surface_get_placeholder_index(const surface_t *surface) +{ + return (surface->flags >> 8) & 0xF; +} + + #ifdef __cplusplus } #endif diff --git a/n64.mk b/n64.mk index 3637828e49..df53ffa1a5 100644 --- a/n64.mk +++ b/n64.mk @@ -34,6 +34,7 @@ N64_MKDFS = $(N64_BINDIR)/mkdfs N64_TOOL = $(N64_BINDIR)/n64tool N64_SYM = $(N64_BINDIR)/n64sym N64_AUDIOCONV = $(N64_BINDIR)/audioconv64 +N64_MKSPRITE = $(N64_BINDIR)/mksprite N64_C_AND_CXX_FLAGS = -march=vr4300 -mtune=vr4300 -I$(N64_INCLUDEDIR) N64_C_AND_CXX_FLAGS += -falign-functions=32 # NOTE: if you change this, also change backtrace() in backtrace.c diff --git a/src/graphics.c b/src/graphics.c index ce12205052..be6b7c9586 100644 --- a/src/graphics.c +++ b/src/graphics.c @@ -119,6 +119,9 @@ static uint32_t b_color = 0x00000000; * This is exactly the same as calling `graphics_convert_color(RGBA32(r,g,b,a))`. * Refer to #graphics_convert_color for more information.
* + * @deprecated By switching to the rdpq API, this function should not be required + * anymore. Use #RGBA32 or #RGBA16 instead. Please avoid using it in new code if possible. + * * @param[in] r * 8-bit red value * @param[in] g @@ -158,6 +161,9 @@ uint32_t graphics_make_color( int r, int g, int b, int a ) * However, for drawing with the old deprecated RDP API (in particular, * rdp_set_primitive_color), this is still required. * + * @deprecated By switching to the rdpq API, this function should not be required + * anymore. Please avoid using it in new code if possible. + * * @param[in] color * A color structure representing an RGBA color * diff --git a/src/rdp.c b/src/rdp.c index c3213b6e64..fff4b25ec7 100644 --- a/src/rdp.c +++ b/src/rdp.c @@ -1,74 +1,47 @@ /** * @file rdp.c - * @brief Hardware Display Interface + * @brief (Deprecated) Old RDP library * @ingroup rdp */ -#include -#include -#include -#include "n64sys.h" +#include "rspq.h" +#include "rdp.h" +#include "rdpq.h" +#include "rdpq_tri.h" +#include "rdpq_rect.h" +#include "rdpq_macros.h" #include "interrupt.h" #include "display.h" -#include "rdp.h" -#include "sprite.h" #include "debug.h" +#include "n64sys.h" +#include "utils.h" +#include "sprite.h" +#include +#include +#include /** - * @defgroup rdp Hardware Display Interface + * @defgroup rdp (Deprecated) Old RDP library * @ingroup display * @brief Interface to the hardware sprite/triangle rasterizer (RDP). + * + * @deprecated This module is now deprecated. Please use the new RDPQ API instead. + * + * This module contains an old API to draw using the RDP. The API was not extensible + * enough and in general did not provide a good enough foundation for RDP programming. + * So it has been deprecated in favor of the new RDPQ API, which is much more flexible. + * + * All RDP functions are now implemented as wrappers of the RDPQ API. They continue + * to work just like before, but there will be no further work on them. 
Also, most of + * them are explicitly marked as deprecated, and will generate a warning at compile + * time. The warning suggests the alternative RDPQ API to use instead. In most cases, + * the change should be straightforward. + * + * Functions not explicitly marked as deprecated do not have a direct equivalent in + * RDPQ API yet. * - * The hardware display interface sets up and talks with the RDP in order to render - * hardware sprites, triangles and rectangles. The RDP is a very low level rasterizer - * and needs data in a very specific format. The hardware display interface handles - * this by building commands to be sent to the RDP. - * - * Before attempting to draw anything using the RDP, the hardware display interface - * should be initialized with #rdp_init. After the RDP is no longer needed, be sure - * to free all resources using #rdp_close. - * - * Code wishing to use the hardware rasterizer should first acquire a display context - * using #display_get. Once a display context has been acquired, the RDP can be - * attached to the display context with #rdp_attach. Once the display has been - * attached, the RDP can be used to draw sprites, rectangles and textured/untextured - * triangles to the display context. Note that some functions require additional setup, - * so read the descriptions for each function before use. After code has finished - * rendering hardware assisted graphics to the display context, the RDP can be detached - * from the context using #rdp_detach. After calling thie function, it is safe - * to immediately display the rendered graphics to the screen using #display_show, or - * additional software graphics manipulation can take place using functions from the - * @ref graphics. - * - * Careful use of the #rdp_sync operation is required for proper rasterization. Before - * performing settings changes such as clipping changes or setting up texture or solid - * fill modes, code should perform a #SYNC_PIPE. 
A #SYNC_PIPE should be performed again - * before any new texture load. This is to ensure that the last texture operation is - * completed before attempting to change texture memory. Careful execution of texture - * operations can allow code to skip some sync operations. Be careful with excessive - * sync operations as it can stall the pipeline and cause triangles/rectangles to be - * drawn on the next display context instead of the current. - * - * #rdp_detach will automatically perform a #SYNC_FULL to ensure that everything - * has been completed in the RDP. This call generates an interrupt when complete which - * signals the main thread that it is safe to detach. Consequently, interrupts must be - * enabled for proper operation. This also means that code should under normal circumstances - * never use #SYNC_FULL. * @{ */ -/** @brief Size of the internal ringbuffer that holds pending RDP commands */ -#define RINGBUFFER_SIZE 4096 - -/** - * @brief Size of the slack are of the ring buffer - * - * Data can be written into the slack area of the ring buffer by functions creating RDP commands. - * However, when sending a completed command to the RDP, if the buffer has advanced into the slack, - * it will be cleared and the pointer reset to start. This is to stop any commands from being - * split in the middle during wraparound. 
- */ -#define RINGBUFFER_SLACK 1024 - /** * @brief Cached sprite structure * */ @@ -88,13 +61,6 @@ typedef struct uint16_t real_height; } sprite_cache; -/** @brief Ringbuffer where partially assembled commands will be placed before sending to the RDP */ -static uint32_t rdp_ringbuffer[RINGBUFFER_SIZE / 4]; -/** @brief Start of the command in the ringbuffer */ -static uint32_t rdp_start = 0; -/** @brief End of the command in the ringbuffer */ -static uint32_t rdp_end = 0; - /** @brief The current cache flushing strategy */ static flush_t flush_strategy = FLUSH_STRATEGY_AUTOMATIC; @@ -104,18 +70,6 @@ static volatile uint32_t wait_intr = 0; /** @brief Array of cached textures in RDP TMEM indexed by the RDP texture slot */ static sprite_cache cache[8]; -/** - * @brief RDP interrupt handler - * - * This interrupt is called when a Sync Full operation has completed and it is safe to - * use the output buffer with software - */ -static void __rdp_interrupt() -{ - /* Flag that the interrupt happened */ - wait_intr++; -} - /** * @brief Given a number, rount to a power of two * @@ -167,278 +121,11 @@ static inline uint32_t __rdp_log2( uint32_t number ) } } -/** - * @brief Return the size of the current command buffered in the ring buffer - * - * @return The size of the command in bytes - */ -static inline uint32_t __rdp_ringbuffer_size( void ) -{ - /* Normal length */ - return rdp_end - rdp_start; -} - -/** - * @brief Queue 32 bits of a command to the ring buffer - * - * @param[in] data - * 32 bits of data to be queued at the end of the current command - */ -static void __rdp_ringbuffer_queue( uint32_t data ) -{ - /* Only add commands if we have room */ - if( __rdp_ringbuffer_size() + sizeof(uint32_t) >= RINGBUFFER_SIZE ) { return; } - - /* Add data to queue to be sent to RDP */ - rdp_ringbuffer[rdp_end / 4] = data; - rdp_end += 4; -} - -/** - * @brief Send a completed command to the RDP that is queued in the ring buffer - * - * Given a validly constructred command in the 
ring buffer, this command will prepare the - * memory region in the ring buffer to be sent to the RDP and then start a DMA transfer, - * kicking off execution of the command in the RDP. After calling this function, it is - * safe to start writing to the ring buffer again. - */ -static void __rdp_ringbuffer_send( void ) -{ - /* Don't send nothingness */ - if( __rdp_ringbuffer_size() == 0 ) { return; } - - /* Ensure the cache is fixed up */ - data_cache_hit_writeback_invalidate(&rdp_ringbuffer[rdp_start / 4], __rdp_ringbuffer_size()); - - /* Best effort to be sure we can write once we disable interrupts */ - while( (((volatile uint32_t *)0xA4100000)[3] & 0x600) ) ; - - /* Make sure another thread doesn't attempt to render */ - disable_interrupts(); - - /* Clear XBUS/Flush/Freeze */ - ((uint32_t *)0xA4100000)[3] = 0x15; - MEMORY_BARRIER(); - - /* Don't saturate the RDP command buffer. Another command could have been written - * since we checked before disabling interrupts, but it is unlikely, so we probably - * won't stall in this critical section long. 
*/ - while( (((volatile uint32_t *)0xA4100000)[3] & 0x600) ) ; - - /* Send start and end of buffer location to kick off the command transfer */ - MEMORY_BARRIER(); - ((volatile uint32_t *)0xA4100000)[0] = ((uint32_t)rdp_ringbuffer | 0xA0000000) + rdp_start; - MEMORY_BARRIER(); - ((volatile uint32_t *)0xA4100000)[1] = ((uint32_t)rdp_ringbuffer | 0xA0000000) + rdp_end; - MEMORY_BARRIER(); - - /* We are good now */ - enable_interrupts(); - - /* Commands themselves can't wrap around */ - if( rdp_end > (RINGBUFFER_SIZE - RINGBUFFER_SLACK) ) - { - /* Wrap around before a command can be split */ - rdp_start = 0; - rdp_end = 0; - } - else - { - /* Advance the start to not allow clobbering current command */ - rdp_start = rdp_end; - } -} - -/** - * @brief Initialize the RDP system - */ -void rdp_init( void ) -{ - /* Default to flushing automatically */ - flush_strategy = FLUSH_STRATEGY_AUTOMATIC; - - /* Set the ringbuffer up */ - rdp_start = 0; - rdp_end = 0; - - /* Set up interrupt for SYNC_FULL */ - register_DP_handler( __rdp_interrupt ); - set_DP_interrupt( 1 ); -} - -/** - * @brief Close the RDP system - * - * This function closes out the RDP system and cleans up any internal memory - * allocated by #rdp_init. - */ -void rdp_close( void ) -{ - set_DP_interrupt( 0 ); - unregister_DP_handler( __rdp_interrupt ); -} - -/** - * @brief Attach the RDP to a surface - * - * This function allows the RDP to operate on surfaces, that is memory buffers - * that can be used as render targets. For instance, it can be used with - * framebuffers acquired by calling #display_get, or to render to an offscreen - * buffer created with #surface_alloc or #surface_make. - * - * This should be performed before any rendering operations to ensure that the RDP - * has a valid output buffer to operate on. 
- * - * @param[in] surface - * A surface pointer - * - * @see surface_new - * @see display_get - */ -void rdp_attach( surface_t* surface ) -{ - if( surface == 0 ) { return; } - - /* Set the rasterization buffer */ - __rdp_ringbuffer_queue( 0xFF000000 | ((TEX_FORMAT_BITDEPTH(surface_get_format(surface)) == 16) ? 0x00100000 : 0x00180000) | (surface->width - 1) ); - __rdp_ringbuffer_queue( PhysicalAddr(surface->buffer) ); - __rdp_ringbuffer_send(); -} - -/** - * @brief Detach the RDP from the current surface, after the RDP will have - * finished writing to it. - * - * @note This function requires interrupts to be enabled to operate properly. - * - * This function will ensure that all RDP rendering operations have completed - * before detaching the surface. - */ -void rdp_detach( void ) -{ - /* Wait for SYNC_FULL to finish */ - wait_intr = 0; - - /* Force the RDP to rasterize everything and then interrupt us */ - rdp_sync( SYNC_FULL ); - - if( INTERRUPTS_ENABLED == get_interrupts_state() ) - { - /* Only wait if interrupts are enabled */ - while( !wait_intr ) { ; } - } - - /* Set back to zero for next detach */ - wait_intr = 0; -} - -/** - * @brief Perform a sync operation - * - * Do not use excessive sync operations between commands as this can - * cause the RDP to stall. If the RDP stalls due to too many sync - * operations, graphics may not be displayed until the next render - * cycle, causing bizarre artifacts. The rule of thumb is to only add - * a sync operation if the data you need is not yet available in the - * pipeline. 
- * - * @param[in] sync - * The sync operation to perform on the RDP - */ -void rdp_sync( sync_t sync ) -{ - switch( sync ) - { - case SYNC_FULL: - __rdp_ringbuffer_queue( 0xE9000000 ); - break; - case SYNC_PIPE: - __rdp_ringbuffer_queue( 0xE7000000 ); - break; - case SYNC_TILE: - __rdp_ringbuffer_queue( 0xE8000000 ); - break; - case SYNC_LOAD: - __rdp_ringbuffer_queue( 0xE6000000 ); - break; - } - __rdp_ringbuffer_queue( 0x00000000 ); - __rdp_ringbuffer_send(); -} - -/** - * @brief Set the hardware clipping boundary - * - * @param[in] tx - * Top left X coordinate in pixels - * @param[in] ty - * Top left Y coordinate in pixels - * @param[in] bx - * Bottom right X coordinate in pixels - * @param[in] by - * Bottom right Y coordinate in pixels - */ -void rdp_set_clipping( uint32_t tx, uint32_t ty, uint32_t bx, uint32_t by ) -{ - /* Convert pixel space to screen space in command */ - __rdp_ringbuffer_queue( 0xED000000 | (tx << 14) | (ty << 2) ); - __rdp_ringbuffer_queue( (bx << 14) | (by << 2) ); - __rdp_ringbuffer_send(); -} - -/** - * @brief Set the hardware clipping boundary to the entire screen - */ -void rdp_set_default_clipping( void ) -{ - /* Clip box is the whole screen */ - rdp_set_clipping( 0, 0, display_get_width(), display_get_height() ); -} - -/** - * @brief Enable display of 2D filled (untextured) rectangles - * - * This must be called before using #rdp_draw_filled_rectangle. - */ -void rdp_enable_primitive_fill( void ) -{ - /* Set other modes to fill and other defaults */ - __rdp_ringbuffer_queue( 0xEFB000FF ); - __rdp_ringbuffer_queue( 0x00004000 ); - __rdp_ringbuffer_send(); -} - -/** - * @brief Enable display of 2D filled (untextured) triangles - * - * This must be called before using #rdp_draw_filled_triangle. 
- */ -void rdp_enable_blend_fill( void ) -{ - __rdp_ringbuffer_queue( 0xEF0000FF ); - __rdp_ringbuffer_queue( 0x80000000 ); - __rdp_ringbuffer_send(); -} - -/** - * @brief Enable display of 2D sprites - * - * This must be called before using #rdp_draw_textured_rectangle_scaled, - * #rdp_draw_textured_rectangle, #rdp_draw_sprite or #rdp_draw_sprite_scaled. - */ -void rdp_enable_texture_copy( void ) -{ - /* Set other modes to copy and other defaults */ - __rdp_ringbuffer_queue( 0xEFA000FF ); - __rdp_ringbuffer_queue( 0x00004001 ); - __rdp_ringbuffer_send(); -} - /** * @brief Load a texture from RDRAM into RDP TMEM * - * This function will take a texture from a sprite and place it into RDP TMEM at the offset and - * texture slot specified. It is capable of pulling out a smaller texture from a larger sprite + * This function will take a texture from a surface and place it into RDP TMEM at the offset and + * texture slot specified. It is capable of pulling out a smaller texture from a larger surface * map. 
* * @param[in] texslot @@ -447,59 +134,40 @@ void rdp_enable_texture_copy( void ) * The offset in RDP TMEM to place this texture * @param[in] mirror_enabled * Whether to mirror this texture when displaying - * @param[in] sprite - * Pointer to the sprite structure to load the texture out of + * @param[in] surface + * Pointer to the surface structure to load the texture out of * @param[in] sl - * The pixel offset S of the top left of the texture relative to sprite space + * The pixel offset S of the top left of the texture relative to surface space * @param[in] tl - * The pixel offset T of the top left of the texture relative to sprite space + * The pixel offset T of the top left of the texture relative to surface space * @param[in] sh - * The pixel offset S of the bottom right of the texture relative to sprite space + * The pixel offset S of the bottom right of the texture relative to surface space * @param[in] th - * The pixel offset T of the bottom right of the texture relative to sprite space + * The pixel offset T of the bottom right of the texture relative to surface space * * @return The amount of texture memory in bytes that was consumed by this texture. 
*/ -static uint32_t __rdp_load_texture( uint32_t texslot, uint32_t texloc, mirror_t mirror_enabled, sprite_t *sprite, int sl, int tl, int sh, int th ) +static uint32_t __rdp_load_texture( uint32_t texslot, uint32_t texloc, mirror_t mirror_enabled, surface_t *surface, int sl, int tl, int sh, int th ) { - int bitdepth = TEX_FORMAT_BITDEPTH(sprite_get_format(sprite)); - assertf( bitdepth == 2 || bitdepth == 4, "unsupported bitdepth (%d) for sprite", bitdepth ); - bitdepth /= 8; - - /* Invalidate data associated with sprite in cache */ + /* Invalidate data associated with surface in cache */ if( flush_strategy == FLUSH_STRATEGY_AUTOMATIC ) { - data_cache_hit_writeback_invalidate( sprite->data, sprite->width * sprite->height * bitdepth ); + data_cache_hit_writeback_invalidate( surface->buffer, surface->width * surface->height * TEX_FORMAT_BITDEPTH(surface_get_format(surface)) / 8 ); } - /* Point the RDP at the actual sprite data */ - __rdp_ringbuffer_queue( 0xFD000000 | ((bitdepth == 2) ? 0x00100000 : 0x00180000) | (sprite->width - 1) ); - __rdp_ringbuffer_queue( (uint32_t)sprite->data ); - __rdp_ringbuffer_send(); + /* Figure out the s,t coordinates of the surface we are copying out of */ + int twidth = sh - sl; + int theight = th - tl; - /* Figure out the s,t coordinates of the sprite we are copying out of */ - int twidth = sh - sl + 1; - int theight = th - tl + 1; - - /* Figure out the power of two this sprite fits into */ + /* Figure out the power of two this surface fits into */ uint32_t real_width = __rdp_round_to_power( twidth ); uint32_t real_height = __rdp_round_to_power( theight ); uint32_t wbits = __rdp_log2( real_width ); uint32_t hbits = __rdp_log2( real_height ); + tex_format_t fmt = surface_get_format(surface); - /* Because we are dividing by 8, we want to round up if we have a remainder */ - int round_amount = (real_width % 8) ? 1 : 0; - - /* Instruct the RDP to copy the sprite data out */ - __rdp_ringbuffer_queue( 0xF5000000 | ((bitdepth == 2) ? 
0x00100000 : 0x00180000) | - (((((real_width / 8) + round_amount) * bitdepth) & 0x1FF) << 9) | ((texloc / 8) & 0x1FF) ); - __rdp_ringbuffer_queue( ((texslot & 0x7) << 24) | (mirror_enabled != MIRROR_DISABLED ? 0x40100 : 0) | (hbits << 14 ) | (wbits << 4) ); - __rdp_ringbuffer_send(); - - /* Copying out only a chunk this time */ - __rdp_ringbuffer_queue( 0xF4000000 | (((sl << 2) & 0xFFF) << 12) | ((tl << 2) & 0xFFF) ); - __rdp_ringbuffer_queue( (((sh << 2) & 0xFFF) << 12) | ((th << 2) & 0xFFF) ); - __rdp_ringbuffer_send(); + int pitch_shift = fmt == FMT_RGBA32 ? 1 : 0; + int tmem_pitch = ROUND_UP(TEX_FORMAT_PIX2BYTES(fmt, twidth) >> pitch_shift, 8); /* Save sprite width and height for managed sprite commands */ cache[texslot & 0x7].width = twidth - 1; @@ -508,226 +176,89 @@ static uint32_t __rdp_load_texture( uint32_t texslot, uint32_t texloc, mirror_t cache[texslot & 0x7].t = tl; cache[texslot & 0x7].real_width = real_width; cache[texslot & 0x7].real_height = real_height; - + + /* Configure the tile */ + rdpq_set_tile(texslot, surface_get_format(surface), texloc, tmem_pitch, &(rdpq_tileparms_t){ + .s.mirror = mirror_enabled != MIRROR_DISABLED ? true : false, + .s.mask = hbits, + .t.mirror = mirror_enabled != MIRROR_DISABLED ? 
true : false, + .t.mask = wbits, + }); + + /* Instruct the RDP to copy the sprite data out */ + rdpq_set_texture_image(surface); + rdpq_load_tile(texslot, sl, tl, sh, th); + /* Return the amount of texture memory consumed by this texture */ - return ((real_width / 8) + round_amount) * 8 * real_height * bitdepth; + return theight * tmem_pitch; } -/** - * @brief Load a sprite into RDP TMEM - * - * @param[in] texslot - * The RDP texture slot to load this sprite into (0-7) - * @param[in] texloc - * The RDP TMEM offset to place the texture at - * @param[in] mirror - * Whether the sprite should be mirrored when displaying past boundaries - * @param[in] sprite - * Pointer to sprite structure to load the texture from - * - * @return The number of bytes consumed in RDP TMEM by loading this sprite - */ uint32_t rdp_load_texture( uint32_t texslot, uint32_t texloc, mirror_t mirror, sprite_t *sprite ) { if( !sprite ) { return 0; } + assertf(sprite_get_format(sprite) == FMT_RGBA16 || sprite_get_format(sprite) == FMT_RGBA32, + "only sprites in FMT_RGBA16 or FMT_RGBA32 are supported"); - return __rdp_load_texture( texslot, texloc, mirror, sprite, 0, 0, sprite->width - 1, sprite->height - 1 ); + surface_t surface = sprite_get_pixels(sprite); + return __rdp_load_texture( texslot, texloc, mirror, &surface, 0, 0, surface.width, surface.height); } -/** - * @brief Load part of a sprite into RDP TMEM - * - * Given a sprite with vertical and horizontal slices defined, this function will load the slice specified in - * offset into texture memory. This is usefl for treating a large sprite as a tilemap. - * - * Given a sprite with 3 horizontal slices and two vertical slices, the offsets are as follows: - * - *
- * *---*---*---*
- * | 0 | 1 | 2 |
- * *---*---*---*
- * | 3 | 4 | 5 |
- * *---*---*---*
- * 
- * - * @param[in] texslot - * The RDP texture slot to load this sprite into (0-7) - * @param[in] texloc - * The RDP TMEM offset to place the texture at - * @param[in] mirror - * Whether the sprite should be mirrored when displaying past boundaries - * @param[in] sprite - * Pointer to sprite structure to load the texture from - * @param[in] offset - * Offset of the particular slice to load into RDP TMEM. - * - * @return The number of bytes consumed in RDP TMEM by loading this sprite - */ uint32_t rdp_load_texture_stride( uint32_t texslot, uint32_t texloc, mirror_t mirror, sprite_t *sprite, int offset ) { if( !sprite ) { return 0; } - - /* Figure out the s,t coordinates of the sprite we are copying out of */ - int twidth = sprite->width / sprite->hslices; - int theight = sprite->height / sprite->vslices; - - int sl = (offset % sprite->hslices) * twidth; - int tl = (offset / sprite->hslices) * theight; - int sh = sl + twidth - 1; - int th = tl + theight - 1; - - return __rdp_load_texture( texslot, texloc, mirror, sprite, sl, tl, sh, th ); + assertf(sprite_get_format(sprite) == FMT_RGBA16 || sprite_get_format(sprite) == FMT_RGBA32, + "only sprites in FMT_RGBA16 or FMT_RGBA32 are supported"); + + int ox = offset % sprite->hslices; + int oy = offset / sprite->hslices; + int tile_width = sprite->width / sprite->hslices; + int tile_height = sprite->height / sprite->vslices; + int s0 = ox * tile_width; + int t0 = oy * tile_height; + int s1 = s0 + tile_width; + int t1 = t0 + tile_height; + + surface_t surface = sprite_get_pixels(sprite); + return __rdp_load_texture( texslot, texloc, mirror, &surface, s0, t0, s1, t1); } -/** - * @brief Draw a textured rectangle with a scaled texture - * - * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture - * at a scale other than 1. This allows rectangles to be drawn with stretched or squashed textures. 
- * If the rectangle is larger than the texture after scaling, it will be tiled or mirrored based on the - * mirror setting given in the load texture command. - * - * Before using this command to draw a textured rectangle, use #rdp_enable_texture_copy to set the RDP - * up in texture mode. - * - * @param[in] texslot - * The texture slot that the texture was previously loaded into (0-7) - * @param[in] tx - * The pixel X location of the top left of the rectangle - * @param[in] ty - * The pixel Y location of the top left of the rectangle - * @param[in] bx - * The pixel X location of the bottom right of the rectangle - * @param[in] by - * The pixel Y location of the bottom right of the rectangle - * @param[in] x_scale - * Horizontal scaling factor - * @param[in] y_scale - * Vertical scaling factor - * @param[in] mirror - * Whether the texture should be mirrored - */ void rdp_draw_textured_rectangle_scaled( uint32_t texslot, int tx, int ty, int bx, int by, double x_scale, double y_scale, mirror_t mirror) { - uint16_t s = cache[texslot & 0x7].s << 5; - uint16_t t = cache[texslot & 0x7].t << 5; + uint16_t s = cache[texslot & 0x7].s; + uint16_t t = cache[texslot & 0x7].t; uint32_t width = cache[texslot & 0x7].width; uint32_t height = cache[texslot & 0x7].height; - /* Cant display < 0, so must clip size and move S,T coord accordingly */ - if( tx < 0 ) - { - if ( tx < -(width * x_scale) ) { return; } - s += (int)(((double)((-tx) << 5)) * (1.0 / x_scale)); - tx = 0; - } + if ( tx < -(width * x_scale) ) { return; } + if ( ty < -(height * y_scale) ) { return; } - if( ty < 0 ) - { - if ( ty < -(height * y_scale) ) { return; } - t += (int)(((double)((-ty) << 5)) * (1.0 / y_scale)); - ty = 0; - } - - // mirror horizontally or vertically + // mirror horizontally or vertically if (mirror != MIRROR_DISABLED) { if (mirror == MIRROR_X || mirror == MIRROR_XY) - s += ( (width+1) + ((cache[texslot & 0x7].real_width-(width+1))<<1) ) << 5; + s += ( (width+1) + ((cache[texslot & 
0x7].real_width-(width+1))<<1)); if (mirror == MIRROR_Y || mirror == MIRROR_XY) - t += ( (height+1) + ((cache[texslot & 0x7].real_height-(height+1))<<1) ) << 5; - } - - /* Calculate the scaling constants based on a 6.10 fixed point system */ - int xs = (int)((1.0 / x_scale) * 4096.0); - int ys = (int)((1.0 / y_scale) * 1024.0); + t += ( (height+1) + ((cache[texslot & 0x7].real_height-(height+1))<<1)); + } /* Set up rectangle position in screen space */ - __rdp_ringbuffer_queue( 0xE4000000 | (bx << 14) | (by << 2) ); - __rdp_ringbuffer_queue( ((texslot & 0x7) << 24) | (tx << 14) | (ty << 2) ); - /* Set up texture position and scaling to 1:1 copy */ - __rdp_ringbuffer_queue( (s << 16) | t ); - __rdp_ringbuffer_queue( (xs & 0xFFFF) << 16 | (ys & 0xFFFF) ); - - /* Send command */ - __rdp_ringbuffer_send(); + rdpq_texture_rectangle_scaled(texslot, tx, ty, bx+1, by+1, s, t, s + width +1, t + height +1); } -/** - * @brief Draw a textured rectangle - * - * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture. - * If the rectangle is larger than the texture, it will be tiled or mirrored based on the* mirror setting - * given in the load texture command. - * - * Before using this command to draw a textured rectangle, use #rdp_enable_texture_copy to set the RDP - * up in texture mode. 
- * - * @param[in] texslot - * The texture slot that the texture was previously loaded into (0-7) - * @param[in] tx - * The pixel X location of the top left of the rectangle - * @param[in] ty - * The pixel Y location of the top left of the rectangle - * @param[in] bx - * The pixel X location of the bottom right of the rectangle - * @param[in] by - * The pixel Y location of the bottom right of the rectangle - * @param[in] mirror - * Whether the texture should be mirrored - */ void rdp_draw_textured_rectangle( uint32_t texslot, int tx, int ty, int bx, int by, mirror_t mirror ) { /* Simple wrapper */ rdp_draw_textured_rectangle_scaled( texslot, tx, ty, bx, by, 1.0, 1.0, mirror ); } -/** - * @brief Draw a texture to the screen as a sprite - * - * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture. - * - * Before using this command to draw a textured rectangle, use #rdp_enable_texture_copy to set the RDP - * up in texture mode. - * - * @param[in] texslot - * The texture slot that the texture was previously loaded into (0-7) - * @param[in] x - * The pixel X location of the top left of the sprite - * @param[in] y - * The pixel Y location of the top left of the sprite - * @param[in] mirror - * Whether the texture should be mirrored - */ void rdp_draw_sprite( uint32_t texslot, int x, int y, mirror_t mirror ) { /* Just draw a rectangle the size of the sprite */ rdp_draw_textured_rectangle_scaled( texslot, x, y, x + cache[texslot & 0x7].width, y + cache[texslot & 0x7].height, 1.0, 1.0, mirror ); } -/** - * @brief Draw a texture to the screen as a scaled sprite - * - * Given an already loaded texture, this function will draw a rectangle textured with the loaded texture. - * - * Before using this command to draw a textured rectangle, use #rdp_enable_texture_copy to set the RDP - * up in texture mode. 
- * - * @param[in] texslot - * The texture slot that the texture was previously loaded into (0-7) - * @param[in] x - * The pixel X location of the top left of the sprite - * @param[in] y - * The pixel Y location of the top left of the sprite - * @param[in] x_scale - * Horizontal scaling factor - * @param[in] y_scale - * Vertical scaling factor - * @param[in] mirror - * Whether the texture should be mirrored - */ void rdp_draw_sprite_scaled( uint32_t texslot, int x, int y, double x_scale, double y_scale, mirror_t mirror ) { /* Since we want to still view the whole sprite, we must resize the rectangle area too */ @@ -738,151 +269,113 @@ void rdp_draw_sprite_scaled( uint32_t texslot, int x, int y, double x_scale, dou rdp_draw_textured_rectangle_scaled( texslot, x, y, x + new_width, y + new_height, x_scale, y_scale, mirror ); } -/** - * @brief Set the primitive draw color for subsequent filled primitive operations - * - * This function sets the color of all #rdp_draw_filled_rectangle operations that follow. - * Note that in 16 bpp mode, the color must be a packed color. This means that the high - * 16 bits and the low 16 bits must both be the same color. Use #graphics_make_color or - * #graphics_convert_color to generate valid colors. - * - * @param[in] color - * Color to draw primitives in - */ -void rdp_set_primitive_color( uint32_t color ) +void rdp_set_blend_color( uint32_t color ) { - /* Set packed color */ - __rdp_ringbuffer_queue( 0xF7000000 ); - __rdp_ringbuffer_queue( color ); - __rdp_ringbuffer_send(); + rdpq_set_blend_color(color_from_packed32(color)); } -/** - * @brief Set the blend draw color for subsequent filled primitive operations - * - * This function sets the color of all #rdp_draw_filled_triangle operations that follow. 
- * - * @param[in] color - * Color to draw primitives in - */ -void rdp_set_blend_color( uint32_t color ) +void rdp_draw_filled_triangle( float x1, float y1, float x2, float y2, float x3, float y3 ) { - __rdp_ringbuffer_queue( 0xF9000000 ); - __rdp_ringbuffer_queue( color ); - __rdp_ringbuffer_send(); + float v1[] = {x1, y1}; + float v2[] = {x2, y2}; + float v3[] = {x3, y3}; + rdpq_triangle(&TRIFMT_FILL, v1, v2, v3); +} + +void rdp_set_texture_flush( flush_t flush ) +{ + flush_strategy = flush; +} + +/************************************** + * DEPRECATED FUNCTIONS + **************************************/ + +///@cond + +void rdp_init( void ) +{ + /* Default to flushing automatically */ + flush_strategy = FLUSH_STRATEGY_AUTOMATIC; + + rdpq_init(); +} + +void rdp_close( void ) +{ + rdpq_close(); +} + +void rdp_detach(void) +{ + // Historically, this function has behaved asynchronously when run with + // interrupts disabled, and synchronously otherwise. Keep the behavior. + rdpq_detach(); + if (get_interrupts_state() == INTERRUPTS_ENABLED) + rspq_wait(); +} + +void rdp_sync( sync_t sync ) +{ + switch( sync ) + { + case SYNC_FULL: + rdpq_sync_full(NULL, NULL); + break; + case SYNC_PIPE: + rdpq_sync_pipe(); + break; + case SYNC_TILE: + rdpq_sync_tile(); + break; + case SYNC_LOAD: + rdpq_sync_load(); + break; + } +} + +void rdp_set_clipping( uint32_t tx, uint32_t ty, uint32_t bx, uint32_t by ) +{ + /* Convert pixel space to screen space in command */ + rdpq_set_scissor(tx, ty, bx, by); +} + +void rdp_set_default_clipping( void ) +{ + /* Clip box is the whole screen */ + rdpq_set_scissor( 0, 0, display_get_width(), display_get_height() ); } -/** - * @brief Draw a filled rectangle - * - * Given a color set with #rdp_set_primitive_color, this will draw a filled rectangle - * to the screen. This is most often useful for erasing a buffer before drawing to it - * by displaying a black rectangle the size of the screen. 
This is much faster than - * setting the buffer blank in software. However, if you are planning on drawing to - * the entire screen, blanking may be unnecessary. - * - * Before calling this function, make sure that the RDP is set to primitive mode by - * calling #rdp_enable_primitive_fill. - * - * @param[in] tx - * Pixel X location of the top left of the rectangle - * @param[in] ty - * Pixel Y location of the top left of the rectangle - * @param[in] bx - * Pixel X location of the bottom right of the rectangle - * @param[in] by - * Pixel Y location of the bottom right of the rectangle - */ void rdp_draw_filled_rectangle( int tx, int ty, int bx, int by ) { if( tx < 0 ) { tx = 0; } if( ty < 0 ) { ty = 0; } - __rdp_ringbuffer_queue( 0xF6000000 | ( bx << 14 ) | ( by << 2 ) ); - __rdp_ringbuffer_queue( ( tx << 14 ) | ( ty << 2 ) ); - __rdp_ringbuffer_send(); + rdpq_fill_rectangle(tx, ty, bx, by); } -/** - * @brief Draw a filled triangle - * - * Given a color set with #rdp_set_blend_color, this will draw a filled triangle - * to the screen. Vertex order is not important. - * - * Before calling this function, make sure that the RDP is set to blend mode by - * calling #rdp_enable_blend_fill. 
- * - * @param[in] x1 - * Pixel X1 location of triangle - * @param[in] y1 - * Pixel Y1 location of triangle - * @param[in] x2 - * Pixel X2 location of triangle - * @param[in] y2 - * Pixel Y2 location of triangle - * @param[in] x3 - * Pixel X3 location of triangle - * @param[in] y3 - * Pixel Y3 location of triangle - */ -void rdp_draw_filled_triangle( float x1, float y1, float x2, float y2, float x3, float y3 ) +void rdp_enable_primitive_fill( void ) { - float temp_x, temp_y; - const float to_fixed_11_2 = 4.0f; - const float to_fixed_16_16 = 65536.0f; - - /* sort vertices by Y ascending to find the major, mid and low edges */ - if( y1 > y2 ) { temp_x = x2, temp_y = y2; y2 = y1; y1 = temp_y; x2 = x1; x1 = temp_x; } - if( y2 > y3 ) { temp_x = x3, temp_y = y3; y3 = y2; y2 = temp_y; x3 = x2; x2 = temp_x; } - if( y1 > y2 ) { temp_x = x2, temp_y = y2; y2 = y1; y1 = temp_y; x2 = x1; x1 = temp_x; } - - /* calculate Y edge coefficients in 11.2 fixed format */ - int yh = y1 * to_fixed_11_2; - int ym = (int)( y2 * to_fixed_11_2 ) << 16; // high word - int yl = y3 * to_fixed_11_2; - - /* calculate X edge coefficients in 16.16 fixed format */ - int xh = x1 * to_fixed_16_16; - int xm = x1 * to_fixed_16_16; - int xl = x2 * to_fixed_16_16; - - /* calculate inverse slopes in 16.16 fixed format */ - int dxhdy = ( y3 == y1 ) ? 0 : ( ( x3 - x1 ) / ( y3 - y1 ) ) * to_fixed_16_16; - int dxmdy = ( y2 == y1 ) ? 0 : ( ( x2 - x1 ) / ( y2 - y1 ) ) * to_fixed_16_16; - int dxldy = ( y3 == y2 ) ? 0 : ( ( x3 - x2 ) / ( y3 - y2 ) ) * to_fixed_16_16; - - /* determine the winding of the triangle */ - int winding = ( x1 * y2 - x2 * y1 ) + ( x2 * y3 - x3 * y2 ) + ( x3 * y1 - x1 * y3 ); - int flip = ( winding > 0 ? 
1 : 0 ) << 23; - - __rdp_ringbuffer_queue( 0xC8000000 | flip | yl ); - __rdp_ringbuffer_queue( ym | yh ); - __rdp_ringbuffer_queue( xl ); - __rdp_ringbuffer_queue( dxldy ); - __rdp_ringbuffer_queue( xh ); - __rdp_ringbuffer_queue( dxhdy ); - __rdp_ringbuffer_queue( xm ); - __rdp_ringbuffer_queue( dxmdy ); - __rdp_ringbuffer_send(); + /* Set other modes to fill and other defaults */ + rdpq_set_other_modes_raw(SOM_CYCLE_FILL | SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE | SOM_BLENDING); } -/** - * @brief Set the flush strategy for texture loads - * - * If textures are guaranteed to be in uncached RDRAM or the cache - * is flushed before calling load operations, the RDP can be told - * to skip flushing the cache. This affords a good speedup. However, - * if you are changing textures in memory on the fly or otherwise do - * not want to deal with cache coherency, set the cache strategy to - * automatic to have the RDP flush cache before texture loads. - * - * @param[in] flush - * The cache strategy, either #FLUSH_STRATEGY_NONE or - * #FLUSH_STRATEGY_AUTOMATIC. - */ -void rdp_set_texture_flush( flush_t flush ) +void rdp_enable_blend_fill( void ) { - flush_strategy = flush; + // Set a "blend fill mode": we use the alpha channel coming from the combiner + // multiplied by the BLEND register (that must be configured). 
+ rdpq_set_other_modes_raw(SOM_CYCLE_1 | SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE | + RDPQ_BLENDER((BLEND_RGB, IN_ALPHA, IN_RGB, INV_MUX_ALPHA))); } +void rdp_enable_texture_copy( void ) +{ + /* Set other modes to copy and other defaults */ + rdpq_set_other_modes_raw(SOM_CYCLE_COPY | SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE | SOM_BLENDING | SOM_ALPHACOMPARE_THRESHOLD); +} + + +///@endcond + + /** @} */ diff --git a/src/rdpq/rdpq.c b/src/rdpq/rdpq.c new file mode 100644 index 0000000000..7d635de233 --- /dev/null +++ b/src/rdpq/rdpq.c @@ -0,0 +1,1139 @@ +/** + * @file rdpq.c + * @brief RDP Command queue + * @ingroup rdp + * + * # RDP Queue: implementation details + * + * This documentation block describes the internal workings of the RDP Queue. + * This is useful to understand the implementation, but it is not required + * to read or understand this to use rdpq. + * + * For description of the API of the RDP queue, see rdpq.h + * + * ## Improvements over raw hardware programming + * + * RDPQ provides a very low-level API over the RDP graphics chips, + * exposing all its settings and most of its limits. Still, rdpq + * tries to hide a few low-level hardware details to make programming the RDP + * less surprising and more orthogonal. To do so, it "patches" some RDP + * commands, typically via RSP code and depending on the current RDP state. We + * called these improvements "fixups". + * + * The documentation of the public rdpq API does not explicitly mention which + * behavior has been adjusted via fixups. Instead, this section explains in + * details all the fixups performed by rdpq. Reading this section is not + * necessary to understand and use rdpq, but it might be useful for people + * that are familiar with RDP outside of libdragon (eg: libultra programmers), + * to avoid getting confused in places where rdpq deviates from RDP (even if + * for the better). 
+ * + * ### Scissoring and texrects: consistent coordinates + * + * The RDP SET_SCISSOR and TEXTURE_RECTANGLE commands accept a rectangle + * whose major bounds (bottom and right) are either inclusive or exclusive, + * depending on the current RDP cycle type (fill/copy: exclusive, 1cyc/2cyc: inclusive). + * #rdpq_set_scissor and #rdpq_texture_rectangle, instead, always use exclusive + * major bounds, and automatically adjust them depending on the current RDP cycle + * type. + * + * Moreover, any time the RDP cycle type changes, the current scissoring is + * adjusted to guarantee consistent results. This is especially important + * where the scissoring covers the whole framebuffer, because otherwise the + * RDP might overflow the buffer while drawing. + * + * ### Avoid color image buffer overflows with auto-scissoring + * + * The RDP SET_COLOR_IMAGE command only contains a memory pointer and a pitch: + * the RDP is not aware of the actual size of the buffer in terms of width/height, + * and expects commands to be correctly clipped, or scissoring to be configured. + * To avoid mistakes leading to memory corruption, #rdpq_set_color_image always + * reconfigures scissoring to respect the actual buffer size. + * + * Note also that when the RDP is cold-booted, the internal scissoring register + * contains random data. This means that this auto-scissoring fixup also + * provides a workaround to this, by making sure scissoring is always configured + * at least once. In fact, by forgetting to configure scissoring, the RDP + * can happily draw outside the framebuffer, or draw nothing, or even freeze. + * + * ### Autosync + * + * The RDP has different internal parallel units and exposes three different + * syncing primitives to stall and avoid write-during-use bugs: SYNC_PIPE, + * SYNC_LOAD and SYNC_TILE. 
Correct usage of these commands is not complicated + * but it can be complex to get right, and requires extensive hardware testing + * because emulators do not implement the bugs caused by the absence of RDP stalls. + * + * rdpq implements a smart auto-syncing engine that tracks the commands sent + * to RDP (on the CPU) and automatically inserts syncing whenever necessary. + * Insertion of syncing primitives is optimal for SYNC_PIPE and SYNC_TILE, and + * conservative for SYNC_LOAD (it does not currently handle partial TMEM updates). + * + * Autosync also works within blocks, but since it is not possible to know + * the context in which a block will be run, it has to be conservative and + * might issue more stalls than necessary. + * + * More details on the autosync engine are below. + * + * ### Partial render mode changes + * + * The RDP command SET_OTHER_MODES contains most of the RDP mode settings. + * Unfortunately the command does not allow to change only some settings, but + * all of them must be reconfigured. This is in contrast with most graphics APIs + * that allow to configure each render mode setting by itself (eg: it is possible + * to just change the dithering algorithm). + * + * rdpq instead tracks the current render mode on the RSP, and allows to do + * partial updates via either the low-level #rdpq_change_other_modes_raw + * function (where it is possible to change only a subset of the 56 bits), + * or via the high-level rdpq_mode_* APIs (eg: #rdpq_mode_dithering), which + * mostly build upon #rdpq_change_other_modes_raw in their implementation. + * + * ### Automatic 1/2 cycle type selection + * + * The RDP has two main operating modes: 1 cycle per pixel and 2 cycles per pixel. + * The latter is twice as slow, as the name implies, but it allows more complex + * color combiners and/or blenders. Moreover, 2-cycles mode also allows for + * multi-texturing.
+ * + * At the hardware level, it is up to the programmer to explicitly activate + * either 1-cycle or 2-cycle mode. The problem with this is that there are + * specific rules to follow for either mode, which does not compose cleanly + * with partial mode changes. For instance, fogging is typically implemented + * using the 2-cycle mode as it requires two passes in the blender. If the + * user disables fogging for some meshes, it might be more performant to switch + * back to 1-cycle mode, but that requires also reconfiguring the combiner. + * + * To solve this problem, the higher level rdpq mode APIs (rdpq_mode_*) + * automatically select the best cycle type depending on the current settings. + * More specifically, 1-cycle mode is preferred as it is faster, but 2-cycle + * mode is activated whenever one of the following conditions is true: + * + * * A two-pass blender is configured. + * * A two-pass combiner is configured. + * + * The correct cycle-type is automatically reconfigured any time that either + * the blender or the combiner settings are changed. Notice that this means + * that rdpq also transparently handles a few more details for the user, to + * make it for an easier API: + * + * * In 1 cycle mode, rdpq makes sure that the second pass of the combiner and + * the second pass of the blender are configured exactly like the respective + * first passes, because the RDP hardware requires this to operate correctly. + * * In 2 cycles mode, if a one-pass combiner was configured by the user, + * the second pass is automatically configured as a simple passthrough + * (equivalent to `((ZERO, ZERO, ZERO, COMBINED), (ZERO, ZERO, ZERO, COMBINED))`). + * * In 2 cycles mode, if a one-pass blender was configured by the user, + * it is configured in the second pass, while the first pass is defined + * as a passthrough (equivalent to `((PIXEL_RGB, ZERO, PIXEL_RGB, ONE))`). 
+ * Notice that this is required because there is no pure passthrough in + * the second step of the blender. + * * RDPQ_COMBINER2 macro transparently handles the texture index swap in the + * second cycle. So while using the macro, TEX0 always refers to the first + * texture and TEX1 always refers to the second texture. Moreover, uses + * of TEX0/TEX1 in passes where they are not allowed would cause compilation + * errors, to avoid triggering undefined behaviours in RDP hardware. + * + * ### Fill color as standard 32-bit color + * + * The RDP command SET_FILL_COLOR (used to configure the color register + * to be used in fill cycle type) has a very low-level interface: its argument + * is basically a 32-bit value which is copied to the framebuffer as-is, + * irrespective of the framebuffer color depth. For a 16-bit buffer, then, + * it must be programmed with two copies of the same 16-bit color. + * + * #rdpq_set_fill_color, instead, accepts a #color_t argument and does the + * conversion to the "packed" format internally, depending on the current + * framebuffer's color depth. + * + * ## Usage of inline functions vs no-inline + * + * Most of the rdpq APIs are defined as inline functions in the header rdpq.h, + * but they then internally call some non-public function to actually emit the command. + * So basically the actual function is split in two parts: an inlined part and + * a non-inlined part. + * + * The reason for this split is to help the compiler generate better code. In fact, + * it is extremely common to call rdpq functions using many constant parameters, + * and we want those constants to be propagated into the various bit shifts and masks + * to be assembled into single words. Once the (often constant) arguments have been + * handled, the rest of the operation can normally be performed in a separate + * out-of-line function. + * + * ## Sending commands to RDP + * + * This section describes in general how the commands flow from CPU to RDP via RSP.
+ * There are several different code-paths here depending on whether the command has + * a fixup or not, and whether it is part of a block. + * + * ### RDRAM vs XBUS + * + * In general, the rdpq library sends the commands to RDP using a buffer in RDRAM. + * The hardware feature called XBUS (which allows to send commands from RSP DMEM + * to RDP directly) is not used or supported. There are a few reasons for this + * architectural choice: + * + * * DMEM is limited (4K), RSP is fast and RDP is slow. Using XBUS means that + * you need to create a buffer in DMEM to hold the commands; as the buffer + * fills, RSP can trigger RDP to fetch from it, but RSP will + * generally be faster at filling it than RDP at executing it. At that point, + * as the buffer can't grow too much, the RSP will have to stall, slowing + * down the rspq queue, which in turn could also cause stalls on the CPU. The + * back-pressure from RDP would basically propagate to RSP and then CPU. + * * One of the main advantages of using XBUS is that there is no need to copy + * data from RSP to RDRAM, saving memory bandwidth. To partially make up + * for it, rdpq has some other tricks up its sleeve to save memory + * bandwidth (specifically how it works in block mode, see below). + * + * The buffer in RDRAM where RDP commands are enqueued by RSP is called + * "RDP dynamic buffer". It is used as a ring buffer, so once full, it is + * recycled, making sure not to overwrite commands that the RDP has not + * executed yet. + * + * ### RDP commands in standard mode + * + * Let's check the workflow for a standard RDP command, that is one for which + * rdpq provides no fixups: + * + * * CPU (application code): a call to a rdpq function is made (eg: #rdpq_load_block). + * * CPU (rdpq code): the implementation of #rdpq_load_block enqueues a rspq command + * for the rdpq overlay.
This command has the same binary encoding as a real RDP + * LOAD_BLOCK command, while still being a valid rspq command following the rspq + * structure of overlay ID + command ID. In fact, the rdpq overlay is registered + * to cover 4 overlay IDs (0xC - 0xF), so that the whole RDP command space can be + * represented by it. In our example, the command is `0xF3`. + * * RSP (rspq code): later at some point, in parallel, the rspq engine will read + * the command `0xF3`, and dispatch it to the rdpq overlay. + * * RSP (rdpq code): the implementation for command `0xF3` is the same for all + * non-fixup commands: it writes the 8 bytes of the command into a temporary + * buffer in DMEM, and then sends it via DMA to the RDP dynamic buffer in RDRAM. + * This act of forwarding a command through CPU -> RSP -> RDP is called + * "passthrough", and is implemented by `RDPQCmd_Passthrough8` and + * `RDPQCmd_Passthrough16` in the ucode (rsp_rdpq.S), and `RSPQ_RdpSend` + * in rsp_queue.inc. + * * RSP (rdpq code): after the DMA is finished, the RSP tells the RDP that + * a new command has been added to the dynamic buffer and can be executed + * whenever the RDP is ready. This is easily done by advancing the RDP + * `DP_END` register. When the buffer is finished, recycling it requires + * instead to write both `DP_START` and `DP_END`. See `RSPQCmd_RdpAppendBuffer` + * and `RSPQCmd_RdpSetBuffer` respectively. + * + * ### RDP fixups in standard mode + * + * Now let's see the workflow for a RDP fixup: these are the RDP commands which + * are modified/tweaked by RSP to provide a more sane programming interface + * to the programmer. + * + * * CPU (application code): a call to a rdpq function is made (eg: #rdpq_set_scissor). + * * CPU (rdpq code): the implementation of #rdpq_set_scissor enqueues a rspq command + * for the rdpq overlay. This command does not need to have the same encoding as + * a real RDP command, but it is usually similar (to simplify work on the RSP).
+ * For instance, in our example the rdpq command is 0xD2, which is meaningless + * if sent to RDP, but has otherwise the same encoding of a real SET_SCISSOR + * (whose ID would be 0xED). + * * RSP (rspq code): later at some point, in parallel, the rspq engine will read + * the command `0xD2`, and dispatch it to the rdpq overlay. + * * RSP (rdpq code): the implementation for command `0xD2` is a RSP function called + * `RDPQCmd_SetScissorEx`. It inspects the RDP state to check the current cycle + * type and adapts the scissoring bounds if required. Then, it assembles a real + * SET_SCISSOR (with ID 0xED) and calls `RSPQ_RdpSend` to send it to the RDP + * dynamic buffer. + * * RSP (rdpq code): after the DMA is finished, the RSP tells the RDP that + * a new command has been added to the dynamic buffer and can be executed + * whenever the RDP is ready. + * + * The overall workflow is similar to the passthrough, but the command is + * tweaked by RSP in the process. + * + * ### RDP commands in block mode + * + * In block mode, rdpq completely changes its way of operating. + * + * A rspq block (as described in rspq.c) is a buffer containing a sequence + * of rspq commands that can be played back by RSP itself, with the CPU just + * triggering it via #rspq_block_run. When using rdpq, the rspq block + * contains one additional buffer: a "RDP static buffer", which contains + * RDP commands. + * + * At block creation time, in fact, RDP commands are not enqueued as + * rspq commands, but are rather written into this separate buffer. The + * goal is to avoid the passthrough overhead: since RDP commands don't change + * during the block execution, they can be sent directly to RDP by RSP, + * referencing the RDP static buffer, without ever transferring them into + * RSP DMEM and back.
+ * + * Let's check the sequence at block compilation time: + * + * * CPU (application code): a call to + * * CPU (application code): a call to a rdpq function is made (eg: #rdpq_load_block), + * while compiling a block. + * * CPU (rdpq code): the implementation of #rdpq_load_block detects that a block + * is being compiled and does two things: + * 1) Append the RDP LOAD_BLOCK command to the RDP static buffer associated with the + * block. + * 2) Write the #RSPQ_CMD_RDP_APPEND_BUFFER command to the rspq block, containing the + * address of the just-written LOAD_BLOCK command. + * + * And now at block run time: + * + * * RSP (rspq code): the RSP reads the #RSPQ_CMD_RDP_APPEND_BUFFER command and + * + * enqueues a rspq command + * for the rdpq overlay. This command does not need to have the same encoding of + * a real RDP command, but it is usually similar (to simplify work on the RSP). + * For instance, in our example the rdpq command is 0xD2, which is meaningless + * if sent to RDP, but has otherwise the same encoding of a real SET_SCISSOR + * (whose ID would be 0xED). + * * RSP (rspq code): later at some point, in parallel, the rspq engine will read + * the command `0xD2`, and dispatch it to the rdpq overlay. + * * RSP (rdpq code): the implementation for command `0xD2` is a RSP function called + * `RDPQCmd_SetScissorEx`. It inspects the RDP state to check the current cycle + * type and adapts the scissoring bounds if required. Then, it assembles a real + * SET_SCISSOR (with ID 0xED) and calls `RSPQ_RdpSend` to send it to the RDP + * dynamic buffer. + * * RSP (rdpq code): after the DMA is finished, the RSP tells the RDP that + * a new command has been added to the dynamic buffer and can be executed + * whenever the RDP is ready. + * + * ## Autosync engine + * + * As explained above, the autosync engine is able to emit sync commands + * (SYNC_PIPE, SYNC_TILE, SYNC_LOAD) automatically when necessary, liberating + * the developer from this additional task.
This section describes how it + * works. + * + * The autosync engine works around one simple abstraction and logic. There are + * "hardware resources" that can be either "used" or "changed" (aka configured) + * by RDP commands. If a resource is in use, a command changing it requires + * a sync before. Each resource is tracked by one bit in a single 32-bit word + * called the "autosync state". + * + * The following resources are tracked: + * + * * Pipe. This is a generic resource encompassing all render mode and hardware + * register changes. It maps to a single bit (`AUTOSYNC_PIPE`). All render + * mode commands "change" this bit (eg: #rdpq_set_other_modes_raw or + * #rdpq_set_yuv_parms). All draw commands "use" this bit (eg: #rdpq_triangle). + * So for instance, if you draw a triangle, the next #rdpq_set_mode_standard call will + * automatically issue a `SYNC_PIPE`. + * * Tiles. These are 8 resources (8 bits) mapping to the 8 tile descriptors + * in RDP hardware, used to describe textures. There is one bit per descriptor + * (`AUTOSYNC_TILE(n)`) so that tracking is actually done at the single tile + * granularity. Commands modifying the tile descriptor (such as #rdpq_set_tile + * or #rdpq_load_tile) will "change" the resource corresponding to the affected tile. + * Commands drawing textured primitives (eg: #rdpq_texture_rectangle) will "use" + * the resource. For instance, calling #rdpq_texture_rectangle using #TILE4, and + * later calling #rdpq_load_tile on #TILE4 will cause a `SYNC_TILE` to be issued + * just before the `LOAD_TILE` command. Notice that if #rdpq_load_tile used + * #TILE5 instead, no `SYNC_TILE` would have been issued, assuming #TILE5 was + * never used before. This means that having a logic to cycle through tile + * descriptors (instead of always using the same) will reduce the number of + * `SYNC_TILE` commands. + * * TMEM. Currently, the whole TMEM is tracked as a single resource (using + * the bit defined by `AUTOSYNC_TMEM(0)`).
Any command that writes to TMEM + * (eg: #rdpq_load_block) will "change" the resource. Any command that reads + * from TMEM (eg: #rdpq_triangle with a texture) will "use" the resource. + * Writing to TMEM while something is reading requires a `SYNC_LOAD` command + * to be issued. + * + * Note that there is a limitation in the current implementation: the RDP can use + * multiple tiles with a single command (eg: when using multi-texturing or LODs), + * but we are not able to track that correctly: all drawing commands for now + * assume that a single tile will be used. If this proves to be a problem, it is + * always possible to call #rdpq_sync_tile to manually issue a sync. + * + * Autosync also works with blocks, albeit conservatively. When recording + * a block, it is not possible to know what the autosync state will be at the + * point of call (and obviously, it could be called in different situations + * with different states). The engine thus handles the worst case: at the + * beginning of a block, it assumes that all resources are "in use". This might + * cause some sync commands to be run in situations where it would not be + * strictly required, but the performance impact is unlikely to be noticeable. + * + * The autosync engine can be enabled or disabled via #rdpq_config_enable / + * #rdpq_config_disable. Remember that manually issuing sync commands requires + * careful debugging on real hardware, as no emulator today is able to + * reproduce the effects of a missing sync command.
+ * + */ + +#include "rdpq.h" +#include "rdpq_internal.h" +#include "rdpq_constants.h" +#include "rdpq_debug_internal.h" +#include "rspq.h" +#include "rspq/rspq_internal.h" +#include "rspq_constants.h" +#include "rdpq_macros.h" +#include "interrupt.h" +#include "utils.h" +#include "rdp.h" +#include +#include +#include + +static void rdpq_assert_handler(rsp_snapshot_t *state, uint16_t assert_code); + +/** @brief The rdpq ucode overlay */ +DEFINE_RSP_UCODE(rsp_rdpq, + .assert_handler=rdpq_assert_handler); + +/** @brief State of the rdpq ucode overlay (partial). + * + * This must be kept in sync with rsp_rdpq.S. + * + * We don't map the whole state here as we don't need to access it from C in whole. + * We just map the initial part of the state, which is what we need. + */ +typedef struct rdpq_state_s { + uint64_t sync_full; ///< Last SYNC_FULL command + uint32_t rspq_syncpoint_id; ///< Syncpoint ID at the time of the last SYNC_FULL command + uint32_t padding; ///< Padding + uint32_t rdram_state_address; ///< Address of this state structure in RDRAM + uint32_t rdram_syncpoint_id; ///< Address of the syncpoint ID in RDRAM +} rdpq_state_t; + +/** @brief Mirror in RDRAM of the state of the rdpq ucode. */ +static rdpq_state_t *rdpq_state; + +bool __rdpq_inited = false; ///< True if #rdpq_init was called + +/** @brief Current configuration of the rdpq library. */ +static uint32_t rdpq_config; + +/** @brief RDP block management state */ +rdpq_block_state_t rdpq_block_state; + +/** @brief Tracking state of RDP */ +rdpq_tracking_t rdpq_tracking; + +/** + * @brief RDP interrupt handler + * + * The RDP interrupt is triggered after a SYNC_FULL command is finished + * (all previous RDP commands are fully completed). In case the user + * requested a callback to be called when that specific SYNC_FULL + * instance has finished, the interrupt routine must call the specified + * callback. 
+ */ +static void __rdpq_interrupt(void) { + assertf(*SP_STATUS & SP_STATUS_SIG_RDPSYNCFULL, "Unexpected RDP interrupt"); + + // Fetch the current RDP buffer for tracing + if (rdpq_trace_fetch) rdpq_trace_fetch(false); + + // The state has been updated to contain a copy of the last SYNC_FULL command + // that was sent to RDP. The command might contain a callback to invoke. + // Extract it to local variables. + uint32_t w0 = (rdpq_state->sync_full >> 32) & 0x00FFFFFF; + uint32_t w1 = (rdpq_state->sync_full >> 0) & 0xFFFFFFFF; + + // Notify the RSP that we've serviced this SYNC_FULL interrupt. If others + // are pending, they can be scheduled now, even as we execute the callback. + MEMORY_BARRIER(); + *SP_STATUS = SP_WSTATUS_CLEAR_SIG_RDPSYNCFULL; + + // If there was a callback registered, call it. + if (w0) { + void (*callback)(void*) = (void (*)(void*))CachedAddr(w0 | 0x80000000); + void* arg = (void*)w1; + + callback(arg); + } +} + +void rdpq_init() +{ + // Do nothing if rdpq was already initialized + if (__rdpq_inited) + return; + + rspq_init(); + + // Get a pointer to the RDRAM copy of the rdpq ucode state. + rdpq_state = UncachedAddr(rspq_overlay_get_state(&rsp_rdpq)); + + // Initialize the ucode state. + memset(rdpq_state, 0, sizeof(rdpq_state_t)); + rdpq_state->rdram_state_address = PhysicalAddr(rdpq_state); + rdpq_state->rdram_syncpoint_id = PhysicalAddr(&__rspq_syncpoints_done); + assert((rdpq_state->rdram_state_address & 7) == 0); // check alignment for DMA + assert((rdpq_state->rdram_syncpoint_id & 7) == 0); // check alignment for DMA + + // Register the rdpq overlay at a fixed position (0xC) + rspq_overlay_register_static(&rsp_rdpq, RDPQ_OVL_ID); + + // Clear library globals + memset(&rdpq_block_state, 0, sizeof(rdpq_block_state)); + rdpq_config = RDPQ_CFG_DEFAULT; + rdpq_tracking.autosync = 0; + rdpq_tracking.mode_freeze = false; + + // Register an interrupt handler for DP interrupts, and activate them. 
+ register_DP_handler(__rdpq_interrupt); + set_DP_interrupt(1); + + // Remember that initialization is complete + __rdpq_inited = true; + + // Force an initial consistent state to avoid memory corruptions and + // undefined behaviours. + rdpq_set_color_image(NULL); + rdpq_set_z_image(NULL); + rdpq_set_combiner_raw(0); + rdpq_set_other_modes_raw(0); +} + +void rdpq_close() +{ + if (!__rdpq_inited) + return; + + rspq_overlay_unregister(RDPQ_OVL_ID); + + set_DP_interrupt( 0 ); + unregister_DP_handler(__rdpq_interrupt); + + __rdpq_inited = false; +} + +uint32_t rdpq_config_set(uint32_t cfg) +{ + uint32_t prev = rdpq_config; + rdpq_config = cfg; + return prev; +} + +uint32_t rdpq_config_enable(uint32_t cfg) +{ + return rdpq_config_set(rdpq_config | cfg); +} + +uint32_t rdpq_config_disable(uint32_t cfg) +{ + return rdpq_config_set(rdpq_config & ~cfg); +} + +void rdpq_fence(void) +{ + // We want the RSP to wait until the RDP is finished. We do this in + // two steps: first we issue a SYNC_FULL (we don't need CPU-side callbacks), + // then we send the internal rspq command that make the RSP spin-wait + // until the RDP is idle. The RDP becomes idle only after SYNC_FULL is done. + rdpq_sync_full(NULL, NULL); + rspq_int_write(RSPQ_CMD_RDP_WAIT_IDLE); +} + +void rdpq_exec(void *buffer, int size) +{ + assertf(PhysicalAddr(buffer) % 8 == 0, "RDP buffer must be aligned to 8 bytes: %p", buffer); + assertf(size % 8 == 0, "RDP buffer size not multiple of 8 bytes: %d", size); + + // TODO: to implement support in blocks, we need a way to notify the block state machine that + // after this command, a new RSPQ_CMD_RDP_SET_BUFFER is required to be sent, to resume playing + // the static buffer. 
+ assertf(!rspq_in_block(), "cannot call rdpq_exec() inside a block"); + + void *end = buffer + size; + rspq_int_write(RSPQ_CMD_RDP_SET_BUFFER, PhysicalAddr(end), PhysicalAddr(buffer), PhysicalAddr(end)); +} + +/** @brief Assert handler for RSP asserts (see "RSP asserts" documentation in rsp.h) */ +static void rdpq_assert_handler(rsp_snapshot_t *state, uint16_t assert_code) +{ + switch (assert_code) + { + case RDPQ_ASSERT_FILLCOPY_BLENDING: + printf("Cannot call rdpq_mode_blender in fill or copy mode\n"); + break; + + case RDPQ_ASSERT_MIPMAP_COMB2: + printf("Interpolated mipmap cannot work with a custom 2-pass combiner\n"); + break; + + case RDPQ_ASSERT_INVALID_CMD_TRI: + printf("RSP triangle command called but C reference implementation was enabled\n"); + break; + + case RDPQ_ASSERT_SEND_INVALID_SIZE: + printf("RDPSend buffer: %lx %lx\n", state->gpr[19], state->gpr[20]); // s3, s4 + break; + + case RDPQ_ASSERT_AUTOTMEM_FULL: + printf("TMEM is full, cannot load more data\n"); + break; + + case RDPQ_ASSERT_AUTOTMEM_UNPAIRED: + printf("incorrect usage of auto-TMEM: unpaired begin/end\n"); + break; + + default: + printf("Unknown assert\n"); + break; + } +} + +/** @brief Autosync engine: mark certain resources as in use */ +extern inline void __rdpq_autosync_use(uint32_t res); + +/** + * @brief Autosync engine: mark certain resources as being changed. + * + * This is the core of the autosync engine. Whenever a resource is "changed" + * while "in use", a SYNC command must be issued. This is a slightly conservative + * approach, as the RDP might already have finished using that resource, + * but we have no way to know it. + * The SYNC command will then reset the "use" status of each respective resource. 
+ */ +void __rdpq_autosync_change(uint32_t res) { + res &= rdpq_tracking.autosync; + if (res) { + if ((res & AUTOSYNC_TILES) && (rdpq_config & RDPQ_CFG_AUTOSYNCTILE)) + rdpq_sync_tile(); + if ((res & AUTOSYNC_TMEMS) && (rdpq_config & RDPQ_CFG_AUTOSYNCLOAD)) + rdpq_sync_load(); + if ((res & AUTOSYNC_PIPE) && (rdpq_config & RDPQ_CFG_AUTOSYNCPIPE)) + rdpq_sync_pipe(); + } +} + +/** + * @name RDP block management functions. + * + * All the functions in this group are called in the context of creation + * of a RDP block (part of a rspq block). See the top-level documentation + * for a general overview of how RDP blocks work. + * + * @{ + */ + +/** + * @brief Initialize RDP block management + * + * This is called by #rspq_block_begin. It resets all the block management + * state to default. + * + * Notice that no allocation is performed. This is because we do block + * allocation lazily as soon as a rdpq command is issued. In fact, if + * the block does not contain rdpq commands, it would be a waste of time + * and memory to allocate a RDP buffer. The allocations will be performed + * by #__rdpq_block_next_buffer as soon as a rdpq command is written. + * + * @see #rspq_block_begin + * @see #__rdpq_block_next_buffer + */ +void __rdpq_block_begin() +{ + memset(&rdpq_block_state, 0, sizeof(rdpq_block_state)); + + // Save the tracking state (to be recovered when the block is done) + rdpq_block_state.previous_tracking = rdpq_tracking; + + // Set for unknown state (as if we had just run another unknown block: we lost track of the RDP state) + __rdpq_block_run(NULL); +} + +/** + * @brief Allocate a new RDP block buffer, chaining it to the current one (if any) + * + * This function is called by #rdpq_passthrough_write and #rdpq_write when we are about + * to write a rdpq command in a block, and the current RDP buffer is full + * (`wptr + cmdsize >= wend`). By extension, it is also called when the current + * RDP buffer has not been allocated yet (`wptr == wend == NULL`).
+ * + * @see #rdpq_write + * @see #rdpq_passthrough_write + */ +void __rdpq_block_next_buffer(void) +{ + struct rdpq_block_state_s *st = &rdpq_block_state; + assertf(__rdpq_inited, "a rdpq command was issued during block recording, but rdpq_init() hasn't been called yet"); + + if (st->pending_wptr) { + st->wptr = st->pending_wptr; + st->wend = st->pending_wend; + st->pending_wptr = NULL; + st->pending_wend = NULL; + } else { + // Configure block minimum size + if (st->bufsize == 0) { + st->bufsize = RDPQ_BLOCK_MIN_SIZE; + assert(RDPQ_BLOCK_MIN_SIZE >= RDPQ_MAX_COMMAND_SIZE); + } + + // Allocate RDP static buffer. + int memsz = sizeof(rdpq_block_t) + st->bufsize*sizeof(uint32_t); + rdpq_block_t *b = malloc_uncached(memsz); + + // Chain the block to the current one (if any) + b->next = NULL; + if (st->last_node) { + st->last_node->next = b; + } + st->last_node = b; + if (!st->first_node) st->first_node = b; + + // Set write pointer and sentinel for the new buffer + st->wptr = b->cmds; + st->wend = b->cmds + st->bufsize; + } + + assertf((PhysicalAddr(st->wptr) & 0x7) == 0, + "start not aligned to 8 bytes: %lx", PhysicalAddr(st->wptr)); + assertf((PhysicalAddr(st->wend) & 0x7) == 0, + "end not aligned to 8 bytes: %lx", PhysicalAddr(st->wend)); + + // Save the pointer to the current position in the RSP queue. We're about + // to write a RSPQ_CMD_RDP_SET_BUFFER that we might need to coalesce later. + extern volatile uint32_t *rspq_cur_pointer; + st->last_rdp_append_buffer = rspq_cur_pointer; + + // Enqueue a rspq command that will make the RDP DMA registers point to the + // new buffer (though with DP_START==DP_END, as the buffer is currently empty). + rspq_int_write(RSPQ_CMD_RDP_SET_BUFFER, + PhysicalAddr(st->wptr), PhysicalAddr(st->wptr), PhysicalAddr(st->wend)); + + // Grow size for next buffer + // We use doubling here to reduce overheads for large blocks + // and at the same time start small. 
+ if (st->bufsize < RDPQ_BLOCK_MAX_SIZE) st->bufsize *= 2; +} + +/** + * @brief Finish creation of a RDP block. + * + * This is called by #rspq_block_end. It finalizes block creation + * and returns a pointer to the first node of the block, which will + * be put within the #rspq_block_t structure, so as to be able to + * reference it in #__rdpq_block_run and #__rdpq_block_free. + * + * @return rdpq_block_t* The created block (first node) + * + * @see #rspq_block_end + * @see #__rdpq_block_run + * @see #__rdpq_block_free + */ +rdpq_block_t* __rdpq_block_end() +{ + struct rdpq_block_state_s *st = &rdpq_block_state; + rdpq_block_t *ret = st->first_node; + + // Save the current autosync state in the first node of the RDP block. + // This makes it easy to recover it when the block is run + if (st->first_node) + st->first_node->tracking = rdpq_tracking; + + // Recover tracking state before the block creation started + rdpq_tracking = st->previous_tracking; + + // NOTE: no rspq command is enqueued at the end of block. Specifically, + // there is no RSPQ_CMD_RDP_SET_BUFFER to switch back to the dynamic RDP buffers. + // This means that after the block is run, further RDP passthrough commands + // will be written in the trailing space of the last RDP static buffer. + // When that is filled and the sentinel is reached, the RSP will automatically + // switch to the next RDP dynamic buffer. By using the trailing space of the + // RDP static buffer, we save a buffer switch (which might even be useless + // if another block is run right after this one). + return ret; +} + +/** @brief Run a block (called by #rspq_block_run). */ +void __rdpq_block_run(rdpq_block_t *block) +{ + // We are about to run a block that contains rdpq commands.
+ // During creation, we tracked some state for the block + // and saved it into the block structure; set it as current, + // because from now on we can assume the block will run and the + // state of the engine must match the state at the end of the block. + if (block) { + rdpq_tracking_t prev = rdpq_tracking; + rdpq_tracking = block->tracking; + + // If the data coming out of the block is "unknown", we can + // restore the previous value, because it means that the block didn't + // change it. + if (rdpq_tracking.cycle_type_known == 0) + rdpq_tracking.cycle_type_known = prev.cycle_type_known; + if (rdpq_tracking.cycle_type_frozen == 0) + rdpq_tracking.cycle_type_frozen = prev.cycle_type_frozen; + } else { + // Initialize tracking state for unknown state + rdpq_tracking = (rdpq_tracking_t){ + // current autosync status is unknown because blocks can be + // played in any context. So assume the worst: all resources + // are being used. This will cause all SYNCs to be generated, + // which is the safest option. + .autosync = ~0, + // we don't know whether mode changes will be frozen or not + // when the block will play. Assume the worst (and thus + // do not optimize out mode changes). + .mode_freeze = false, + // we don't know the cycle type after we run the block + .cycle_type_known = 0, + .cycle_type_frozen = 0, + }; + } +} + +/** + * @brief Free a block + * + * This function is called when a block is freed. It is called + * by #rspq_block_free. + * + * @see #rspq_block_free. + */ +void __rdpq_block_free(rdpq_block_t *block) +{ + // Go through the chain and free all nodes + while (block) { + void *b = block; + block = block->next; + free_uncached(b); + } +} + +/** + * @brief Reserve space in the RDP static buffer for a number of RDP commands + * + * This is called by #rdpq_write when run within a block.
It makes sure that + * the static buffer has enough space for the specified number of RDP commands, + * and also switches back to the dynamic buffer if the command is going to generate + * a large or unbounded number of commands. + */ +void __rdpq_block_reserve(int num_rdp_commands) +{ + struct rdpq_block_state_s *st = &rdpq_block_state; + + if (num_rdp_commands < 0 || num_rdp_commands >= RDPQ_BLOCK_MIN_SIZE/2/2) { + // Check if there is a RDP static buffer currently active + if (st->wptr) { + // We are about to force RDP switch to dynamic buffer. Save the + // current buffer pointers as pending, so that we can switch back + // to it later. + st->pending_wptr = st->wptr; + st->pending_wend = st->wend; + // Disable internal RDP static buffer + st->wptr = NULL; + st->wend = NULL; + + // Force a switch to next dynamic buffer. + rspq_int_write(RSPQ_CMD_RDP_SET_BUFFER, 0, 0, 0); + } + } else if (num_rdp_commands > 0) { + if (__builtin_expect(st->wptr + num_rdp_commands*2 > st->wend, 0)) + __rdpq_block_next_buffer(); + + for (int i=0; i<num_rdp_commands; i++) { + *st->wptr++ = 0xC0000000; + *st->wptr++ = 0; + } + + // Make sure we don't coalesce with the last append command anymore, + // as there will be other RDP commands in between. + st->last_rdp_append_buffer = NULL; + } +} + +/** + * @brief Set a new RDP write pointer, and enqueue a RSP command to run the buffer until there + * + * This function is called by #rdpq_passthrough_write after some RDP commands have been written + * into the block's RDP buffer. A rspq command #RSPQ_CMD_RDP_APPEND_BUFFER will be issued + * so that the RSP will tell the RDP to fetch and run the new commands, appended at + * the end of the current buffer. + * + * If possible, though, this function will coalesce the command with an immediately + * preceding RSPQ_CMD_RDP_APPEND_BUFFER (or even RSPQ_CMD_RDP_SET_BUFFER, if we are + * at the start of the buffer), so that only a single RSP command is issued, which + * covers multiple RDP commands.
+ * + * @param wptr New block's RDP write pointer + */ +void __rdpq_block_update(volatile uint32_t *wptr) +{ + struct rdpq_block_state_s *st = &rdpq_block_state; + uint32_t phys_old = PhysicalAddr(st->wptr); + uint32_t phys_new = PhysicalAddr(wptr); + st->wptr = wptr; + + assertf((phys_old & 0x7) == 0, "old not aligned to 8 bytes: %lx", phys_old); + assertf((phys_new & 0x7) == 0, "new not aligned to 8 bytes: %lx", phys_new); + + if (st->last_rdp_append_buffer && (*st->last_rdp_append_buffer & 0xFFFFFF) == phys_old) { + // Update the previous command. + // It can be either a RSPQ_CMD_RDP_SET_BUFFER or RSPQ_CMD_RDP_APPEND_BUFFER, + // but we still need to update it to the new END pointer. + *st->last_rdp_append_buffer = (*st->last_rdp_append_buffer & 0xFF000000) | phys_new; + } else { + // A RSP command has emitted some commands since last time we emit + // RSPQ_CMD_RDP_APPEND_BUFFER. Thus we can't coalesce with the last one + // anymore: we need to emit a new RSPQ_CMD_RDP_APPEND_BUFFER in the RSP + // queue of the block + extern volatile uint32_t *rspq_cur_pointer; + st->last_rdp_append_buffer = rspq_cur_pointer; + rspq_int_write(RSPQ_CMD_RDP_APPEND_BUFFER, phys_new); + } +} + +/** @} */ + + +/** + * @name Helpers to write generic RDP commands + * + * All the functions in this group are wrappers around #rdpq_passthrough_write to help + * generating RDP commands. They are called by inlined functions in rdpq.h. + * See the top-level documentation about inline functions to understand the + * reason of this split. 
+ * + * @{ + */ + +/** @brief Write a standard 8-byte RDP command */ +__attribute__((noinline)) +void __rdpq_write8(uint32_t cmd_id, uint32_t arg0, uint32_t arg1) +{ + rdpq_passthrough_write((cmd_id, arg0, arg1)); +} + +/** @brief Write a standard 8-byte RDP command, which changes some autosync resources */ +__attribute__((noinline)) +void __rdpq_write8_syncchange(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync) +{ + __rdpq_autosync_change(autosync); + __rdpq_write8(cmd_id, arg0, arg1); +} + +/** @brief Write a standard 8-byte RDP command, which uses some autosync resources */ +__attribute__((noinline)) +void __rdpq_write8_syncuse(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync) +{ + __rdpq_autosync_use(autosync); + __rdpq_write8(cmd_id, arg0, arg1); +} + +/** @brief Write a standard 8-byte RDP command, which changes some autosync resources and uses others. */ +__attribute__((noinline)) +void __rdpq_write8_syncchangeuse(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t autosync_c, uint32_t autosync_u) +{ + __rdpq_autosync_change(autosync_c); + __rdpq_autosync_use(autosync_u); + __rdpq_write8(cmd_id, arg0, arg1); +} + +/** @brief Write a standard 16-byte RDP command */ +__attribute__((noinline)) +void __rdpq_write16(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3) +{ + rdpq_passthrough_write((cmd_id, arg0, arg1, arg2, arg3)); +} + +/** @brief Write a standard 16-byte RDP command, which uses some autosync resources */ +__attribute__((noinline)) +void __rdpq_write16_syncuse(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t autosync) +{ + __rdpq_autosync_use(autosync); + __rdpq_write16(cmd_id, arg0, arg1, arg2, arg3); +} + +/** @brief Write a 8-byte RDP command fixup. 
*/ +__attribute__((noinline)) +void __rdpq_fixup_write8_syncchange(uint32_t cmd_id, uint32_t w0, uint32_t w1, uint32_t autosync) +{ + __rdpq_autosync_change(autosync); + rdpq_write(1, RDPQ_OVL_ID, cmd_id, w0, w1); +} + +/** @} */ + + +/** + * @name RDP fixups out-of-line implementations + * + * These are the out-of-line implementations of RDP commands which need specific logic, + * mostly because they are fixups. + * + * @{ + */ + +/** @brief Out-of-line implementation of #rdpq_set_scissor */ +__attribute__((noinline)) +void __rdpq_set_scissor(uint32_t w0, uint32_t w1) +{ + // NOTE: SET_SCISSOR does not require SYNC_PIPE + // NOTE: We can't optimize this away into a standard SET_SCISSOR, even if + // we track the cycle type, because the RSP must always know the current + // scissoring rectangle. So we must always go through the fixup. + rdpq_write(1, RDPQ_OVL_ID, RDPQ_CMD_SET_SCISSOR_EX, w0, w1); +} + +/** @brief Out-of-line implementation of #rdpq_set_fill_color */ +__attribute__((noinline)) +void __rdpq_set_fill_color(uint32_t w1) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + rdpq_write(1, RDPQ_OVL_ID, RDPQ_CMD_SET_FILL_COLOR_32, 0, w1); +} + +/** @brief Out-of-line implementation of #rdpq_set_color_image */ +__attribute__((noinline)) +void __rdpq_set_color_image(uint32_t w0, uint32_t w1, uint32_t sw0, uint32_t sw1) +{ + // SET_COLOR_IMAGE on RSP always generates an additional SET_FILL_COLOR, + // so make sure there is space for it in case of a static buffer (in a block). + __rdpq_autosync_change(AUTOSYNC_PIPE); + rdpq_write(2, RDPQ_OVL_ID, RDPQ_CMD_SET_COLOR_IMAGE, w0, w1); + + if (rdpq_config & RDPQ_CFG_AUTOSCISSOR) + __rdpq_set_scissor(sw0, sw1); +} + +void rdpq_set_color_image(const surface_t *surface) +{ + if (__builtin_expect(!surface, 0)) { + // If a NULL surface is provided, point RDP to invalid memory (>8Mb), + // so that nothing is drawn.
Also force scissoring rect to zero as additional + // safeguard (with X=1 so that auto-scissor doesn't go into negative numbers ever). + uint32_t cfg = rdpq_config_disable(RDPQ_CFG_AUTOSCISSOR); + rdpq_set_color_image_raw(0, RDPQ_VALIDATE_DETACH_ADDR, FMT_I8, 8, 8, 8); + rdpq_config_set(cfg); + rdpq_set_scissor(0, 0, 1, 0); + return; + } + assertf((PhysicalAddr(surface->buffer) & 63) == 0, + "buffer pointer is not aligned to 64 bytes, so it cannot be used as RDP color image"); + rdpq_set_color_image_raw(0, PhysicalAddr(surface->buffer), + surface_get_format(surface), surface->width, surface->height, surface->stride); +} + +void rdpq_set_z_image(const surface_t *surface) +{ + if (__builtin_expect(!surface, 0)) { + // If a NULL surface is provided, point RDP to invalid memory (>8Mb). + rdpq_set_z_image_raw(0, RDPQ_VALIDATE_DETACH_ADDR); + return; + } + assertf(TEX_FORMAT_BITDEPTH(surface_get_format(surface)) == 16, "the format of the Z-buffer surface must be 16-bit (RGBA16, IA16)"); + assertf((PhysicalAddr(surface->buffer) & 63) == 0, + "buffer pointer is not aligned to 64 bytes, so it cannot be used as RDP Z image"); + rdpq_set_z_image_raw(0, PhysicalAddr(surface->buffer)); +} + +void rdpq_set_texture_image(const surface_t *surface) +{ + tex_format_t fmt = surface_get_format(surface); + // Check if the texture is misaligned and can cause RDP crashes. This must + // be kept in sync with new findings in the validator, see check_loading_crash + // in rdpq_validate.c. 
+ int misalign = PhysicalAddr(surface->buffer) & 15; (void)misalign; + assertf(misalign == 0 || misalign >= 8 || TEX_FORMAT_BITDEPTH(fmt) == 4, + "texture buffer address %p is misaligned and can cause RDP crashes; please use 8-bytes alignment", surface->buffer); + rdpq_set_texture_image_raw(surface_get_placeholder_index(surface), PhysicalAddr(surface->buffer), fmt, + TEX_FORMAT_BYTES2PIX(fmt, surface->stride), surface->height); +} + +/** @brief Out-of-line implementation of #rdpq_set_other_modes_raw */ +__attribute__((noinline)) +void __rdpq_set_other_modes(uint32_t w0, uint32_t w1) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + + // SOM might also generate a SET_SCISSOR. Make sure to reserve space for it. + rdpq_write(2, RDPQ_OVL_ID, RDPQ_CMD_SET_OTHER_MODES, w0, w1); + + if (w0 & (1 << (SOM_CYCLE_SHIFT-32+1))) + rdpq_tracking.cycle_type_known = 2; + else + rdpq_tracking.cycle_type_known = 1; +} + +/** @brief Out-of-line implementation of #rdpq_change_other_modes_raw */ +__attribute__((noinline)) +void __rdpq_change_other_modes(uint32_t w0, uint32_t w1, uint32_t w2) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + + // SOM might also generate a SET_SCISSOR. Make sure to reserve space for it. 
+ rdpq_write(2, RDPQ_OVL_ID, RDPQ_CMD_MODIFY_OTHER_MODES, w0, w1, w2); + + if ((w0 == 0) && (w1 & (1 << (SOM_CYCLE_SHIFT-32+1)))) { + if (w2 & (1 << (SOM_CYCLE_SHIFT-32+1))) + rdpq_tracking.cycle_type_known = 2; + else + rdpq_tracking.cycle_type_known = 1; + } +} + +uint64_t rdpq_get_other_modes_raw(void) +{ + rsp_queue_t *state = __rspq_get_state(); + return state->rdp_mode.other_modes; +} + +void rdpq_set_tile_autotmem(int16_t tmem_bytes) +{ + if (tmem_bytes >= 0) { + assertf((tmem_bytes % 8) == 0 , "tmem_bytes must be a multiple of 8"); + tmem_bytes /= 8; + } + rspq_write(RDPQ_OVL_ID, RDPQ_CMD_AUTOTMEM_SET_ADDR, (uint16_t)tmem_bytes); +} + +void rdpq_sync_full(void (*callback)(void*), void* arg) +{ + uint32_t w0 = PhysicalAddr(callback); + uint32_t w1 = (uint32_t)arg; + + // We encode in the command (w0/w1) the callback for the RDP interrupt, + // and we need that to be forwarded to RSP dynamic command. + rdpq_write(1, RDPQ_OVL_ID, RDPQ_CMD_SYNC_FULL, w0, w1); + + // The RDP is fully idle after this command, so no sync is necessary. + rdpq_tracking.autosync = 0; +} + +void rdpq_sync_pipe(void) +{ + __rdpq_write8(RDPQ_CMD_SYNC_PIPE, 0, 0); + rdpq_tracking.autosync &= ~AUTOSYNC_PIPE; +} + +void rdpq_sync_tile(void) +{ + __rdpq_write8(RDPQ_CMD_SYNC_TILE, 0, 0); + rdpq_tracking.autosync &= ~AUTOSYNC_TILES; +} + +void rdpq_sync_load(void) +{ + __rdpq_write8(RDPQ_CMD_SYNC_LOAD, 0, 0); + rdpq_tracking.autosync &= ~AUTOSYNC_TMEMS; +} + +/** @} */ + +/* Extern inline instantiations. 
*/ +extern inline void rdpq_set_fill_color(color_t color); +extern inline void rdpq_set_fill_color_stripes(color_t color1, color_t color2); +extern inline void rdpq_set_fog_color(color_t color); +extern inline void rdpq_set_blend_color(color_t color); +extern inline void rdpq_set_prim_color(color_t color); +extern inline void rdpq_set_detail_factor(float value); +extern inline void rdpq_set_prim_lod_frac(uint8_t value); +extern inline void rdpq_set_prim_register_raw(color_t color, uint8_t minlod, uint8_t primlod); +extern inline void rdpq_set_env_color(color_t color); +extern inline void rdpq_set_prim_depth_raw(uint16_t primitive_z, int16_t primitive_delta_z); +extern inline void rdpq_load_tlut_raw(rdpq_tile_t tile, uint8_t lowidx, uint8_t highidx); +extern inline void rdpq_set_tile_size_fx(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t s1, uint16_t t1); +extern inline void rdpq_load_block(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t num_texels, uint16_t tmem_pitch); +extern inline void rdpq_load_block_fx(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t num_texels, uint16_t dxt); +extern inline void rdpq_load_tile_fx(rdpq_tile_t tile, uint16_t s0, uint16_t t0, uint16_t s1, uint16_t t1); +extern inline void rdpq_set_combiner_raw(uint64_t cc); +extern inline void rdpq_set_other_modes_raw(uint64_t mode); +extern inline void rdpq_change_other_modes_raw(uint64_t mask, uint64_t val); +extern inline void rdpq_set_color_image_raw(uint8_t index, uint32_t offset, tex_format_t format, uint32_t width, uint32_t height, uint32_t stride); +extern inline void rdpq_set_z_image_raw(uint8_t index, uint32_t offset); +extern inline void rdpq_set_texture_image_raw(uint8_t index, uint32_t offset, tex_format_t format, uint16_t width, uint16_t height); +extern inline void rdpq_set_lookup_address(uint8_t index, void* rdram_addr); +extern inline void rdpq_set_tile(rdpq_tile_t tile, tex_format_t format, int32_t tmem_addr, uint16_t tmem_pitch, const rdpq_tileparms_t *parms); 
diff --git a/src/rdpq/rdpq_attach.c b/src/rdpq/rdpq_attach.c new file mode 100644 index 0000000000..7c09aa3a60 --- /dev/null +++ b/src/rdpq/rdpq_attach.c @@ -0,0 +1,141 @@ +/** + * @file rdpq_attach.c + * @brief RDP Command queue: surface attachment API + * @ingroup rdp + */ + +#include "rdpq.h" +#include "rdpq_mode.h" +#include "rdpq_rect.h" +#include "rdpq_attach.h" +#include "rdpq_internal.h" +#include "rspq.h" +#include "display.h" +#include "debug.h" + +/** @brief Size of the internal stack of attached surfaces */ +#define ATTACH_STACK_SIZE 4 + +static const surface_t* attach_stack[ATTACH_STACK_SIZE][2] = { { NULL, NULL } }; +static int attach_stack_ptr = 0; + +bool rdpq_is_attached(void) +{ + return attach_stack_ptr > 0; +} + +static void attach(const surface_t *surf_color, const surface_t *surf_z, bool clear_clr, bool clear_z) +{ + assertf(attach_stack_ptr < ATTACH_STACK_SIZE, "Too many nested attachments"); + + attach_stack[attach_stack_ptr][0] = surf_color; + attach_stack[attach_stack_ptr][1] = surf_z; + attach_stack_ptr++; + + if (clear_clr || clear_z) + rdpq_mode_push(); + + if (surf_z) { + assertf(surf_z-> width == surf_color->width && surf_z->height == surf_color->height, + "Color and Z buffers must have the same size"); + + if (clear_z) { + rdpq_set_color_image(surf_z); + rdpq_set_mode_fill(color_from_packed16(0xFFFC)); + rdpq_fill_rectangle(0, 0, surf_z->width, surf_z->height); + } + } + rdpq_set_z_image(surf_z); + + if (clear_clr) { + rdpq_set_color_image(surf_color); + rdpq_set_mode_fill(color_from_packed32(0x000000FF)); + rdpq_fill_rectangle(0, 0, surf_color->width, surf_color->height); + } + rdpq_set_color_image(surf_color); + + if (clear_clr || clear_z) + rdpq_mode_pop(); +} + +static void detach(void) +{ + const surface_t *color = NULL, *z = NULL; + + // Reattach to the previous surface in the stack (if any) + attach_stack_ptr--; + if (attach_stack_ptr > 0) { + color = attach_stack[attach_stack_ptr-1][0]; + z = 
attach_stack[attach_stack_ptr-1][1]; + } + rdpq_set_z_image(z); + rdpq_set_color_image(color); + rspq_flush(); +} + +void rdpq_attach(const surface_t *surf_color, const surface_t *surf_z) +{ + assertf(__rdpq_inited, "rdpq not initialized: please call rdpq_init()"); + attach(surf_color, surf_z, false, false); +} + +void rdpq_attach_clear(const surface_t *surf_color, const surface_t *surf_z) +{ + attach(surf_color, surf_z, true, true); +} + +/** @brief Like #rdpq_clear, but with optional fill color configuration */ +void __rdpq_clear(const color_t *clr) +{ + extern void __rdpq_set_mode_fill(void); + assertf(rdpq_is_attached(), "No render target is currently attached"); + + rdpq_mode_push(); + __rdpq_set_mode_fill(); + if (clr) rdpq_set_fill_color(*clr); + rdpq_fill_rectangle(0, 0, attach_stack[attach_stack_ptr-1][0]->width, attach_stack[attach_stack_ptr-1][0]->height); + rdpq_mode_pop(); +} + +/** @brief Like #rdpq_clear_z, but with optional fill z value configuration */ +void __rdpq_clear_z(const uint16_t *z) +{ + extern void __rdpq_set_mode_fill(void); + assertf(rdpq_is_attached(), "No render target is currently attached"); + + const surface_t *surf_z = attach_stack[attach_stack_ptr-1][1]; + assertf(surf_z, "No Z buffer is currently attached"); + + // Disable autoscissor, so that when we attach to the Z buffer, we + // keep the previous scissor rect. This is probably expected by the user + // for symmetry with rdpq_clear that does respect the scissor rect. 
+ uint32_t old_cfg = rdpq_config_disable(RDPQ_CFG_AUTOSCISSOR); + rdpq_attach(surf_z, NULL); + rdpq_mode_push(); + __rdpq_set_mode_fill(); + if (z) rdpq_set_fill_color(color_from_packed16(*z)); + rdpq_fill_rectangle(0, 0, surf_z->width, surf_z->height); + rdpq_mode_pop(); + rdpq_detach(); + rdpq_config_set(old_cfg); +} + +void rdpq_detach_cb(void (*cb)(void*), void *arg) +{ + assertf(rdpq_is_attached(), "No render target is currently attached"); + + rdpq_sync_full(cb, arg); + detach(); +} + +void rdpq_detach_show(void) +{ + assertf(rdpq_is_attached(), "No render target is currently attached"); + rdpq_detach_cb((void (*)(void*))display_show, (void*)attach_stack[attach_stack_ptr-1][0]); +} + +/* Extern inline instantiations. */ +extern inline void rdpq_clear(color_t color); +extern inline void rdpq_clear_z(uint16_t z); +extern inline void rdpq_detach(void); +extern inline void rdpq_detach_wait(void); diff --git a/src/rdpq/rdpq_debug.c b/src/rdpq/rdpq_debug.c new file mode 100644 index 0000000000..07020fb069 --- /dev/null +++ b/src/rdpq/rdpq_debug.c @@ -0,0 +1,1589 @@ +/** + * @file rdpq_debug.c + * @brief RDP Command queue: debugging helpers + * @ingroup rdp + */ +#include "rdpq_debug.h" +#include "rdpq_debug_internal.h" +#ifdef N64 +#include "rdpq.h" +#include "rspq.h" +#include "rdpq_rect.h" +#include "rdpq_mode.h" +#include "rdpq_internal.h" +#include "rdp.h" +#include "debug.h" +#include "interrupt.h" +#include "utils.h" +#include "rspq_constants.h" +#include "rdpq_constants.h" +#else +///@cond +#define debugf(msg, ...) 
fprintf(stderr, msg, ##__VA_ARGS__) +#define MIN(a,b) ((a)<(b)?(a):(b)) +#define MAX(a,b) ((a)>(b)?(a):(b)) +///@endcond +#endif +#include +#include +#include +#include +///@cond +#define __STDC_FORMAT_MACROS +#include +///@endcond + +/** @brief RDP Debug command: turn on/off logging */ +#define RDPQ_CMD_DEBUG_SHOWLOG 0x00010000 +/** @brief RDP Debug command: debug message */ +#define RDPQ_CMD_DEBUG_MESSAGE 0x00020000 + +/** @brief Flags that configure the logging */ +int __rdpq_debug_log_flags; + +#ifndef RDPQ_DEBUG_DEBUG +/** + * @brief Internal debugging of rdpq_debug. + * + * Define to 1 to activate internal debugging of the rdpq debug module. + * This is useful to trace bugs of rdpq itself, but it should not be + * necessary for standard debugging sessions of application code, so it + * is turned off by default. + */ +#define RDPQ_DEBUG_DEBUG 0 +#endif + +#if RDPQ_DEBUG_DEBUG +/** @brief Like debugf, but guarded by #RDPQ_DEBUG_DEBUG */ +#define intdebugf(...) debugf(__VA_ARGS__) +#else +/** @brief Like debugf, but guarded by #RDPQ_DEBUG_DEBUG */ +#define intdebugf(...)
({ }) +#endif + +/** @brief Extract bits from word */ +#define BITS(v, b, e) ((unsigned int)((v) << (63-(e)) >> (63-(e)+(b)))) +/** @brief Extract bit from word */ +#define BIT(v, b) BITS(v, b, b) +/** @brief Extract bits from word as signed quantity */ +#define SBITS(v, b, e) (int)BITS((int64_t)(v), b, e) +/** @brief Extract command ID from RDP command word */ +#define CMD(v) BITS((v), 56, 61) +/** @brief Check if a command is a triangle */ +#define CMD_IS_TRI(cmd) ((cmd) >= 0x8 && (cmd) <= 0xF) + +/** @brief A buffer sent to RDP via DMA */ +typedef struct { + uint64_t *start; ///< Start pointer + uint64_t *end; ///< End pointer + uint64_t *traced; ///< End pointer of already-traced commands +} rdp_buffer_t; + +/** @brief Decoded SET_COMBINE command */ +typedef struct { + ///@cond + struct cc_cycle_s { + struct { uint8_t suba, subb, mul, add; } rgb; + struct { uint8_t suba, subb, mul, add; } alpha; + } cyc[2]; + ///@endcond +} colorcombiner_t; + +/** @brief Decoded SET_OTHER_MODES command */ +typedef struct { + ///@cond + bool atomic; + uint8_t cycle_type; + struct { bool persp, detail, sharpen, lod; } tex; + struct { bool enable; uint8_t type; } tlut; + uint8_t sample_type; + uint8_t tf_mode; + bool chromakey; + struct { uint8_t rgb, alpha; } dither; + struct blender_s { uint8_t p, a, q, b; } blender[2]; + bool blend, read, aa; + struct { uint8_t mode; bool color, sel_alpha, mul_alpha; } cvg; + struct { uint8_t mode; bool upd, cmp, prim; } z; + struct { bool enable, noise; } alphacmp; + struct { bool fog, freeze, bl2; } rdpqx; // rdpq extensions + ///@endcond +} setothermodes_t; + +/** + * @brief Current RDP state + * + * This structure represents a mirror of the internal state of the RDP. + * It is updated by the validator as commands flow through, and is then used + * to validate the consistency of next commands. 
+ */ +static struct { + struct { + bool pipe; ///< True if the pipe is busy (SYNC_PIPE required) + bool tile[8]; ///< True if each tile is a busy (SYNC_TILE required) + uint8_t tmem[64]; ///< Bitarray: busy state for each 8-byte word of TMEM (SYNC_LOAD required) + } busy; ///< Busy entities (for SYNC commands) + struct { + bool sent_scissor : 1; ///< True if at least one SET_SCISSOR was sent since reset + bool sent_zprim : 1; ///< True if SET_PRIM_DEPTH was sent + bool mode_changed : 1; ///< True if there is a pending mode change to validate (SET_OTHER_MODES / SET_COMBINE) + bool rendertarget_changed : 1; ///< True if there is a pending render target change to validate (SET_COLOR_IMAGE / SET_SCISSOR) + }; + uint64_t *last_som; ///< Pointer to last SOM command sent + uint64_t last_som_data; ///< Last SOM command (raw) + uint64_t *last_cc; ///< Pointer to last CC command sent + uint64_t last_cc_data; ///< Last CC command (raw) + uint64_t *last_col; ///< Pointer to last SET_COLOR_IMAGE command sent + uint64_t last_col_data; ///< Last COLOR command (raw) + uint64_t *last_tex; ///< Pointer to last SET_TEX_IMAGE command sent + uint64_t last_tex_data; ///< Last TEX command (raw) + uint64_t *last_z; ///< Pointer to last SET_Z_IMAGE command sent + uint64_t last_z_data; ///< Last Z command (raw) + setothermodes_t som; ///< Current SOM state + colorcombiner_t cc; ///< Current CC state + struct tile_s { + uint64_t *last_settile; ///< Pointer to last SET_TILE command sent + uint64_t *last_setsize; ///< Pointer to last LOAD_TILE/SET_TILE_SIZE command sent + uint64_t last_settile_data; ///< Last SET_TILE command (raw) + uint64_t last_setsize_data; ///< Last LOAD_TILE/SET_TILE_SIZE command (raw) + uint8_t fmt, size; ///< Format & size (RDP format/size bits) + uint8_t pal; ///< Palette number + bool has_extents; ///< True if extents were set (via LOAD_TILE / SET_TILE_SIZE) + float s0, t0, s1, t1; ///< Extents of tile in TMEM + int16_t tmem_addr; ///< Address in TMEM + int16_t 
tmem_pitch; ///< Pitch in TMEM + struct { + uint8_t mask; ///< Mask (RDP mask bits) + bool clamp; ///< Clamping enabled + bool mirror; ///< Mirroring enabled + } s, t; ///< Settings for S&T coordinates + } tile[8]; ///< Current tile descriptors + struct { + uint8_t fmt, size; ///< Format & size (RDP format/size bits) + uint16_t width, height; ///< Dimensions of the color image + } col; ///< Current associated color image + struct { + uint32_t physaddr; ///< Physical address of the texture + uint8_t fmt, size; ///< Format & size (RDP format/size bits) + } tex; ///< Current associated texture image + struct { + uint16_t x0,y0,x1,y1; ///< Scissor extents + } clip; ///< Current scissor extents +} rdp; + +/** + * @brief Validator context + */ +struct { + uint64_t *buf; ///< Current instruction + uint32_t flags; ///< Flags (see RDPQ_VALIDATION_*) + int warns, errs; ///< Validators warnings/errors (stats) + bool crashed; ///< True if the RDP chip crashed +} vctx; + +/** @brief Triangle primitives names */ +static const char *tri_name[] = { "TRI", "TRI_Z", "TRI_TEX", "TRI_TEX_Z", "TRI_SHADE", "TRI_SHADE_Z", "TRI_TEX_SHADE", "TRI_TEX_SHADE_Z"}; +static const char *tex_fmt_name[] = { "RGBA", "YUV", "CI", "IA", "I", "?", "?", "?" 
}; + +/** @brief Helper function to coalesce disassembled triangles */ +static bool log_coalesce_tris(uint8_t cmd, uint8_t *last_tri_cmd, int *num_tris); + +#ifdef N64 +#define MAX_BUFFERS 12 ///< Maximum number of pending RDP buffers +#define MAX_HOOKS 4 ///< Maximum number of custom hooks +static rdp_buffer_t buffers[MAX_BUFFERS]; ///< Pending RDP buffers (ring buffer) +static volatile int buf_ridx, buf_widx; ///< Read/write index into the ring buffer of RDP buffers +static bool buf_changed; ///< True if the RDP has just switched buffer +static rdp_buffer_t last_buffer; ///< Last RDP buffer that was processed +static int show_log; ///< != 0 if logging is enabled +static void (*hooks[MAX_HOOKS])(void*, uint64_t*, int); ///< Custom hooks +static void* hooks_ctx[MAX_HOOKS]; ///< Context for the hooks + +// Documented in rdpq_debug_internal.h +void (*rdpq_trace)(void); +void (*rdpq_trace_fetch)(bool new_buffer); + +/** @brief Run the actual trace flushing the cached buffers */ +void __rdpq_trace_flush(void); + +/** @brief Implementation of #rdpq_trace_fetch */ +void __rdpq_trace_fetch(bool new_buffer) +{ + disable_interrupts(); + + // Extract current start/end pointers from RDP registers (in the uncached segment) + // Avoid race conditions versus RSP by reading the status register twice and retrying + // if it changed in between. + uint64_t *start, *end, status, status_prev; + do { + status_prev = *DP_STATUS; + start = (void*)(*DP_START | 0xA0000000); + end = (void*)(*DP_END | 0xA0000000); + status = *DP_STATUS; + } while (status != status_prev); + + // If the registers contain a new start pointer without its associated end pointer, + // it means that we can't use this data: we don't know the full new buffer yet. + // In this case, we just return and wait for the next call. 
+ if ((status & DP_STATUS_START_VALID) && !(status & DP_STATUS_END_VALID)) + { + enable_interrupts(); + return; + } + +#if RDPQ_DEBUG_DEBUG + intdebugf("__rdpq_trace_fetch: %p-%p\n", start, end); + extern void *rspq_rdp_dynamic_buffers[2]; + for (int i=0;i<2;i++) + if ((void*)start >= rspq_rdp_dynamic_buffers[i] && (void*)end <= rspq_rdp_dynamic_buffers[i]+RDPQ_DYNAMIC_BUFFER_SIZE) + intdebugf(" -> dynamic buffer %d\n", i); +#endif + + assertf(start <= end, "rdpq_debug: invalid RDP buffer: %p-%p\n", start, end); + + // Coalesce with last written buffer if possible. Notice that rdpq_trace put the start + // pointer to NULL to avoid coalescing when it begins dumping it, so this should avoid + // race conditions. + int prev = buf_widx ? buf_widx - 1 : MAX_BUFFERS-1; + if (!buf_changed && buffers[prev].start == start) { + if (buffers[prev].end == end) { + buf_changed = new_buffer; + enable_interrupts(); + intdebugf(" -> ignored because coalescing\n"); + return; + } + // If the previous buffer was bigger, it is a logic error, as RDP buffers should only grow + assertf(buffers[prev].end <= end, "rdpq_debug: RDP buffer shrinking (%p-%p => %p-%p)\n", + buffers[prev].start, buffers[prev].end, start, end); + buffers[prev].end = end; + + // If the previous buffer was already dumped, dump it again as we added more + // information to it. We do not modify the "traced" pointer so that previously + // dumped commands are not dumped again. + if (buf_ridx == buf_widx) { + intdebugf(" -> replaying from %p\n", buffers[prev].traced); + buf_ridx = prev; + } + + intdebugf(" -> coalesced\n"); + buf_changed = new_buffer; + __rdpq_trace_flush(); // FIXME: remove this (see __rdpq_trace) + enable_interrupts(); + return; + } + + // If the buffer is full, we could continue logging by skipping a buffer, but the validator + // is done with. So for now just abort. + assertf((buf_widx + 1) % MAX_BUFFERS != buf_ridx, "validator buffer full\n"); + + // Write the new buffer. 
It should be an empty slot + buffers[buf_widx] = (rdp_buffer_t){ .start = start, .end = end, .traced = start }; + intdebugf(" -> written to slot %d\n", buf_widx); + buf_widx = (buf_widx + 1) % MAX_BUFFERS; + + // If we know for sure that the RDP is about the change buffer, remember it so that + // next reads will surely be a new one. For instance, this allows to process twice + // a same buffer sent two times in a row. + buf_changed = new_buffer; + + __rdpq_trace_flush(); // FIXME: remove this (see __rdpq_trace) + enable_interrupts(); +} + +/** @brief Process a RDPQ_DEBUG command */ +void __rdpq_debug_cmd(uint64_t cmd) +{ + switch(BITS(cmd, 48, 55)) { + case 0x01: // Show log + show_log += BIT(cmd, 0) ? 1 : -1; + return; + case 0x02: // Message + // Nothing to do. Debugging messages are shown by the disassembler + return; + } +} + +/** @brief Implementation of #rdpq_trace */ +void __rdpq_trace(void) +{ + // Update buffers to current RDP status. This make sure the trace + // is up to date. + __rdpq_trace_fetch(false); + __rdpq_trace_flush(); +} + +void __rdpq_trace_flush(void) +{ + while (1) { + uint64_t *cur = 0, *end = 0; + + // Pop next RDP buffer from ring buffer. Do it atomically to avoid races + disable_interrupts(); + if (buf_ridx != buf_widx) { + cur = buffers[buf_ridx].traced; + end = buffers[buf_ridx].end; + buffers[buf_ridx].traced = end; + buf_ridx = (buf_ridx + 1) % MAX_BUFFERS; + } + enable_interrupts(); + + // If there are no more pending buffers, we are done + if (!cur) break; + + // Go through the RDP buffer. If log is active, disassemble. + // Run the validator on all the commands. + while (cur < end) { + uint8_t cmd = BITS(cur[0],56,61); + int sz = rdpq_debug_disasm_size(cur); + + // Disassemble the command + bool shown = false; + if (show_log > 0) + shown = rdpq_debug_disasm(cur, stderr); + + // Validate the command: if the command was already shown, we don't need + // to further echo it. + uint32_t val_flags = shown ? 
RDPQ_VALIDATE_FLAG_NOECHO : 0; + rdpq_validate(cur, val_flags, NULL, NULL); + + // Run trace hooks + for (int i=0;i, "); + else + fprintf(out, " blend=[%s*%s + %s*%s, ", + blend1_a[som.blender[0].p], blend1_b1[som.blender[0].a], blend1_a[som.blender[0].q], som.blender[0].b ? blend1_b2[som.blender[0].b] : blend1_b1inv[som.blender[0].a]); + fprintf(out, "%s*%s + %s*%s]", + blend2_a[som.blender[1].p], blend2_b1[som.blender[1].a], blend2_a[som.blender[1].q], som.blender[1].b ? blend2_b2[som.blender[1].b] : blend2_b1inv[som.blender[1].a]); + } + if(som.z.upd || som.z.cmp || som.z.prim) { + fprintf(out, " z=["); FLAG_RESET(); + FLAG(som.z.cmp, "cmp"); FLAG(som.z.upd, "upd"); FLAG(som.z.prim, "prim"); FLAG(true, zmode[som.z.mode]); + fprintf(out, "]"); + } + flag_prefix = " "; + FLAG(som.aa, "aa"); FLAG(som.read, "read"); FLAG(som.blend, "blend"); + FLAG(som.chromakey, "chroma_key"); FLAG(som.atomic, "atomic"); + + if(som.alphacmp.enable) fprintf(out, " alpha_compare%s", som.alphacmp.noise ? "[noise]" : ""); + if((som.cycle_type < 2) && (som.dither.rgb != 3 || som.dither.alpha != 3)) fprintf(out, " dither=[%s,%s]", rgbdither[som.dither.rgb], alphadither[som.dither.alpha]); + if(som.cvg.mode || som.cvg.color || som.cvg.sel_alpha || som.cvg.mul_alpha) { + fprintf(out, " cvg=["); FLAG_RESET(); + FLAG(som.cvg.mode, cvgmode[som.cvg.mode]); FLAG(som.cvg.color, "color_ovf"); + FLAG(som.cvg.mul_alpha, "mul_alpha"); FLAG(som.cvg.sel_alpha, "sel_alpha"); + fprintf(out, "]"); + } + if(som.rdpqx.bl2 || som.rdpqx.freeze || som.rdpqx.fog) { + fprintf(out, " rdpq=["); FLAG_RESET(); + FLAG(som.rdpqx.bl2, "bl2"); FLAG(som.rdpqx.freeze, "freeze"); + FLAG(som.rdpqx.fog, "fog"); + fprintf(out, "]"); + } + fprintf(out, "\n"); + }; return; + case 0x3C: { fprintf(out, "SET_COMBINE_MODE "); + static const char* rgb_suba[16] = {"comb", "tex0", "tex1", "prim", "shade", "env", "1", "noise", "0","0","0","0","0","0","0","0"}; + static const char* rgb_subb[16] = {"comb", "tex0", "tex1", "prim", 
"shade", "env", "keycenter", "k4", "0","0","0","0","0","0","0","0"}; + static const char* rgb_mul[32] = {"comb", "tex0", "tex1", "prim", "shade", "env", "keyscale", "comb.a", "tex0.a", "tex1.a", "prim.a", "shade.a", "env.a", "lod_frac", "prim_lod_frac", "k5", "0","0","0","0","0","0","0","0", "0","0","0","0","0","0","0","0"}; + static const char* rgb_add[8] = {"comb", "tex0", "tex1", "prim", "shade", "env", "1", "0"}; + static const char* alpha_addsub[8] = {"comb", "tex0", "tex1", "prim", "shade", "env", "1", "0"}; + static const char* alpha_mul[8] = {"lod_frac", "tex0", "tex1", "prim", "shade", "env", "prim_lod_frac", "0"}; + colorcombiner_t cc = decode_cc(buf[0]); + fprintf(out, "cyc0=[(%s-%s)*%s+%s, (%s-%s)*%s+%s], ", + rgb_suba[cc.cyc[0].rgb.suba], rgb_subb[cc.cyc[0].rgb.subb], rgb_mul[cc.cyc[0].rgb.mul], rgb_add[cc.cyc[0].rgb.add], + alpha_addsub[cc.cyc[0].alpha.suba], alpha_addsub[cc.cyc[0].alpha.subb], alpha_mul[cc.cyc[0].alpha.mul], alpha_addsub[cc.cyc[0].alpha.add]); + const struct cc_cycle_s passthrough = {0}; + if (!__builtin_memcmp(&cc.cyc[1], &passthrough, sizeof(struct cc_cycle_s))) fprintf(out, "cyc1=[]\n"); + else fprintf(out, "cyc1=[(%s-%s)*%s+%s, (%s-%s)*%s+%s]\n", + rgb_suba[cc.cyc[1].rgb.suba], rgb_subb[cc.cyc[1].rgb.subb], rgb_mul[cc.cyc[1].rgb.mul], rgb_add[cc.cyc[1].rgb.add], + alpha_addsub[cc.cyc[1].alpha.suba], alpha_addsub[cc.cyc[1].alpha.subb], alpha_mul[cc.cyc[1].alpha.mul], alpha_addsub[cc.cyc[1].alpha.add]); + } return; + case 0x35: { fprintf(out, "SET_TILE "); + uint8_t f = BITS(buf[0], 53, 55); + fprintf(out, "tile=%d %s%s tmem[0x%x,line=%d]", + BITS(buf[0], 24, 26), fmt[f], size[BITS(buf[0], 51, 52)], + BITS(buf[0], 32, 40)*8, BITS(buf[0], 41, 49)*8); + if (f==2) fprintf(out, " pal=%d", BITS(buf[0], 20, 23)); + fprintf(out, " mask=[%d, %d]", 1<\n"); + else fprintf(out, "dram=%08" PRIx32 "\n", addr); + } return; + case 0x3d: fprintf(out, "SET_TEX_IMAGE dram=%08x w=%d %s%s\n", + BITS(buf[0], 0, 25), BITS(buf[0], 32, 41)+1, 
fmt[BITS(buf[0], 53, 55)], size[BITS(buf[0], 51, 52)]); + return; + case 0x3f: { + fprintf(out, "SET_COLOR_IMAGE "); + uint32_t addr = BITS(buf[0], 0, 25); + if (addr == RDPQ_VALIDATE_DETACH_ADDR) fprintf(out, "\n"); + else { + fprintf(out, "dram=%08" PRIx32 " w=%d ", addr, BITS(buf[0], 32, 41)+1); + int height = BITS(buf[0], 42, 50) | (BIT(buf[0], 31) << 9); + if (height) fprintf(out, "h=%d ", height+1); // libdragon extension + fprintf(out, "%s%s\n", fmt[BITS(buf[0], 53, 55)], size[BITS(buf[0], 51, 52)]); + } + } return; + case 0x31: switch(BITS(buf[0], 48, 55)) { + case 0x01: fprintf(out, "RDPQ_SHOWLOG show=%d\n", BIT(buf[0], 0)); return; + #ifdef N64 + case 0x02: fprintf(out, "RDPQ_MESSAGE %s\n", (char*)CachedAddr(0x80000000|BITS(buf[0], 0, 24))); return; + #endif + default: fprintf(out, "RDPQ_DEBUG \n"); return; + } + } +} + +static bool log_coalesce_tris(uint8_t cmd, uint8_t *last_tri_cmd, int *num_tris) { + if (!CMD_IS_TRI(cmd)) { + if (*last_tri_cmd) { + debugf("[..........] ................ %-16s num_cmds=%d\n", tri_name[*last_tri_cmd - 0x08], *num_tris); + *last_tri_cmd = 0; + *num_tris = 0; + } + return true; + } else { + if (*last_tri_cmd && *last_tri_cmd != cmd) { + debugf("[..........] ................ 
%-16s num_cmds=%d\n", tri_name[*last_tri_cmd - 0x08], *num_tris); + *last_tri_cmd = 0; + *num_tris = 0; + } + *last_tri_cmd = cmd; + *num_tris = *num_tris+1; + return false; + } +} + + +bool rdpq_debug_disasm(uint64_t *buf, FILE *out) { + static uint8_t last_tri_cmd = 0; static int num_tris = 0; + + if (buf) { + uint8_t cmd = BITS(buf[0],56,61); + if ((__rdpq_debug_log_flags & RDPQ_LOG_FLAG_SHOWTRIS) || log_coalesce_tris(cmd, &last_tri_cmd, &num_tris)) { + __rdpq_debug_disasm(buf, buf, out); + return true; + } + } else { + log_coalesce_tris(0, &last_tri_cmd, &num_tris); + } + return false; +} + +#define EMIT_TYPE 0x3 ///< Type of message (mask) +#define EMIT_CRASH 0x0 ///< Message is a RDP crash +#define EMIT_ERROR 0x1 ///< Message is an error +#define EMIT_WARN 0x2 ///< Message is a warning + +#define EMIT_CTX_SOM 0x4 ///< Message context must show last SOM +#define EMIT_CTX_CC 0x8 ///< Message context must show last CC +#define EMIT_CTX_TEX 0x10 ///< Message context must show last SET_TEX_IMAGE +#define EMIT_CTX_TILES (0xFF << 5) ///< Message context must show SET_TILE (mask) +#define EMIT_CTX_TILE(n) (0x20 << (n)) ///< Message context must show tile n +#define EMIT_CTX_TILESIZE 0x2000 ///< Message context must show LOAD_TILE/SET_TILE_SIZE instead of SET_TILE + +__attribute__((format(printf, 2, 3))) +static void validate_emit_error(int flags, const char *msg, ...) 
+{ + va_list args; + + if (!(vctx.flags & RDPQ_VALIDATE_FLAG_NOECHO)) { + if (flags & EMIT_CTX_SOM) __rdpq_debug_disasm(rdp.last_som, &rdp.last_som_data, stderr); + if (flags & EMIT_CTX_CC) __rdpq_debug_disasm(rdp.last_cc, &rdp.last_cc_data, stderr); + if (flags & EMIT_CTX_TEX) __rdpq_debug_disasm(rdp.last_tex, &rdp.last_tex_data, stderr); + if (flags & EMIT_CTX_TILES) { + for (int i = 0; i < 8; i++) { + if (flags & EMIT_CTX_TILE(i)) { + __rdpq_debug_disasm(rdp.tile[i].last_settile, &rdp.tile[i].last_settile_data, stderr); + if (rdp.tile[i].has_extents) + __rdpq_debug_disasm(rdp.tile[i].last_setsize, &rdp.tile[i].last_setsize_data, stderr); + break; + } + } + } + rdpq_debug_disasm(vctx.buf, stderr); + } + + switch (flags & EMIT_TYPE) { + case EMIT_CRASH: + fprintf(stderr, "[RDPQ_VALIDATION] CRASH: "); + vctx.crashed = true; + vctx.errs += 1; + break; + case EMIT_ERROR: + fprintf(stderr, "[RDPQ_VALIDATION] ERROR: "); + vctx.errs += 1; + break; + case EMIT_WARN: + fprintf(stderr, "[RDPQ_VALIDATION] WARN: "); + vctx.warns += 1; + break; + } + + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + + if ((flags & EMIT_TYPE) == EMIT_CRASH) + fprintf(stderr, "[RDPQ_VALIDATION] This is a fatal error: a real RDP chip would stop working until reboot\n"); + + if (flags & EMIT_CTX_SOM) fprintf(stderr, "[RDPQ_VALIDATION] SET_OTHER_MODES last sent at %p\n", rdp.last_som); + if (flags & EMIT_CTX_CC) fprintf(stderr, "[RDPQ_VALIDATION] SET_COMBINE_MODE last sent at %p\n", rdp.last_cc); + if (flags & EMIT_CTX_TEX) fprintf(stderr, "[RDPQ_VALIDATION] SET_TEX_IMAGE last sent at %p\n", rdp.last_tex); + if (flags & EMIT_CTX_TILES) { + for (int i = 0; i < 8; i++) { + if (flags & EMIT_CTX_TILE(i)) { + if (flags & EMIT_CTX_TILESIZE) + fprintf(stderr, "[RDPQ_VALIDATION] %s last sent at %p\n", + CMD(rdp.tile[i].last_setsize_data) == 0x32 ? 
"SET_TILE_SIZE" : "LOAD_TILE", + rdp.tile[i].last_setsize); + else + fprintf(stderr, "[RDPQ_VALIDATION] SET_TILE last sent at %p\n", rdp.tile[i].last_settile); + break; + } + } + } + + #ifdef N64 + // On a real N64, let's assert on RDP crashes. This makes them very visible to everybody, + // including people that don't have the debugging log on. + // We just dump the message here, more information are in the log. + if ((flags & EMIT_TYPE) == EMIT_CRASH) { + char buf[1024]; + va_start(args, msg); + vsprintf(buf, msg, args); + va_end(args); + assertf(0, "RDP CRASHED: the code triggered a RDP hardware bug.\n%s", buf); + } + #endif +} + +/** @brief Internal validation macros (for both errors and warnings) */ +#define __VALIDATE(flags, cond, msg, ...) ({ \ + if (!(cond)) validate_emit_error(flags, msg "\n", ##__VA_ARGS__); \ +}) + +/** + * @brief Check and trigger a RDP crash. + * + * This is the most fatal error condition, in which the RDP chip freezes and stop processing + * commands until reboot. + */ +#define VALIDATE_CRASH(cond, msg, ...) __VALIDATE(0, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a crash, with SOM context */ +#define VALIDATE_CRASH_SOM(cond, msg, ...) __VALIDATE(4, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a crash, with CC context */ +#define VALIDATE_CRASH_CC(cond, msg, ...) __VALIDATE(8, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a crash, with SET_TEX_IMAGE context */ +#define VALIDATE_CRASH_TEX(cond, msg, ...) __VALIDATE(16, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a crash, with tile context */ +#define VALIDATE_CRASH_TILE(cond, tidx, msg, ...) __VALIDATE(EMIT_CRASH | EMIT_CTX_TILE(tidx), cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a crash, with tile extents context */ +#define VALIDATE_CRASH_TILESIZE(cond, tidx, msg, ...) 
__VALIDATE(EMIT_CRASH | EMIT_CTX_TILE(tidx) | EMIT_CTX_TILESIZE, cond, msg, ##__VA_ARGS__) + +/** + * @brief Check and trigger a RDP validation error. + * + * This should be triggered only whenever the commands rely on an undefined hardware + * behaviour or in general strongly misbehave with respect to the reasonable + * expectation of the programmer. Typical expected outcome on real hardware should be + * garbled graphics. + * + * The flags value 1 is EMIT_ERROR with no extra context. */ +#define VALIDATE_ERR(cond, msg, ...) __VALIDATE(1, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger an error, with SOM context (5 = EMIT_ERROR | EMIT_CTX_SOM) */ +#define VALIDATE_ERR_SOM(cond, msg, ...) __VALIDATE(5, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger an error, with CC context (9 = EMIT_ERROR | EMIT_CTX_CC) */ +#define VALIDATE_ERR_CC(cond, msg, ...) __VALIDATE(9, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger an error, with SET_TEX_IMAGE context (17 = EMIT_ERROR | EMIT_CTX_TEX) */ +#define VALIDATE_ERR_TEX(cond, msg, ...) __VALIDATE(17, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger an error, with tile context */ +#define VALIDATE_ERR_TILE(cond, tidx, msg, ...) __VALIDATE(EMIT_ERROR | EMIT_CTX_TILE(tidx), cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger an error, with tile extents context */ +#define VALIDATE_ERR_TILESIZE(cond, tidx, msg, ...) __VALIDATE(EMIT_ERROR | EMIT_CTX_TILE(tidx) | EMIT_CTX_TILESIZE, cond, msg, ##__VA_ARGS__) + +/** + * @brief Check and trigger a RDP validation warning. + * + * This should be triggered whenever the commands deviate from standard practice or + * in general are dubious in their use. It does not necessarily mean that the RDP + * is going to misbehave but it is likely that the programmer did not fully understand + * what the RDP is going to do. It is OK to have false positives here -- if the situation + * becomes too unwieldy, we can later add a way to disable classes of warning in specific + * programs. + */ +#define VALIDATE_WARN(cond, msg, ...) 
__VALIDATE(2, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a warning, with SOM context (6 = EMIT_WARN | EMIT_CTX_SOM) */ +#define VALIDATE_WARN_SOM(cond, msg, ...) __VALIDATE(6, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a warning, with CC context (10 = EMIT_WARN | EMIT_CTX_CC) */ +#define VALIDATE_WARN_CC(cond, msg, ...) __VALIDATE(10, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a warning, with SET_TEX_IMAGE context (18 = EMIT_WARN | EMIT_CTX_TEX) */ +#define VALIDATE_WARN_TEX(cond, msg, ...) __VALIDATE(18, cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a warning, with tile context */ +#define VALIDATE_WARN_TILE(cond, tidx, msg, ...) __VALIDATE(EMIT_WARN | EMIT_CTX_TILE(tidx), cond, msg, ##__VA_ARGS__) +/** @brief Validate and trigger a warning, with tile extents context */ +#define VALIDATE_WARN_TILESIZE(cond, tidx, msg, ...) __VALIDATE(EMIT_WARN | EMIT_CTX_TILE(tidx) | EMIT_CTX_TILESIZE, cond, msg, ##__VA_ARGS__) + +/** + * @brief Perform lazy evaluation of render target changes (color buffer and scissoring). + * + * Does nothing unless a render target change is pending (rdp.rendertarget_changed); + * the pending flag is cleared before the checks run. The scissor checks are skipped + * if either SET_COLOR_IMAGE or SET_SCISSOR was never sent. + */ +static void lazy_validate_rendertarget(void) { + if (!rdp.rendertarget_changed) return; + rdp.rendertarget_changed = false; + + VALIDATE_ERR(rdp.last_col, + "undefined behavior: drawing command before a SET_COLOR_IMAGE was sent"); + VALIDATE_ERR(rdp.sent_scissor, + "undefined behavior: drawing command before a SET_SCISSOR was sent"); + if (!rdp.last_col || !rdp.sent_scissor) return; + + // copy/fill mode use inclusive X coordinates for most things, including scissor + int x1 = rdp.clip.x1; + if (rdp.som.cycle_type >= 2) x1++; + + VALIDATE_WARN(rdp.clip.x0 < x1, + "drawing command with null scissor rectangle (X:%d-%d)", rdp.clip.x0, rdp.clip.x1); + VALIDATE_WARN(rdp.clip.y0 < rdp.clip.y1, + "drawing command with null scissor rectangle (Y:%d-%d)", rdp.clip.y0, rdp.clip.y1); + VALIDATE_WARN(rdp.clip.x1 <= rdp.col.width, + "drawing command with scissor rectangle (X1=%d) outside of color buffer (W=%d)", rdp.clip.x1, rdp.col.width); + if (rdp.col.height > 1) { // libdragon extension 
+ VALIDATE_WARN(rdp.clip.y1 <= rdp.col.height, + "drawing command with scissor rectangle (Y1=%d) outside of color buffer (H=%d)", rdp.clip.y1, rdp.col.height); + } + if (rdp.som.cycle_type == 2) { + VALIDATE_CRASH(rdp.clip.x0 == 0, + "drawing command in COPY mode: scissor left bound (%d) must be zero", rdp.clip.x0); + } + if (rdp.som.cycle_type == 3) { + VALIDATE_ERR(rdp.clip.x0 % 4 == 0, + "drawing command in FILL mode: scissor rectangle x0 (%d) must be a multiple of 4", rdp.clip.x0); + } +} + +/** + * @brief True if the current CC uses the TEX1 slot aka the second texture + * + * The second texture can only be referenced in 2-cycle mode, and not when the + * texture filter mode makes TEX1 the color-conversion of TEX0. In the first + * combiner cycle it is referenced through the TEX1/TEX1_ALPHA inputs; in the + * second cycle it is referenced through the TEX0/TEX0_ALPHA inputs. Only the + * RGB combiner inputs are inspected. + * + * @return true if an RGB input of the current color combiner reads the second texture + */ +static bool cc_use_tex1(void) { + struct cc_cycle_s *cc = rdp.cc.cyc; + if (rdp.som.cycle_type != 1) // TEX1 is used only in 2-cycle mode + return false; + if ((rdp.som.tf_mode & 3) == 1) // TEX1 is the color-conversion of TEX0, so TEX1 is not used + return false; + return + // Cycle0: reference to TEX1/TEX1_ALPHA slot + (cc[0].rgb.suba == 2 || cc[0].rgb.subb == 2 || cc[0].rgb.mul == 2 || cc[0].rgb.mul == 9 || cc[0].rgb.add == 2) || + // Cycle1: reference to TEX0/TEX0_ALPHA slot (which actually points to TEX1) + (cc[1].rgb.suba == 1 || cc[1].rgb.subb == 1 || cc[1].rgb.mul == 1 || cc[1].rgb.mul == 8 || cc[1].rgb.add == 1); +} + +/** + * @brief Perform lazy evaluation of SOM and CC changes (on draw command). + * + * Validation of color combiner requires to know the current cycle type (which is part of SOM). + * Since it's possible to send SOM / CC in any order, what matters is if, at the point of a + * drawing command, the configuration is correct. + * + * Validation of CC is thus run lazily whenever a draw command is issued. + * + * @note Do not perform validation of texture-related settings here. Use validate_use_tile instead, + * as that is the only place where we know exactly which tile is being used for drawing. 
+ */ +static void lazy_validate_rendermode(void) { + if (!rdp.mode_changed) return; + rdp.mode_changed = false; + + // Fill mode validation + if (rdp.som.cycle_type == 3) { + if (rdp.last_col) { + VALIDATE_CRASH_SOM(rdp.col.size != 0, "FILL mode not supported on 4-bit framebuffers"); + } + // These are a bunch of SOM settings that, in addition of being useless in FILL mode, they cause + // a RDP crash. + VALIDATE_CRASH_SOM(!rdp.som.read, "image read is enabled but is not supported in FILL mode"); + VALIDATE_CRASH_SOM(!rdp.som.z.cmp, "Z buffer compare is enabled but is not supported in FILL mode"); + VALIDATE_CRASH_SOM(!rdp.som.z.upd || rdp.som.z.prim, "Z buffer write is enabled but is not supported in FILL mode"); + return; + } + + // Copy mode validation + if (rdp.som.cycle_type == 2) { + if (rdp.last_col) { + int size = BITS(rdp.last_col_data, 51, 52); + VALIDATE_CRASH_SOM(size != 3, "COPY mode not supported on 32-bit framebuffers"); + } + VALIDATE_ERR_SOM(!rdp.som.z.cmp, "Z buffer compare is enabled but is not supported in COPY mode"); + VALIDATE_ERR_SOM(!rdp.som.z.upd || rdp.som.z.prim, "Z buffer write is enabled but is not supported in COPY mode"); + VALIDATE_ERR_SOM(!rdp.som.tex.persp, "perspective correction is not supported in COPY mode"); + return; + } + + // We are in 1-cycle/2-cycle mode. Proceed to validate blender and color combiner. + + // Validate blender setting. If there is any blender fomula configured, we should + // expect one between SOM_BLENDING or SOM_ANTIALIAS, otherwise the formula will be ignored. 
+ struct blender_s *b0 = &rdp.som.blender[0]; + struct blender_s *b1 = &rdp.som.blender[1]; + bool has_bl0 = b0->p || b0->a || b0->q || b0->b; + bool has_bl1 = b1->p || b1->a || b1->q || b1->b; + VALIDATE_WARN_SOM(rdp.som.blend || rdp.som.aa || !(has_bl0 || has_bl1), + "blender function will be ignored because SOM_BLENDING and SOM_ANTIALIAS are both disabled"); + if (rdp.som.cycle_type == 1) { // 2cyc + VALIDATE_ERR_SOM((b0->b == 0) || (b0->b == 2 && b0->a == 3), // INV_MUX_ALPHA, or ONE/ZERO (which still works) + "in 2cycle mode, the first pass of the blender must use INV_MUX_ALPHA or equivalent"); + VALIDATE_ERR_SOM(b0->a != 1, + "in 2cycle mode, the first pass of the blender cannot access MEMORY_RGB"); + VALIDATE_WARN_SOM(b1->a != 2, + "in 2cycle mode, the second pass of the blender will use a SHADE_ALPHA value shifted by one pixel because of a hardware bug"); + } + + // Validate other SOM states + if (!rdp.som.tex.lod) { + VALIDATE_ERR_SOM(!rdp.som.tex.sharpen && !rdp.som.tex.detail, + "sharpen/detail texture require texture LOD to be active"); + } + if (rdp.som.z.cmp || rdp.som.z.upd) { + VALIDATE_ERR_SOM(rdp.last_z, + "Z buffer image not configured but Z buffer mode was requested in SOM"); + } + + if (!rdp.last_cc) { + VALIDATE_ERR(rdp.last_cc, "SET_COMBINE not called before drawing primitive"); + return; + } + + // Sanitize color combiner. We are going to check a few slots for specific values + // but we want to avoid emitting errors for combinations that are "benign". For example, + // COMBINED in 1cycle mode is an error, but if you do (COMB-COMB), then it doesn't + // really matter. 
+ struct cc_cycle_s ccs[2] = { rdp.cc.cyc[0], rdp.cc.cyc[1] }; + for (int i=0; i<2; i++) { + if (ccs[i].rgb.suba == ccs[i].rgb.subb || ccs[i].rgb.mul == 16) + ccs[i].rgb.suba = ccs[i].rgb.subb = 8; // change with 0, so that it doesn't matter + if (ccs[i].alpha.suba == ccs[i].alpha.subb || ccs[i].alpha.mul == 7) + ccs[i].alpha.suba = ccs[i].alpha.subb = 7; // change with 0, so that it doesn't matter + } + + if (rdp.som.cycle_type == 0) { // 1cyc + VALIDATE_ERR_CC(ccs[1].rgb.suba != 0 && ccs[1].rgb.subb != 0 && ccs[1].rgb.mul != 0 && ccs[1].rgb.add != 0 && + ccs[1].alpha.suba != 0 && ccs[1].alpha.subb != 0 && ccs[1].alpha.add != 0, + "in 1cycle mode, the color combiner cannot access the COMBINED slot"); + VALIDATE_ERR_CC(ccs[1].rgb.suba != 2 && ccs[1].rgb.subb != 2 && ccs[1].rgb.mul != 2 && ccs[1].rgb.add != 2 && + ccs[1].alpha.suba != 2 && ccs[1].alpha.subb != 2 && ccs[1].alpha.mul != 2 && ccs[1].alpha.add != 2, + "in 1cycle mode, the color combiner cannot access the TEX1 slot"); + VALIDATE_ERR_CC(ccs[1].rgb.mul != 7, + "in 1cycle mode, the color combiner cannot access the COMBINED_ALPHA slot"); + VALIDATE_ERR_CC(ccs[1].rgb.mul != 9, + "in 1cycle mode, the color combiner cannot access the TEX1_ALPHA slot"); + } else { // 2 cyc + VALIDATE_ERR_CC(ccs[0].rgb.suba != 0 && ccs[0].rgb.subb != 0 && ccs[0].rgb.mul != 0 && ccs[0].rgb.add != 0 && + ccs[0].alpha.suba != 0 && ccs[0].alpha.subb != 0 && ccs[0].alpha.add != 0, + "in 2cycle mode, the color combiner cannot access the COMBINED slot in the first cycle"); + VALIDATE_ERR_CC(ccs[1].rgb.suba != 2 && ccs[1].rgb.subb != 2 && ccs[1].rgb.mul != 2 && ccs[1].rgb.add != 2 && + ccs[1].alpha.suba != 2 && ccs[1].alpha.subb != 2 && ccs[1].alpha.mul != 2 && ccs[1].alpha.add != 2, + "in 2cycle mode, the color combiner cannot access the TEX1 slot in the second cycle (but TEX0 contains the second texture)"); + VALIDATE_ERR_CC(ccs[0].rgb.mul != 7, + "in 2cycle mode, the color combiner cannot access the COMBINED_ALPHA slot in the first 
cycle"); + VALIDATE_ERR_CC(ccs[1].rgb.mul != 9, + "in 2cycle mode, the color combiner cannot access the TEX1_ALPHA slot in the second cycle (but TEX0_ALPHA contains the second texture)"); + if (rdp.som.alphacmp.enable && !rdp.som.alphacmp.noise) { + bool cc1_passthrough = (ccs[1].alpha.mul == 7 && ccs[1].alpha.add == 0); // (any-any)*0+combined + VALIDATE_ERR_CC(cc1_passthrough, + "in 2cycle mode, alpha compare is broken if the second alpha combiner cycle is not a passthrough because of a hardware bug"); + VALIDATE_WARN_CC(!cc1_passthrough, + "in 2cycle mode, alpha compare is often shifted by one pixel because of a hardware bug"); + } + } +} + +/** + * @brief Perform validation of a draw command (rectangle or triangle) + * + * @param use_colors True if the draw command has the shade component + * @param use_tex True if the draw command has the texture component + * @param use_z True if the draw command has the Z component + * @param use_w True if the draw command has the W component + */ +static void validate_draw_cmd(bool use_colors, bool use_tex, bool use_z, bool use_w) +{ + if (rdp.som.z.prim && (rdp.som.z.cmp || rdp.som.z.upd)) { + VALIDATE_WARN_SOM(!use_z, "per-vertex Z value will be ignored because Z-source is set to primitive"); + VALIDATE_ERR_SOM(rdp.sent_zprim, "Z-source is set to primitive but SET_PRIM_DEPTH was never sent"); + use_z = true; + } + + switch (rdp.som.cycle_type) { + case 0 ... 
1: { // 1cyc, 2cyc + bool cc_use_tex0=false, cc_use_tex1=false, cc_use_tex0alpha=false, cc_use_tex1alpha=false; + bool cc_use_shade=false, cc_use_shadealpha=false, bl_use_shadealpha=false; + + for (int i=0; i<=rdp.som.cycle_type; i++) { + struct blender_s *bls = &rdp.som.blender[i]; + struct cc_cycle_s *ccs = &rdp.cc.cyc[i^1]; + uint8_t slots[8] = { + ccs->rgb.suba, ccs->rgb.subb, ccs->rgb.mul, ccs->rgb.add, + ccs->alpha.suba, ccs->alpha.subb, ccs->alpha.mul, ccs->alpha.add, + }; + + cc_use_tex0 |= (bool)memchr(slots, 1, sizeof(slots)); + cc_use_tex1 |= (bool)memchr(slots, 2, sizeof(slots)); + cc_use_tex0alpha |= (ccs->rgb.mul == 8); + cc_use_tex1alpha |= (ccs->rgb.mul == 9); + + cc_use_shade |= (bool)memchr(slots, 4, sizeof(slots)); + cc_use_shadealpha |= (ccs->rgb.mul == 11); + bl_use_shadealpha |= (bls->a == 2); + } + + if (use_tex) { + VALIDATE_WARN_CC(cc_use_tex0 || cc_use_tex1 || cc_use_tex0alpha || cc_use_tex1alpha, + "textured primitive drawn but the color combiner that does not use the TEX0/TEX1/TEX0_ALPHA/TEX1_ALPHA slots"); + } else { + VALIDATE_ERR_CC(!cc_use_tex0, + "cannot draw a non-textured primitive with a color combiner using the TEX0 slot"); + VALIDATE_ERR_CC(!cc_use_tex1, + "cannot draw a non-textured primitive with a color combiner using the TEX1 slot"); + VALIDATE_ERR_CC(!cc_use_tex0alpha && !cc_use_tex1alpha, + "cannot draw a non-shaded primitive with a color combiner using the TEX%d_ALPHA slot", cc_use_tex0alpha ? 
0 : 1); + } + + if (use_colors) { + VALIDATE_WARN_CC(cc_use_shade || cc_use_shadealpha || bl_use_shadealpha, + "shaded primitive drawn but neither the color combiner nor the blender use the SHADE/SHADE_ALPHA slots"); + } else { + VALIDATE_ERR_CC(!cc_use_shade, + "cannot draw a non-shaded primitive with a color combiner using the SHADE slot"); + VALIDATE_ERR_CC(!cc_use_shadealpha, + "cannot draw a non-shaded primitive with a color combiner using the SHADE_ALPHA slot"); + VALIDATE_ERR_SOM(!bl_use_shadealpha, + "cannot draw a non-shaded primitive with a blender using the SHADE_ALPHA slot"); + } + + if (use_tex && !use_w) + VALIDATE_ERR_SOM(!rdp.som.tex.persp, + "cannot draw a textured primitive with perspective correction but without per-vertex W coordinate"); + + if (!use_z) { + VALIDATE_ERR_SOM(!rdp.som.z.cmp && !rdp.som.z.upd, + "cannot draw a primitive without Z coordinate if Z buffer access is activated"); + } + + } break; + } +} + +static void validate_busy_pipe(void) { + VALIDATE_WARN(!rdp.busy.pipe, "pipe might be busy, SYNC_PIPE is missing"); + rdp.busy.pipe = false; +} + +static void validate_busy_tile(int tidx) { + VALIDATE_WARN(!rdp.busy.tile[tidx], + "tile %d might be busy, SYNC_TILE is missing", tidx); + rdp.busy.tile[tidx] = false; +} + +/** @brief Mark TMEM as busy in range [addr..addr+size] */ +static void mark_busy_tmem(int addr, int size) { + int x0 = MIN(addr, 0x1000)/8, x1 = MIN(addr+size, 0x1000)/8, x = x0; + while ((x&7) && x < x1) { rdp.busy.tmem[x/8] |= 1 << (x&7); x++; } + while (x+8 < x1) { rdp.busy.tmem[x/8] = 0xFF; x+=8; } + while (x < x1) { rdp.busy.tmem[x/8] |= 1 << (x&7); x++; } +} + +/** @brief Check if TMEM is busy in range [addr..addr+size] */ +static bool is_busy_tmem(int addr, int size) { + int x0 = MIN(addr, 0x1000)/8, x1 = MIN(addr+size, 0x1000)/8, x = x0; + while ((x&7) && x < x1) { if (rdp.busy.tmem[x/8] & 1 << (x&7)) return true; x++; } + while (x+8 < x1) { if (rdp.busy.tmem[x/8] != 0) return true; x+=8; } + while (x < x1) { 
if (rdp.busy.tmem[x/8] & 1 << (x&7)) return true; x++; } + return false; +} + +static void validate_busy_tmem(int addr, int size) { + VALIDATE_WARN(!is_busy_tmem(addr, size), "writing to TMEM[0x%x:0x%x] while busy, SYNC_LOAD missing", addr, addr+size); +} + +static bool check_loading_crash(int hpixels) { + // Check for a very rare crash while loading from a misaligned address. + // The address must have a special type of misalignment within the lower half of each 16-byte line. + if ((rdp.tex.physaddr & 0xF) == 0) return false; + if ((rdp.tex.physaddr & 0xF) >= 8) return false; + // This crash doesn't apply to 4bpp textures. Notice that 4bpp always crash with LOAD_TILE (even aligned + // addresses) but that's handled elsewhere. So this check applies to LOAD_BLOCK. + if (rdp.tex.size == 0) return false; + // At least ~58 bytes must be loaded in each horizontal line. This can vary a little bit depending + // on bitdepth but the number is almost right. + if (hpixels * (4 << rdp.tex.size) / 8 < 58) return false; + // Crash triggered + return true; +} + +/** + * @brief Perform validation of a tile descriptor being used as part of a drawing command. + * + * @param tidx tile ID + * @param cycle Number of the cycle in which the tile is being used (0 or 1) + * @param texcoords Array of texture coordinates (S,T) used by the drawing command. 
+ * @param ncoords Number of vertices in the array (the actual array element count will be double this number) + */ +static void validate_use_tile(int tidx, int cycle, float *texcoords, int ncoords) { + struct tile_s *tile = &rdp.tile[tidx]; + rdp.busy.tile[tidx] = true; + bool use_outside = false; + float out_s, out_t; + + if (!tile->last_settile) + VALIDATE_ERR(tile->last_settile, "tile %d was not configured", tidx); + else if (!tile->has_extents) + VALIDATE_ERR_TILE(tile->has_extents, tidx, "tile %d has no extents set, missing LOAD_TILE or SET_TILE_SIZE", tidx); + else { + // Check whether there are texels outside the tile extents + for (int i=0; is0 || out_s > tile->s1 || out_t < tile->t0 || out_t > tile->t1); + } + } + + switch (rdp.som.cycle_type) { + case 0: case 1: // 1-cycle / 2-cycle modes + // YUV render mode mistakes in 1-cyc/2-cyc, that is when YUV conversion can be done. + // In copy mode, YUV textures are copied as-is + if (tile->fmt == 1) { + VALIDATE_ERR_SOM(!(rdp.som.tf_mode & (4>>cycle)), + "tile %d is YUV but texture filter in cycle %d does not activate YUV color conversion", tidx, cycle); + if (rdp.som.sample_type > 1) { + static const char* texinterp[] = { "point", "point", "bilinear", "median" }; + VALIDATE_ERR_SOM(rdp.som.tf_mode == 6 && rdp.som.cycle_type == 1, + "tile %d is YUV and %s filtering is active: TF1_YUVTEX0 mode must be configured in SOM", tidx, texinterp[rdp.som.sample_type]); + VALIDATE_ERR_SOM(rdp.som.cycle_type == 1, + "tile %d is YUV and %s filtering is active: 2-cycle mode must be configured", tidx, texinterp[rdp.som.sample_type]); + } + } else { + VALIDATE_ERR_SOM((rdp.som.tf_mode & (4>>cycle)), + "tile %d is RGB-based, but cycle %d is configured for YUV color conversion; try setting SOM_TF%d_RGB", tidx, cycle, cycle); + } + // Validate clamp/mirror/wrap modes + if (use_outside) { + VALIDATE_WARN_TILE(tile->s.clamp || tile->s.mask, tidx, + "tile %d will clamp horizontally because mask is 0, but clamp for S is not set", 
tidx); + VALIDATE_WARN_TILE(tile->t.clamp || tile->t.mask, tidx, + "tile %d will clamp vertically because mask is 0, but clamp for T is not set", tidx); + } + break; + case 2: // copy mode + VALIDATE_ERR_SOM(tile->fmt != 3 && tile->fmt != 4 && (tile->fmt != 0 || tile->size != 3), + "tile %d is %s%d, but COPY mode does not support I4/I8/IA4/IA8/IA16/RGBA32", tidx, tex_fmt_name[tile->fmt], 4 << tile->size); + VALIDATE_ERR_TILESIZE(!use_outside, tidx, + "draw primitive accesses texel at (%.2f,%.2f) outside of the tile in COPY mode", out_s, out_t); + break; + } + + // Check that TLUT mode in SOM is active if the tile requires it (and vice-versa) + if (tile->fmt == 2) // Color index + VALIDATE_ERR_SOM(rdp.som.tlut.enable, "tile %d is CI (color index), but TLUT mode was not activated", tidx); + else + VALIDATE_ERR_SOM(!rdp.som.tlut.enable, "tile %d is not CI (color index), but TLUT mode is active", tidx); + + // Mark used areas of tmem + switch (tile->fmt) { + case 0: case 3: case 4: // RGBA, IA, I + if (tile->size == 3) { // 32-bit: split between lo and hi TMEM + mark_busy_tmem(tile->tmem_addr, (tile->t1-tile->t0+1)*tile->tmem_pitch / 2); + mark_busy_tmem(tile->tmem_addr + 0x800, (tile->t1-tile->t0+1)*tile->tmem_pitch / 2); + } else { + mark_busy_tmem(tile->tmem_addr, (tile->t1-tile->t0+1)*tile->tmem_pitch); + } + break; + case 1: // YUV: split between low and hi TMEM + mark_busy_tmem(tile->tmem_addr, (tile->t1-tile->t0+1)*tile->tmem_pitch / 2); + mark_busy_tmem(tile->tmem_addr+0x800, (tile->t1-tile->t0+1)*tile->tmem_pitch / 2); + break; + case 2: // color-index: mark also palette area of TMEM as used + mark_busy_tmem(tile->tmem_addr, (tile->t1-tile->t0+1)*tile->tmem_pitch); + if (tile->size == 0) mark_busy_tmem(0x800 + tile->pal*64, 64); // CI4 + if (tile->size == 1) mark_busy_tmem(0x800, 0x800); // CI8 + break; + } + + // If this is the tile for cycle0 and the combiner uses TEX1, + // then also tile+1 is used. Process that as well. 
+ if (cycle == 0 && cc_use_tex1()) + validate_use_tile((tidx+1) & 7, 1, texcoords, ncoords); +} + +void rdpq_validate(uint64_t *buf, uint32_t flags, int *r_errs, int *r_warns) +{ + vctx.buf = buf; + vctx.flags = flags; + if (r_errs) *r_errs = vctx.errs; + if (r_warns) *r_warns = vctx.warns; + + uint8_t cmd = CMD(buf[0]); + switch (cmd) { + case 0x3F: { // SET_COLOR_IMAGE + validate_busy_pipe(); + rdp.col.fmt = BITS(buf[0], 53, 55); + rdp.col.size = BITS(buf[0], 51, 52); + rdp.col.width = BITS(buf[0], 32, 41)+1; + rdp.col.height = (BITS(buf[0], 42, 50) | (BIT(buf[0], 31) << 9))+1; // libdragon extension + int size = 4 << rdp.col.size; + VALIDATE_ERR(BITS(buf[0], 0, 5) == 0, "color image must be aligned to 64 bytes"); + switch (size) { + case 4: + VALIDATE_ERR(false, "cannot render to 4bpp surface of type %s%d", + tex_fmt_name[rdp.col.fmt], size); break; + case 8: + VALIDATE_WARN(rdp.col.fmt == 2 || rdp.col.fmt == 4, "color image is defined %s%d but it will render as I8", + tex_fmt_name[rdp.col.fmt], size); break; + case 16: case 32: + VALIDATE_WARN(rdp.col.fmt == 0, "color image is defined %s%d but it will render as RGBA%d", + tex_fmt_name[rdp.col.fmt], size, size); break; + } + uint32_t addr = BITS(buf[0], 0, 24); + if (RDPQ_VALIDATE_DETACH_ADDR && addr == RDPQ_VALIDATE_DETACH_ADDR) { + // special case for libdragon: if the address is 0x800000, then it means + // that the developer requested to detach the framebuffer. Treat it as + // if SET_COLOR_IMAGE was never sent. 
+ rdp.last_col = NULL; + rdp.last_col_data = 0; + } else { + VALIDATE_ERR(addr > 0x400, "color image address set to low RDRAM"); + VALIDATE_WARN(addr < 0x800000, "color image address is out of RDRAM"); + rdp.last_col = &buf[0]; + rdp.last_col_data = buf[0]; + } + rdp.mode_changed = true; // revalidate render mode on different framebuffer format + rdp.rendertarget_changed = true; // revalidate clipping extents on render target + } break; + case 0x3E: { // SET_Z_IMAGE + validate_busy_pipe(); + VALIDATE_ERR(BITS(buf[0], 0, 5) == 0, "Z image must be aligned to 64 bytes"); + uint32_t addr = BITS(buf[0], 0, 24); + if (RDPQ_VALIDATE_DETACH_ADDR && addr == RDPQ_VALIDATE_DETACH_ADDR) { + // special case for libdragon: if the address is 0x800000, then it means + // that the developer requested to detach the Z buffer. Treat it as + // if SET_Z_IMAGE was never sent. + rdp.last_z = NULL; + rdp.last_z_data = 0; + } else { + VALIDATE_ERR(addr > 0x400, "Z image address set to low RDRAM"); + VALIDATE_WARN(addr < 0x800000, "Z image address is out of RDRAM"); + rdp.last_z = &buf[0]; + rdp.last_z_data = buf[0]; + } + rdp.mode_changed = true; // revalidate render mode on different Z buffer + } break; + case 0x3D: // SET_TEX_IMAGE + validate_busy_pipe(); + VALIDATE_ERR(BITS(buf[0], 0, 2) == 0, "texture image must be aligned to 8 bytes"); + rdp.tex.physaddr = BITS(buf[0], 0, 24); + rdp.tex.fmt = BITS(buf[0], 53, 55); + rdp.tex.size = BITS(buf[0], 51, 52); + rdp.last_tex = &buf[0]; + rdp.last_tex_data = buf[0]; + break; + case 0x35: { // SET_TILE + int tidx = BITS(buf[0], 24, 26); + validate_busy_tile(tidx); + struct tile_s *t = &rdp.tile[tidx]; + *t = (struct tile_s){ + .last_settile = &buf[0], + .last_settile_data = buf[0], + .fmt = BITS(buf[0], 53, 55), .size = BITS(buf[0], 51, 52), + .pal = BITS(buf[0], 20, 23), + .has_extents = false, + .tmem_addr = BITS(buf[0], 32, 40)*8, + .tmem_pitch = BITS(buf[0], 41, 49)*8, + .s.clamp = BIT(buf[0], 9), .t.clamp = BIT(buf[0], 19), + .s.mirror = 
BIT(buf[0], 8), .t.mirror = BIT(buf[0], 18), + .s.mask = BITS(buf[0], 4, 7), .t.mask = BITS(buf[0], 14, 17), + }; + if (t->fmt == 2 && t->size == 1) + VALIDATE_WARN(t->pal == 0, "invalid non-zero palette for CI8 tile"); + if (t->fmt == 1 || (t->fmt == 0 && t->size == 3)) // YUV && RGBA32 + VALIDATE_ERR(t->tmem_addr < 0x800, "format %s requires address in low TMEM (< 0x800)", t->fmt==1 ? "YUV" : "RGBA32"); + } break; + case 0x32: case 0x34: { // SET_TILE_SIZE, LOAD_TILE + bool load = cmd == 0x34; + int tidx = BITS(buf[0], 24, 26); + struct tile_s *t = &rdp.tile[tidx]; + validate_busy_tile(tidx); + if (load) { + rdp.busy.tile[tidx] = true; // mask as in use + VALIDATE_CRASH_TEX(rdp.tex.size != 0, "LOAD_TILE does not support 4-bit textures"); + } + t->has_extents = true; + t->last_setsize = &buf[0]; + t->last_setsize_data = buf[0]; + t->s0 = BITS(buf[0], 44, 55)*FX(2); t->t0 = BITS(buf[0], 32, 43)*FX(2); + t->s1 = BITS(buf[0], 12, 23)*FX(2); t->t1 = BITS(buf[0], 0, 11)*FX(2); + if (load) { + int hpixels = (int)t->s1 - (int)t->s0 + 1; + VALIDATE_CRASH_TEX(!check_loading_crash(hpixels), "loading pixels from a misaligned texture image"); + validate_busy_tmem(t->tmem_addr, (t->t1-t->t0+1) * t->tmem_pitch); + } + } break; + case 0x33: { // LOAD_BLOCK + int tidx = BITS(buf[0], 24, 26); + int hpixels = BITS(buf[0], 12, 23)+1; + VALIDATE_ERR_TEX(hpixels <= 2048, "cannot load more than 2048 texels at once"); + VALIDATE_CRASH_TEX(!check_loading_crash(hpixels), "loading pixels from a misaligned texture image"); + rdp.busy.tile[tidx] = true; // mask as in use + } break; + case 0x30: { // LOAD_TLUT + int tidx = BITS(buf[0], 24, 26); + rdp.busy.tile[tidx] = true; // mask as in use + struct tile_s *t = &rdp.tile[tidx]; + int low = BITS(buf[0], 44, 55), high = BITS(buf[0], 12, 23); + if (rdp.tex.size == 0) + VALIDATE_CRASH_TEX(rdp.tex.size != 0, "LOAD_TLUT does not support 4-bit textures"); + else + VALIDATE_ERR_TEX(rdp.tex.fmt == 0 && rdp.tex.size == 2, "LOAD_TLUT requires texture 
in RGBA16 format"); + VALIDATE_ERR(t->tmem_addr >= 0x800, "palettes must be loaded in upper half of TMEM (address >= 0x800)"); + VALIDATE_WARN(!(low&3) && !(high&3), "lowest 2 bits of palette start/stop must be 0"); + VALIDATE_ERR(low>>2 < 256, "palette start index must be < 256"); + VALIDATE_ERR(high>>2 < 256, "palette stop index must be < 256"); + VALIDATE_CRASH(low>>2 <= high>>2, "palette stop index is lower than palette start index"); + } break; + case 0x2F: // SET_OTHER_MODES + validate_busy_pipe(); + rdp.som = decode_som(buf[0]); + rdp.last_som = &buf[0]; + rdp.last_som_data = buf[0]; + rdp.mode_changed = true; + rdp.rendertarget_changed = true; // revalidate clipping extents on render target (cycle mode mught be changed) + break; + case 0x3C: // SET_COMBINE + validate_busy_pipe(); + rdp.cc = decode_cc(buf[0]); + rdp.last_cc = &buf[0]; + rdp.last_cc_data = buf[0]; + rdp.mode_changed = true; + break; + case 0x2D: // SET_SCISSOR + rdp.clip.x0 = BITS(buf[0],44,55)*FX(2); rdp.clip.y0 = BITS(buf[0],32,43)*FX(2); + rdp.clip.x1 = BITS(buf[0],12,23)*FX(2); rdp.clip.y1 = BITS(buf[0], 0,11)*FX(2); + rdp.sent_scissor = true; + rdp.rendertarget_changed = true; + break; + case 0x25: // TEX_RECT_FLIP + VALIDATE_ERR(rdp.som.cycle_type < 2, "cannot draw texture rectangle flip in copy/fill mode"); + // passthrough + case 0x24: { // TEX_RECT + rdp.busy.pipe = true; + lazy_validate_rendertarget(); + lazy_validate_rendermode(); + validate_draw_cmd(false, true, false, false); + // Compute texture coordinates to validate tile usage + int w = (BITS(buf[0], 44, 55) - BITS(buf[0], 12, 23))*FX(2); + int h = (BITS(buf[0], 32, 43) - BITS(buf[0], 0, 11))*FX(2); + float s0 = BITS(buf[1], 48, 63)*FX(5), t0 = BITS(buf[1], 32, 47)*FX(5); + float sw = SBITS(buf[1], 16, 31)*FX(10), tw = SBITS(buf[1], 0, 15)*FX(10); + if (rdp.som.cycle_type == 2) w += 1; // copy mode has inclusive horizontal bounds + if (rdp.som.cycle_type == 2) sw /= 4; // copy mode has 4x horizontal scale + 
validate_use_tile(BITS(buf[0], 24, 26), 0, (float[]){s0, t0, s0+sw*(w-1), t0+tw*(h-1)}, 2); + if (rdp.som.cycle_type == 2) { + uint16_t dsdx = BITS(buf[1], 16, 31); + if (dsdx != 4<<10) { + if (dsdx > 4<<10 && dsdx <= 5<<10) + VALIDATE_WARN_SOM(0, "drawing texture rectangles in COPY mode with small horizontal reduction (< 20%%) will render without subpixel accuracy; consider using 1-cycle mode instead"); + else + VALIDATE_ERR_SOM(0, "horizontally-scaled texture rectangles in COPY mode will not correctly render"); + } + } + // Check mipmapping related quirks with rectangles + VALIDATE_WARN_SOM(!rdp.som.tex.lod, "mipmapping does not work with texture rectangles, it will be ignored"); + if (!rdp.som.tex.lod && rdp.som.cycle_type < 2) { + // avoid specific LOD_FRAC warnings if we already issued the previous one + for (int i=0; i<=rdp.som.cycle_type; i++) { + struct cc_cycle_s *ccs = &rdp.cc.cyc[i^1]; + bool lod_frac_rgb = ccs->rgb.mul == 13; + bool lod_frac_alpha = ccs->alpha.mul == 0; + if (lod_frac_alpha && ccs->alpha.suba == 0 && ccs->alpha.subb == 0) + lod_frac_alpha = false; // (0-0)*lod_frac is allowed without warnings (it's used as passthrough) + VALIDATE_WARN_CC(!lod_frac_rgb && !lod_frac_alpha, + "LOD_FRAC is not calculated correctly in rectangles (it's always 0x00 or 0xFF)"); + } + } + } break; + case 0x36: // FILL_RECTANGLE + rdp.busy.pipe = true; + lazy_validate_rendertarget(); + lazy_validate_rendermode(); + validate_draw_cmd(false, false, false, false); + break; + case 0x8 ... 
0xF: // Triangles + rdp.busy.pipe = true; + VALIDATE_ERR_SOM(rdp.som.cycle_type < 2, "cannot draw triangles in copy/fill mode"); + lazy_validate_rendertarget(); + lazy_validate_rendermode(); + validate_draw_cmd(cmd & 4, cmd & 2, cmd & 1, cmd & 2); + if (cmd & 2) validate_use_tile(BITS(buf[0], 48, 50), 0, NULL, 0); // TODO: pass texture coordinates here + if (BITS(buf[0], 51, 53)) + VALIDATE_WARN_SOM(rdp.som.tex.lod, "triangle with %d mipmaps specified, but mipmapping is disabled", + BITS(buf[0], 51, 53)+1); + break; + case 0x27: // SYNC_PIPE + rdp.busy.pipe = false; + break; + case 0x29: // SYNC_FULL + memset(&rdp.busy, 0, sizeof(rdp.busy)); + break; + case 0x28: // SYNC_TILE + memset(&rdp.busy.tile, 0, sizeof(rdp.busy.tile)); + break; + case 0x26: // SYNC_LOAD + memset(&rdp.busy.tmem, 0, sizeof(rdp.busy.tmem)); + break; + case 0x2E: // SET_PRIM_DEPTH + rdp.sent_zprim = true; + break; + case 0x3A: // SET_PRIM_COLOR + break; + case 0x37: // SET_FILL_COLOR + case 0x38: // SET_FOG_COLOR + case 0x39: // SET_BLEND_COLOR + case 0x3B: // SET_ENV_COLOR + case 0x2C: // SET_CONVERT + validate_busy_pipe(); + break; + case 0x31: // RDPQ extensions + case 0x00: // NOP + break; + default: // Invalid command + VALIDATE_WARN(0, "invalid RDP command 0x%02X", cmd); + break; + } + + if (r_errs) *r_errs = vctx.errs - *r_errs; + if (r_warns) *r_warns = vctx.warns - *r_warns; + vctx.buf = NULL; +} + +#ifdef N64 +surface_t rdpq_debug_get_tmem(void) { + // Dump the TMEM as a 32x64 surface of 16bit pixels + surface_t surf = surface_alloc(FMT_RGBA16, 32, 64); + + rdpq_attach(&surf, NULL); + rdpq_mode_push(); + rdpq_set_mode_copy(false); + rdpq_set_tile(RDPQ_TILE_INTERNAL, FMT_RGBA16, 0, 32*2, 0); // pitch: 32 px * 16-bit + rdpq_set_tile_size(RDPQ_TILE_INTERNAL, 0, 0, 32, 64); + rdpq_texture_rectangle(RDPQ_TILE_INTERNAL, + 0, 0, 32, 64, // x0,y0, x1,y1 + 0, 0 // s, t + ); + rdpq_mode_pop(); + rdpq_detach_wait(); + + // We dumped TMEM contents using a rectangle. 
When RDP accesses TMEM + // for drawing, odd lines are dword-swapped. So we need to swap back + // the contents of our buffer to restore the original TMEM layout. + uint8_t *tmem = surf.buffer; + for (int y=0;y<4096;y+=64) { + if ((y/64)&1) { // odd line of 64x64 rectangle + uint32_t *s = (uint32_t*)&tmem[y]; + for (int i=0;i<16;i+=2) + SWAP(s[i], s[i+1]); + } + } + + return surf; +} +#endif \ No newline at end of file diff --git a/src/rdpq/rdpq_debug_internal.h b/src/rdpq/rdpq_debug_internal.h new file mode 100644 index 0000000000..6f18d03d8f --- /dev/null +++ b/src/rdpq/rdpq_debug_internal.h @@ -0,0 +1,71 @@ +#ifndef LIBDRAGON_RDPQ_DEBUG_INTERNAL_H +#define LIBDRAGON_RDPQ_DEBUG_INTERNAL_H + +#include +#include +#include + +/** + * @brief Log all the commands run by RDP until the time of this call. + * + * Given that RDP buffers get reused as circular buffers, it is important + * to call this function often enough. + */ +extern void (*rdpq_trace)(void); + +/** + * @brief Notify the trace engine that RDP is about to change buffer. + * + * Calling this function notifies the trace engine that the RDP buffer is possibly + * going to be switched soon, and the current pointers should be fetched and stored + * away for later dump. + * + * Notice that this function does not create a copy of the memory contents, but just + * saves the DP_START/DP_END pointers. It is up to the client to make sure to call + * rdpq_trace() at least once before the same buffer gets overwritten in the future. + * + * @param new_buffer If true, we know for sure that the RDP is about to switch buffer. + * If false, this is an optimistic reading (eg: done in idle time), + * so the contents might match previous readings. 
+ */ +extern void (*rdpq_trace_fetch)(bool new_buffer); + +/** + * @brief Validate the next RDP command, given the RDP current state + * + * @param buf Pointer to the RDP command + * @param flags Flags that configure the validation + * @param[out] errs If provided, this variable will contain the number of + * validation errors that were found. + * @param[out] warns If provided, this variable will contain the number of + * validation warnings that were found. + */ +void rdpq_validate(uint64_t *buf, uint32_t flags, int *errs, int *warns); + +/** @brief Disable echo of commands triggering validation errors */ +#define RDPQ_VALIDATE_FLAG_NOECHO 0x00000001 + +/** @brief Show all triangles in logging (default: off) */ +#define RDPQ_LOG_FLAG_SHOWTRIS 0x00000001 + +/** @brief Flags that configure the logging */ +extern int __rdpq_debug_log_flags; + +/** + * @brief Special detach RDRAM address + * + * When this is set to a non-zero value, the validator will treat the address specified + * here as a special "detach" marker. When SET_COLOR_IMAGE or SET_Z_IMAGE are sent with + * this address, the validator will adjust its internal state as if no such command + * was ever sent, giving appropriate error messages if a drawing command is then issued. + * + * This allows libdragon to improve the user experience when the user forgets to configure + * the render target, explicitly telling that no render target is currently attached to RDP. + * + * On real hardware, when the RDP is configured to access an address in range 0x00800000 - 0x00FFFFFF, + * it will simply ignore all writes (and all reads return 0), so anything in that range is + * actually a safe value to "disable" a render target. 
+ */ +#define RDPQ_VALIDATE_DETACH_ADDR 0x00800000 + +#endif /* LIBDRAGON_RDPQ_DEBUG_INTERNAL_H */ diff --git a/src/rdpq/rdpq_internal.h b/src/rdpq/rdpq_internal.h new file mode 100644 index 0000000000..fdcac6ca6d --- /dev/null +++ b/src/rdpq/rdpq_internal.h @@ -0,0 +1,176 @@ +/** + * @file rdpq_internal.h + * @brief RDP Command queue: internal functions + * @ingroup rdp + */ + +#ifndef __LIBDRAGON_RDPQ_INTERNAL_H +#define __LIBDRAGON_RDPQ_INTERNAL_H + +#include "pputils.h" +#include "rspq.h" +#include "../rspq/rspq_internal.h" + +/** @brief True if the rdpq module was inited */ +extern bool __rdpq_inited; + +/** @brief Public rdpq_fence API, redefined it */ +extern void rdpq_fence(void); + +///@cond +typedef struct rdpq_block_s rdpq_block_t; +typedef struct rdpq_trifmt_s rdpq_trifmt_t; +///@endcond + +/** + * @brief RDP tracking state + * + * This structure contains information that refers to the state of the RDP, + * tracked by the CPU as it enqueues RDP instructions. + * + * Tracking the RDP state on the CPU is in general possible (as all + * RDP commands are supposed to go through rdpq, when it is used), but it + * doesn't fully work across blocks. In fact, blocks can be called in + * multiple call sites with different RDP states, so it would be wrong + * to do any assumption on the RDP state while generating the block. + * + * Thus, this structure is reset at some default by #__rdpq_block_begin, + * and then its previous state is restored by #__rdpq_block_end. + */ +typedef struct { + /** + * @brief State of the autosync engine. + * + * The state of the autosync engine is a 32-bit word, where bits are + * mapped to specific internal resources of the RDP that might be in + * use. The mapping of the bits is indicated by the `AUTOSYNC_TILE`, + * `AUTOSYNC_TMEM`, and `AUTOSYNC_PIPE` + * + * When a bit is set to 1, the corresponding resource is "in use" + * by the RDP. 
For instance, drawing a textured rectangle can use + * a tile and the pipe (which contains most of the mode registers). + */ + uint32_t autosync : 17; + /** @brief True if the mode changes are currently frozen. */ + bool mode_freeze : 1; + /** @brief 0=unknown, 1=standard, 2=copy/fill */ + uint8_t cycle_type_known : 2; + uint8_t cycle_type_frozen : 2; +} rdpq_tracking_t; + +extern rdpq_tracking_t rdpq_tracking; + +/** + * @brief A buffer that piggybacks onto rspq_block_t to store RDP commands + * + * In rspq blocks, raw RDP commands are not stored as passthroughs for performance. + * Instead, they are stored in a parallel buffer in RDRAM and the RSP block contains + * commands to send (portions of) this buffer directly to RDP via DMA. This saves + * memory bandwidth compared to doing passthrough for every command. + * + * Since the buffer can grow during creation, it is stored as a linked list of buffers. + */ +typedef struct rdpq_block_s { + rdpq_block_t *next; ///< Link to next buffer (or NULL if this is the last one for this block) + rdpq_tracking_t tracking; ///< Tracking state at the end of a block (this is populated only on the first link) + uint32_t cmds[] __attribute__((aligned(8))); ///< RDP commands +} rdpq_block_t; + +/** + * @brief RDP block management state + * + * This is the internal state used by rdpq.c to manage block creation. + */ +typedef struct rdpq_block_state_s { + /** @brief During block creation, current write pointer within the RDP buffer. */ + volatile uint32_t *wptr; + /** @brief During block creation, pointer to the end of the RDP buffer. */ + volatile uint32_t *wend; + /** @brief Previous wptr, swapped out to go back to dynamic buffer. */ + volatile uint32_t *pending_wptr; + /** @brief Previous wend, swapped out to go back to dynamic buffer. 
*/ + volatile uint32_t *pending_wend; + /** @brief Point to the RDP block being created */ + rdpq_block_t *last_node; + /** @brief Point to the first link of the RDP block being created */ + rdpq_block_t *first_node; + /** @brief Current buffer size for RDP blocks */ + int bufsize; + /** + * During block creation, this variable points to the last + * #RSPQ_CMD_RDP_APPEND_BUFFER command, that can be coalesced + * in case a pure RDP command is enqueued next. + */ + volatile uint32_t *last_rdp_append_buffer; + /** + * @brief Tracking state before starting building the block. + */ + rdpq_tracking_t previous_tracking; +} rdpq_block_state_t; + +void __rdpq_block_begin(); +rdpq_block_t* __rdpq_block_end(); +void __rdpq_block_free(rdpq_block_t *block); +void __rdpq_block_run(rdpq_block_t *block); +void __rdpq_block_next_buffer(void); +void __rdpq_block_update(volatile uint32_t *wptr); +void __rdpq_block_reserve(int num_rdp_commands); + +inline void __rdpq_autosync_use(uint32_t res) +{ + rdpq_tracking.autosync |= res; +} +void __rdpq_autosync_change(uint32_t res); + +void __rdpq_write8(uint32_t cmd_id, uint32_t arg0, uint32_t arg1); +void __rdpq_write16(uint32_t cmd_id, uint32_t arg0, uint32_t arg1, uint32_t arg2, uint32_t arg3); + +void rdpq_triangle_cpu(const rdpq_trifmt_t *fmt, const float *v1, const float *v2, const float *v3); +void rdpq_triangle_rsp(const rdpq_trifmt_t *fmt, const float *v1, const float *v2, const float *v3); + + +///@cond +/* Helpers for rdpq_passthrough_write / rdpq_fixup_write */ +#define __rdpcmd_count_words2(rdp_cmd_id, arg0, ...) nwords += __COUNT_VARARGS(__VA_ARGS__) + 1; +#define __rdpcmd_count_words(arg) __rdpcmd_count_words2 arg + +#define __rdpcmd_write_arg(arg) *ptr++ = arg; +#define __rdpcmd_write2(rdp_cmd_id, arg0, ...) \ + *ptr++ = (RDPQ_OVL_ID + ((rdp_cmd_id)<<24)) | (arg0); \ + __CALL_FOREACH_BIS(__rdpcmd_write_arg, ##__VA_ARGS__); +#define __rdpcmd_write(arg) __rdpcmd_write2 arg + +#define __rspcmd_write(...) 
({ rspq_write(RDPQ_OVL_ID, __VA_ARGS__ ); }) +///@endcond + +/** + * @brief Write a passthrough RDP command into the rspq queue + * + * This macro handles writing a single RDP command into the rspq queue. It must be + * used only with raw commands aka passthroughs, that is commands that are not + * intercepted by RSP in any way, but just forwarded to RDP. + * + * In block mode, the RDP command will be written to the static RDP buffer instead, + * so that it will be sent directly to RDP without going through RSP at all. + * + * Example syntax (notice the double parenthesis): + * + * rdpq_passthrough_write((RDPQ_CMD_SYNC_PIPE, 0, 0)); + * + * @hideinitializer + */ +#define rdpq_passthrough_write(rdp_cmd) ({ \ + if (__builtin_expect(rspq_in_block(), 0)) { \ + extern rdpq_block_state_t rdpq_block_state; \ + int nwords = 0; __rdpcmd_count_words(rdp_cmd); \ + if (__builtin_expect(rdpq_block_state.wptr + nwords > rdpq_block_state.wend, 0)) \ + __rdpq_block_next_buffer(); \ + volatile uint32_t *ptr = rdpq_block_state.wptr; \ + __rdpcmd_write(rdp_cmd); \ + __rdpq_block_update((uint32_t*)ptr); \ + } else { \ + __rspcmd_write rdp_cmd; \ + } \ +}) + +#endif diff --git a/src/rdpq/rdpq_mode.c b/src/rdpq/rdpq_mode.c new file mode 100644 index 0000000000..877e931562 --- /dev/null +++ b/src/rdpq/rdpq_mode.c @@ -0,0 +1,169 @@ +/** + * @file rdpq_mode.c + * @brief RDP Command queue: mode setting + * @ingroup rdp + */ + +#include "rdpq_mode.h" +#include "rspq.h" +#include "rdpq_internal.h" + +/** + * @brief Like #rdpq_write, but for mode commands. + * + * During freeze (#rdpq_mode_begin), mode commands don't emit RDP commands + * as they are batched instead, so we can avoid reserving space in the + * RDP static buffer in blocks. + */ +#define rdpq_mode_write(num_rdp_commands, ...) ({ \ + rdpq_write(rdpq_tracking.mode_freeze ? 
0 : num_rdp_commands, ##__VA_ARGS__); \ +}) + +/** + * @brief Write a fixup that changes the current render mode (8-byte command) + * + * All the mode fixups always need to update the RDP render mode + * and thus generate two RDP commands: SET_COMBINE and SET_OTHER_MODES. + */ +__attribute__((noinline)) +void __rdpq_fixup_mode(uint32_t cmd_id, uint32_t w0, uint32_t w1) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + rdpq_mode_write(2, RDPQ_OVL_ID, cmd_id, w0, w1); // COMBINE+SOM +} + +/** @brief Write a fixup that changes the current render mode (12-byte command) */ +__attribute__((noinline)) +void __rdpq_fixup_mode3(uint32_t cmd_id, uint32_t w0, uint32_t w1, uint32_t w2) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + rdpq_mode_write(2, RDPQ_OVL_ID, cmd_id, w0, w1, w2); // COMBINE+SOM + +} + +/** @brief Write a fixup that changes the current render mode (16-byte command) */ +__attribute__((noinline)) +void __rdpq_fixup_mode4(uint32_t cmd_id, uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + rdpq_mode_write(2, RDPQ_OVL_ID, cmd_id, w0, w1, w2, w3); // COMBINE+SOM +} + +/** @brief Write a fixup to reset the render mode */ +__attribute__((noinline)) +void __rdpq_reset_render_mode(uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3) +{ + __rdpq_autosync_change(AUTOSYNC_PIPE); + // ResetRenderMode can generate: SCISSOR+COMBINE+SOM + rdpq_mode_write(3, RDPQ_OVL_ID, RDPQ_CMD_RESET_RENDER_MODE, w0, w1, w2, w3); +} + +void rdpq_mode_push(void) +{ + // Push is not a RDP passthrough/fixup command, it's just a standard + // RSP command. Use rspq_write. 
+ rspq_write(RDPQ_OVL_ID, RDPQ_CMD_PUSH_RENDER_MODE, 0, 0); +} + +void rdpq_mode_pop(void) +{ + __rdpq_fixup_mode(RDPQ_CMD_POP_RENDER_MODE, 0, 0); +} + +/** @brief Like #rdpq_set_mode_fill, but without fill color configuration */ +void __rdpq_set_mode_fill(void) { + uint64_t som = (0xEFull << 56) | SOM_CYCLE_FILL; + __rdpq_reset_render_mode(0, 0, som >> 32, som & 0xFFFFFFFF); + if (!rdpq_tracking.mode_freeze) + rdpq_tracking.cycle_type_known = 2; + else + rdpq_tracking.cycle_type_frozen = 2; +} + +void rdpq_set_mode_copy(bool transparency) { + uint64_t som = (0xEFull << 56) | SOM_CYCLE_COPY | (transparency ? SOM_ALPHACOMPARE_THRESHOLD : 0); + __rdpq_reset_render_mode(0, 0, som >> 32, som & 0xFFFFFFFF); + if (!rdpq_tracking.mode_freeze) + rdpq_tracking.cycle_type_known = 2; + else + rdpq_tracking.cycle_type_frozen = 2; +} + +void rdpq_set_mode_standard(void) { + uint64_t cc = RDPQ_COMBINER1( + (ZERO, ZERO, ZERO, TEX0), (ZERO, ZERO, ZERO, TEX0) + ); + uint64_t som = + SOM_TF0_RGB | SOM_TF1_RGB | + SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE | + SOM_COVERAGE_DEST_ZAP; + + __rdpq_reset_render_mode( + cc >> 32, cc & 0xFFFFFFFF, + som >> 32, som & 0xFFFFFFFF); + rdpq_mode_combiner(cc); // FIXME: this should not be required, but we need it for the mipmap mask + if (!rdpq_tracking.mode_freeze) + rdpq_tracking.cycle_type_known = 1; + else + rdpq_tracking.cycle_type_frozen = 1; +} + +void rdpq_set_mode_yuv(bool bilinear) { + uint64_t cc, som; + + if (!bilinear) { + som = SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE | SOM_TF0_YUV; + cc = RDPQ_COMBINER1((TEX0, K4, K5, TEX0), (ZERO, ZERO, ZERO, ONE)); + } else { + som = SOM_RGBDITHER_NONE | SOM_ALPHADITHER_NONE | SOM_SAMPLE_BILINEAR | SOM_TF0_RGB | SOM_TF1_YUVTEX0; + cc = RDPQ_COMBINER2((TEX1, K4, K5, TEX1), (ZERO, ZERO, ZERO, ONE), + (ZERO, ZERO, ZERO, COMBINED), (ZERO, ZERO, ZERO, COMBINED)); + } + __rdpq_reset_render_mode( + cc >> 32, cc & 0xFFFFFFFF, + som >> 32, som & 0xFFFFFFFF); + if (!rdpq_tracking.mode_freeze) + 
rdpq_tracking.cycle_type_known = 1; + else + rdpq_tracking.cycle_type_frozen = 1; + + rdpq_set_yuv_parms(179,-44,-91,227,111,43); // BT.601 coefficients (Kr=0.299, Kb=0.114, TV range) +} + +void rdpq_mode_begin(void) +{ + // Freeze render mode updates. mode_freeze is set before calling + // __rdpq_mode_change_som: while frozen, mode commands are batched and + // emit no RDP commands (see rdpq_mode_write). + rdpq_tracking.mode_freeze = true; + rdpq_tracking.cycle_type_frozen = 0; + __rdpq_mode_change_som(SOMX_UPDATE_FREEZE, SOMX_UPDATE_FREEZE); +} + +void rdpq_mode_end(void) +{ + // Unfreeze render mode updates and recalculate new render mode. + rdpq_tracking.mode_freeze = false; + rdpq_tracking.cycle_type_known = rdpq_tracking.cycle_type_frozen; + __rdpq_mode_change_som(SOMX_UPDATE_FREEZE, 0); +} + + +/* Extern inline instantiations. */ +extern inline void rdpq_set_mode_fill(color_t color); +extern inline void rdpq_set_mode_standard(void); +extern inline void rdpq_mode_combiner(rdpq_combiner_t comb); +extern inline void rdpq_mode_blender(rdpq_blender_t blend); +extern inline void rdpq_mode_antialias(rdpq_antialias_t mode); +extern inline void rdpq_mode_fog(rdpq_blender_t fog); +extern inline void rdpq_mode_dithering(rdpq_dither_t dither); +extern inline void rdpq_mode_alphacompare(int threshold); +extern inline void rdpq_mode_zbuf(bool compare, bool write); +extern inline void rdpq_mode_zoverride(bool enable, float z, int16_t deltaz); +extern inline void rdpq_mode_tlut(rdpq_tlut_t tlut); +extern inline void rdpq_mode_filter(rdpq_filter_t s); +extern inline void rdpq_mode_mipmap(rdpq_mipmap_t mode, int num_levels); +extern inline void rdpq_mode_persp(bool perspective); +///@cond +extern inline void __rdpq_mode_change_som(uint64_t mask, uint64_t val); +///@endcond diff --git a/src/rdpq/rdpq_rect.c b/src/rdpq/rdpq_rect.c new file mode 100644 index 0000000000..9a3d10aa3d --- /dev/null +++ b/src/rdpq/rdpq_rect.c @@ -0,0 +1,74 @@ +/** + * @file rdpq_rect.c + * @brief RDP Command 
queue + * @ingroup rdp + * + */ + +#include "rdpq_rect.h" +#include "rdpq_internal.h" + +// The fixup for fill rectangle and texture rectangle uses the exact same code in IMEM. +// It needs to also adjust the command ID with the same constant (via XOR), so make +// sure that we defined the fixups in the right position to make that happen. +_Static_assert( + (RDPQ_CMD_FILL_RECTANGLE ^ RDPQ_CMD_FILL_RECTANGLE_EX) == + (RDPQ_CMD_TEXTURE_RECTANGLE ^ RDPQ_CMD_TEXTURE_RECTANGLE_EX), + "invalid command numbering"); + + +/** @brief Out-of-line implementation of #rdpq_fill_rectangle */ +__attribute__((noinline)) +void __rdpq_fill_rectangle(uint32_t w0, uint32_t w1) +{ + __rdpq_autosync_use(AUTOSYNC_PIPE); + if (rdpq_tracking.cycle_type_known) { + if (rdpq_tracking.cycle_type_known == 2) { + w0 -= (4<<12) | 4; + } + rdpq_passthrough_write((RDPQ_CMD_FILL_RECTANGLE, w0, w1)); + return; + } + rdpq_write(1, RDPQ_OVL_ID, RDPQ_CMD_FILL_RECTANGLE_EX, w0, w1); +} + +void __rdpq_fill_rectangle_offline(int32_t x0, int32_t y0, int32_t x1, int32_t y1) { + __rdpq_fill_rectangle_inline(x0, y0, x1, y1); +} + +/** @brief Out-of-line implementation of #rdpq_texture_rectangle */ +__attribute__((noinline)) +void __rdpq_texture_rectangle(uint32_t w0, uint32_t w1, uint32_t w2, uint32_t w3) +{ + int tile = (w1 >> 24) & 7; + // FIXME: this can also use tile+1 in case the combiner refers to TEX1 + // FIXME: this can also use tile+2 and +3 in case SOM activates texture detail / sharpen + __rdpq_autosync_use(AUTOSYNC_PIPE | AUTOSYNC_TILE(tile) | AUTOSYNC_TMEM(0)); + if (rdpq_tracking.cycle_type_known) { + if (rdpq_tracking.cycle_type_known == 2) { + w0 -= (4<<12) | 4; + w3 = ((w3 & 0xFFFF0000) << 2) | (w3 & 0x0000FFFF); + } + rdpq_passthrough_write((RDPQ_CMD_TEXTURE_RECTANGLE, w0, w1, w2, w3)); + return; + } + + rdpq_write(2, RDPQ_OVL_ID, RDPQ_CMD_TEXTURE_RECTANGLE_EX, w0, w1, w2, w3); +} + +void __rdpq_texture_rectangle_offline(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, 
int32_t s0, int32_t t0) { + __rdpq_texture_rectangle_inline(tile, x0, y0, x1, y1, s0, t0); +} + +void __rdpq_texture_rectangle_scaled_offline(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0, int32_t s1, int32_t t1) { + __rdpq_texture_rectangle_scaled_inline(tile, x0, y0, x1, y1, s0, t0, s1, t1); +} + +extern inline void __rdpq_fill_rectangle_inline(int32_t x0, int32_t y0, int32_t x1, int32_t y1); +extern inline void __rdpq_fill_rectangle_fx(int32_t x0, int32_t y0, int32_t x1, int32_t y1); +extern inline void __rdpq_texture_rectangle_fx(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s, int32_t t); +extern inline void __rdpq_texture_rectangle_scaled_fx(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0, int32_t s1, int32_t t1); +extern inline void __rdpq_texture_rectangle_raw_fx(rdpq_tile_t tile, uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint16_t s0, uint16_t t0, int16_t dsdx, int16_t dtdy); +extern inline void __rdpq_texture_rectangle_flip_raw_fx(rdpq_tile_t tile, uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, int16_t s, int16_t t, int16_t dsdy, int16_t dtdx); +extern inline void __rdpq_texture_rectangle_inline(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0); +extern inline void __rdpq_texture_rectangle_scaled_inline(rdpq_tile_t tile, int32_t x0, int32_t y0, int32_t x1, int32_t y1, int32_t s0, int32_t t0, int32_t s1, int32_t t1); diff --git a/src/rdpq/rdpq_sprite.c b/src/rdpq/rdpq_sprite.c new file mode 100644 index 0000000000..00298e9fc4 --- /dev/null +++ b/src/rdpq/rdpq_sprite.c @@ -0,0 +1,139 @@ +/** + * @file rdpq_sprite.c + * @brief RDP Command queue: high-level sprite loading and blitting + * @ingroup rdp + */ + +#include "rspq.h" +#include "rdpq.h" +#include "rdpq_sprite.h" +#include "rdpq_sprite_internal.h" +#include "rdpq_mode.h" +#include "rdpq_tex.h" +#include "sprite.h" +#include 
"sprite_internal.h" + +static void sprite_upload_palette(sprite_t *sprite, int palidx, bool set_mode) +{ + // Check if the sprite has a palette + tex_format_t fmt = sprite_get_format(sprite); + rdpq_tlut_t tlut_mode = rdpq_tlut_from_format(fmt); + + if (__builtin_expect(set_mode, 1)) { + // Configure the TLUT render mode + rdpq_mode_tlut(tlut_mode); + } + + if (tlut_mode != TLUT_NONE) { + // Load the palette (if any). We account for sprites being CI4 + // but without embedded palette: mksprite doesn't create sprites like + // this today, but it could in the future (eg: sharing a palette across + // multiple sprites). + uint16_t *pal = sprite_get_palette(sprite); + if (pal) rdpq_tex_upload_tlut(pal, palidx*16, fmt == FMT_CI4 ? 16 : 256); + } +} + +/** @brief Internal implementation of #rdpq_sprite_upload that will optionally skip setting render modes */ +int __rdpq_sprite_upload(rdpq_tile_t tile, sprite_t *sprite, const rdpq_texparms_t *parms, bool set_mode) +{ + assertf(sprite_fits_tmem(sprite), "sprite doesn't fit in TMEM"); + + // Load main sprite surface + surface_t surf = sprite_get_pixels(sprite); + + // If no texparms were provided but the sprite contains some, use them + rdpq_texparms_t parms_builtin; + if (!parms && sprite_get_texparms(sprite, &parms_builtin)) + parms = &parms_builtin; + + // Check for detail texture + sprite_detail_t detail; rdpq_texparms_t detailtexparms = {0}; + surface_t detailsurf = sprite_get_detail_pixels(sprite, &detail, &detailtexparms); + bool use_detail = detailsurf.buffer != NULL; + + rdpq_tex_multi_begin(); + + if(use_detail){ + // If there is a detail texture, we upload the main texture to TILE+1 and detail texture to TILE+0, then any mipmaps if there are any + rdpq_tile_t detail_tile = tile; + tile = (tile+1) & 7; + + // Setup the blend factor for the detail texture + float factor = detail.blend_factor; + rdpq_set_detail_factor(factor); + + // Setup the texparms for the detail texture + if (parms) { + 
detailtexparms.s.translate += parms->s.translate * (1 << (parms->s.scale_log - detailtexparms.s.scale_log)); + detailtexparms.t.translate += parms->t.translate * (1 << (parms->t.scale_log - detailtexparms.t.scale_log)); + } + + // Upload the detail texture if necessary or reuse the main texture + if(detail.use_main_tex){ + rdpq_tex_upload(tile, &surf, parms); + rdpq_tex_reuse(detail_tile, &detailtexparms); + } + else { + rdpq_tex_upload(detail_tile, &detailsurf, &detailtexparms); + rdpq_tex_upload(tile, &surf, parms); + } + } + else // Upload the main texture + rdpq_tex_upload(tile, &surf, parms); + + // Upload mipmaps if any + int num_mipmaps = 0; + rdpq_texparms_t lod_parms; + for (int i=1; i<8; i++) { + surf = sprite_get_lod_pixels(sprite, i); + if (!surf.buffer) break; + + // if this is the first lod, initialize lod parameters + if (i==1) { + if (!parms) { + memset(&lod_parms, 0, sizeof(lod_parms)); + } else { + lod_parms = *parms; + } + } + + // Update parameters for next lod. If the scale maxes out, stop here + num_mipmaps++; + tile = (tile+1) & 7; + if (++lod_parms.s.scale_log >= 11) break; + if (++lod_parms.t.scale_log >= 11) break; + lod_parms.s.translate *= 0.5f; + lod_parms.t.translate *= 0.5f; + + // Load the mipmap + rdpq_tex_upload(tile, &surf, &lod_parms); + } + + if (__builtin_expect(set_mode, 1)) { + // Enable/disable mipmapping + if(use_detail) rdpq_mode_mipmap(MIPMAP_INTERPOLATE_DETAIL, num_mipmaps+1); + else if (num_mipmaps) rdpq_mode_mipmap(MIPMAP_INTERPOLATE, num_mipmaps); + else rdpq_mode_mipmap(MIPMAP_NONE, 0); + } + + // Upload the palette and configure the render mode + sprite_upload_palette(sprite, parms ? 
parms->palette : 0, set_mode); + + return rdpq_tex_multi_end(); +} + +int rdpq_sprite_upload(rdpq_tile_t tile, sprite_t *sprite, const rdpq_texparms_t *parms) +{ + return __rdpq_sprite_upload(tile, sprite, parms, true); +} + +void rdpq_sprite_blit(sprite_t *sprite, float x0, float y0, const rdpq_blitparms_t *parms) +{ + // Upload the palette and configure the render mode + sprite_upload_palette(sprite, 0, true); + + // Get the sprite surface + surface_t surf = sprite_get_pixels(sprite); + rdpq_tex_blit(&surf, x0, y0, parms); +} diff --git a/src/rdpq/rdpq_sprite_internal.h b/src/rdpq/rdpq_sprite_internal.h new file mode 100644 index 0000000000..a4069939d6 --- /dev/null +++ b/src/rdpq/rdpq_sprite_internal.h @@ -0,0 +1,12 @@ +#ifndef LIBDRAGON_RDPQ_SPRITE_INTERNAL_H +#define LIBDRAGON_RDPQ_SPRITE_INTERNAL_H + +#include "rdpq.h" + +///@cond +typedef struct rdpq_texparms_s rdpq_texparms_t; +///@endcond + +int __rdpq_sprite_upload(rdpq_tile_t tile, sprite_t *sprite, const rdpq_texparms_t *parms, bool set_mode); + +#endif diff --git a/src/rdpq/rdpq_tex.c b/src/rdpq/rdpq_tex.c new file mode 100644 index 0000000000..aa9fc21a2c --- /dev/null +++ b/src/rdpq/rdpq_tex.c @@ -0,0 +1,696 @@ +/** + * @file rdpq_tex.c + * @brief RDP Command queue: texture loading + * @ingroup rdp + */ + +///@cond +#define _GNU_SOURCE // Activate GNU extensions in math.h (sincosf) +///@endcond +#include "rdpq.h" +#include "rdpq_tri.h" +#include "rdpq_rect.h" +#include "rdpq_tex.h" +#include "rdpq_tex_internal.h" +#include "utils.h" +#include <math.h> + +/** @brief State of a multi-texture upload (`used` is non-zero while one is in progress) */ +typedef struct rdpq_multi_upload_s { + int used; + int bytes; + int limit; +} rdpq_multi_upload_t; +static rdpq_multi_upload_t multi_upload; +/** @brief Loader state of the last texture uploaded (read back by #rdpq_tex_reuse_sub) */ +tex_loader_t last_tload; + +/** @brief Address in TMEM where the palettes must be loaded */ +#define TMEM_PALETTE_ADDR 0x800 + +/// @brief Calculates the first power of 2 
that is equal or larger than size +/// @param x input in units +/// @return Power of 2 that is equal or larger than x +int integer_to_pow2(int x){ + int res = 0; + while(1<texparms; + int width = tload->rect.width; + int height = tload->rect.height; + + assertf((width > 0 && height > 0), + "The sub rectangle of a texture can't be of negative size (%i,%i)", width, height); + assertf(parms->s.repeats >= 0 && parms->t.repeats >= 0, + "Repetition count (%f, %f) cannot be negative", parms->s.repeats, parms->t.repeats); + + int xmask = 0; + int ymask = 0; + + rdpq_tileparms_t *res = &tload->tileparms; + + if(parms->s.repeats > 1){ + xmask = integer_to_pow2(width); + assertf(1<s.mirror = parms->s.mirror; + } + if(parms->t.repeats > 1){ + ymask = integer_to_pow2(height); + assertf(1<t.mirror = parms->t.mirror; + } + + res->s.shift = parms->s.scale_log; + res->t.shift = parms->t.scale_log; + if(parms->s.repeats * width < 1024) res->s.clamp = true; + else res->s.clamp = false; + if(parms->t.repeats * height < 1024) res->t.clamp = true; + else res->t.clamp = false; + + assertf((!res->s.clamp || parms->s.translate >= 0), + "Translation S (%f) cannot be negative with active clamping", parms->s.translate); + assertf((!res->t.clamp || parms->t.translate >= 0), + "Translation T (%f) cannot be negative with active clamping", parms->t.translate); + + float srepeats = parms->s.repeats; + float trepeats = parms->t.repeats; + if(F2I(srepeats) > 0) { + res->s.mask = xmask; + } else + srepeats = 1; + if(F2I(parms->t.repeats) > 0) { + res->t.mask = ymask; + } else + trepeats = 1; + + tload->rect.s0fx = parms->s.translate*4; + tload->rect.t0fx = parms->t.translate*4; + tload->rect.s1fx = (parms->s.translate + (srepeats - 1) * width * res->s.clamp)*4; + tload->rect.t1fx = (parms->t.translate + (trepeats - 1) * height * res->t.clamp)*4; // T extent follows the T-axis clamp flag, mirroring s1fx +} + + +/** @brief Precomputes everything required for loading the rect (s0,t0)-(s1,t1) + * + * This function prepares for a new TMEM load for the specified 
rectangle. Since it is very + * common to invoke multiple different rects with similar width and/or height, this function + * tries to compute only what needs to be done with respect to the previous load. Specifically: + * + * * If the width of the rectangle changed, we need to compute the TMEM pitch, and verify + * whether we can use LOAD_BLOCK. We can check basic constraints with the width, but there + * will be a maximum number of lines that can be transferred with LOAD_BLOCK. + * * If the height of the rectangle changed, we can calculate the total number of texels + * and complete the LOAD_BLOCK calculation by verifying that the height is within the + * maximum allowed range. + */ +static int texload_set_rect(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + // For now, we don't support clamping/mirroring, as that would require + // additional logic here to select the proper pixels + assertf(s1 <= tload->tex->width && t1 <= tload->tex->height, "rdpq tex loader does not support clamping/mirroring"); + + tex_format_t fmt = surface_get_format(tload->tex); + if (TEX_FORMAT_BITDEPTH(fmt) == 4) { + s0 &= ~1; s1 = (s1+1) & ~1; + } + + int width = s1 - s0; + int height = t1 - t0; + + if (width != tload->rect.width || height != tload->rect.height) { + if (width != tload->rect.width) { + // Calculate the new pitch in TMEM (in bytes). Notice that RGBA32 is special + // as texture data is split in two halves, so the pitch can be halved. + int pitch_shift = fmt == FMT_RGBA32 ? 1 : 0; + int stride_mask = fmt == FMT_RGBA32 ? 15 : 7; + tload->rect.tmem_pitch = ROUND_UP(TEX_FORMAT_PIX2BYTES(fmt, width) >> pitch_shift, 8); + + // Verify whether we can use LOAD_BLOCK. The conditions we can verify just by looking at the + // width are: + // * The rectangle to load covers the whole texture horizontally, and the texture does not + // contain extraneous data at the end of each line. + // * The width of the texture is a multiple of 8 bytes (or 16 bytes, in case of RGBA32). 
+ bool can_load_block_width = + TEX_FORMAT_PIX2BYTES(fmt, width) == tload->tex->stride && + (tload->tex->stride & stride_mask) == 0; + + if (can_load_block_width) { + // If the requirements are satisfied, we need to compute the maximum number of lines + // that can be loaded with LOAD_BLOCK. In fact, RDP uses fixed point precision; + // the DXT parameter in the LOAD_BLOCK command is a 1.10 fixed point number, so + // there is a precision error after a certain number of lines that can cause artifacts. + + // We precomputed a table that stores the maximum number of lines for each possible width. + // (actually, for each possible pitch / 8, given that the pitch must be a multiple of 8). + // This table was generated by the following Python code: + // + // # (thanks to glank for describing a neat way to find the error in dxt per line) + // words_per_line = line_bytes // 8 + // dxt = (1 << 11) / words_per_line + // # dxt is rounded up, so the error is 1 - the fractional part of dxt + // err = 1.0 - math.modf(dxt)[0] + // # the error per line is the error per 64-bit word * the number of words + // err_per_line = words_per_line * err + // # the maximum number of lines before this becomes an issue is + // max_lines = math.floor(dxt / err_per_line) + // + // The table doesn't contain the first 11 entries as they are all unlimited (that is, the error does not happen + // within the 4K TMEM size). 
+ static const uint8_t block_max_lines_table[] = { 20, 42, 26, 14, 19, 32, 13, 28, 26, 8, 9, 4, 4, 5, 20, 13, 18, 3, 6, 3, 2, 16, 2, 2, 3, 14, 2, 13, 2, 1, 12, 4, 2, 2, 2, 2, 2, 2, 4, 10, 0, 1, 2, 9, 0, 1, 8, 0, 2, 0, 1, 0, 1, 8, 0, 0, 1, 0, 1, 0, 2, 0, 0, 1, 0, 6, 0, 0, 4, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + int words = tload->rect.tmem_pitch / 8; + if (words >= 11) + tload->rect.block_max_lines = block_max_lines_table[words - 11]; + else + tload->rect.block_max_lines = 4096; // arbitrary high number, it will be limited by TMEM size anyway + } else { + tload->rect.block_max_lines = 0; + } + + // Invalidate the current load mode. This will force the next load_tile function to reissue + // the RDP configuration. + tload->load_mode = TEX_LOAD_UNKNOWN; + } + + // If the height changed, complete filling the rect structure, + // and calculate whether we can really use LOAD_BLOCK or not. + int tmem_size = (fmt == FMT_RGBA32 || fmt == FMT_CI4 || fmt == FMT_CI8) ? 
2048 : 4096; + assertf(height * tload->rect.tmem_pitch <= tmem_size, + "A rectangle of size %dx%d format %s is too big to fit in TMEM", width, height, tex_format_name(fmt)); + tload->rect.width = width; + tload->rect.height = height; + tload->rect.num_texels = width * height; + tload->rect.can_load_block = height <= tload->rect.block_max_lines; + tload->rect.s0fx = tload->rect.s1fx = tload->rect.t0fx = tload->rect.t1fx = 0; + if (tload->texparms) texload_recalc_tileparms(tload); + } + return tload->rect.tmem_pitch * height; +} + +static void texload_block_4bpp(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + rdpq_tile_t tile_internal = (tload->tile + 1) & 7; + if (tload->load_mode != TEX_LOAD_BLOCK) { + // Use LOAD_BLOCK if we are uploading a full texture. Notice the weirdness of LOAD_BLOCK: + // * SET_TILE must be configured with tmem_pitch=0, as that is weirdly used as the number of + // texels to skip per line, which we don't need. + assertf(ROUND_UP(tload->tex->width, 2) % 4 == 0, "Internal Error: invalid width for LOAD_BLOCK (%d)", tload->tex->width); + rdpq_set_texture_image_raw(surface_get_placeholder_index(tload->tex), PhysicalAddr(tload->tex->buffer), FMT_RGBA16, (tload->tex->width+1)/4, tload->tex->height); + rdpq_set_tile(tile_internal, FMT_RGBA16, tload->tmem_addr, 0, NULL); + rdpq_set_tile(tload->tile, surface_get_format(tload->tex), tload->tmem_addr, tload->rect.tmem_pitch, &(tload->tileparms)); + tload->load_mode = TEX_LOAD_BLOCK; + } + + s0 &= ~1; s1 = (s1+1) & ~1; + rdpq_load_block(tile_internal, s0/2, t0, tload->rect.num_texels/4, tload->rect.tmem_pitch); + + s0 = s0*4 + tload->rect.s0fx; + t0 = t0*4 + tload->rect.t0fx; + s1 = s1*4 + tload->rect.s1fx; + t1 = t1*4 + tload->rect.t1fx; + rdpq_set_tile_size_fx(tload->tile, s0, t0, s1, t1); +} + +static void texload_block_8bpp(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + rdpq_tile_t tile_internal = (tload->tile + 1) & 7; + tex_format_t fmt = surface_get_format(tload->tex); + if 
(tload->load_mode != TEX_LOAD_BLOCK) { + // Use LOAD_BLOCK if we are uploading a full texture. Notice the weirdness of LOAD_BLOCK: + // * SET_TILE must be configured with tmem_pitch=0, as that is weirdly used as the number of + // texels to skip per line, which we don't need. + rdpq_set_texture_image_raw(surface_get_placeholder_index(tload->tex), PhysicalAddr(tload->tex->buffer), FMT_RGBA16, tload->tex->width/2, tload->tex->height); + rdpq_set_tile(tile_internal, FMT_RGBA16, tload->tmem_addr, 0, NULL); + rdpq_set_tile(tload->tile, fmt, tload->tmem_addr, tload->rect.tmem_pitch, &(tload->tileparms)); + tload->load_mode = TEX_LOAD_BLOCK; + } + + rdpq_load_block(tile_internal, s0/2, t0, tload->rect.num_texels/2, tload->rect.tmem_pitch); + + s0 = s0*4 + tload->rect.s0fx; + t0 = t0*4 + tload->rect.t0fx; + s1 = s1*4 + tload->rect.s1fx; + t1 = t1*4 + tload->rect.t1fx; + rdpq_set_tile_size_fx(tload->tile, s0, t0, s1, t1); +} + +static void texload_block(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + rdpq_tile_t tile_internal = (tload->tile + 1) & 7; + tex_format_t fmt = surface_get_format(tload->tex); + if (tload->load_mode != TEX_LOAD_BLOCK) { + // Use LOAD_BLOCK if we are uploading a full texture. Notice the weirdness of LOAD_BLOCK: + // * SET_TILE must be configured with tmem_pitch=0, as that is weirdly used as the number of + // texels to skip per line, which we don't need. + rdpq_set_texture_image_raw(surface_get_placeholder_index(tload->tex), PhysicalAddr(tload->tex->buffer), fmt, tload->tex->width, tload->tex->height); + rdpq_set_tile(tile_internal, fmt, tload->tmem_addr, 0, NULL); + rdpq_set_tile(tload->tile, fmt, tload->tmem_addr, tload->rect.tmem_pitch, &(tload->tileparms)); + tload->load_mode = TEX_LOAD_BLOCK; + } + + rdpq_load_block(tile_internal, s0, t0, tload->rect.num_texels, (fmt == FMT_RGBA32) ? 
tload->rect.tmem_pitch*2 : tload->rect.tmem_pitch); + + s0 = s0*4 + tload->rect.s0fx; + t0 = t0*4 + tload->rect.t0fx; + s1 = s1*4 + tload->rect.s1fx; + t1 = t1*4 + tload->rect.t1fx; + rdpq_set_tile_size_fx(tload->tile, s0, t0, s1, t1); +} + +static void texload_tile_4bpp(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + rdpq_tile_t tile_internal = (tload->tile + 1) & 7; + if (tload->load_mode != TEX_LOAD_TILE) { + rdpq_set_texture_image_raw(surface_get_placeholder_index(tload->tex), PhysicalAddr(tload->tex->buffer), FMT_I8, tload->tex->stride, tload->tex->height); + rdpq_set_tile(tile_internal, FMT_I8, tload->tmem_addr, tload->rect.tmem_pitch, NULL); + rdpq_set_tile(tload->tile, surface_get_format(tload->tex), tload->tmem_addr, tload->rect.tmem_pitch, &(tload->tileparms)); + tload->load_mode = TEX_LOAD_TILE; + } + + s0 &= ~1; s1 = (s1+1) & ~1; + rdpq_load_tile(tile_internal, s0/2, t0, s1/2, t1); + s0 = s0*4 + tload->rect.s0fx; + t0 = t0*4 + tload->rect.t0fx; + s1 = s1*4 + tload->rect.s1fx; + t1 = t1*4 + tload->rect.t1fx; + rdpq_set_tile_size_fx(tload->tile, s0, t0, s1, t1); +} + +static void texload_tile(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + tex_format_t fmt = surface_get_format(tload->tex); + if (tload->load_mode != TEX_LOAD_TILE) { + rdpq_set_texture_image(tload->tex); + rdpq_set_tile(tload->tile, fmt, tload->tmem_addr, tload->rect.tmem_pitch, &(tload->tileparms)); + tload->load_mode = TEX_LOAD_TILE; + } + + rdpq_load_tile(tload->tile, s0, t0, s1, t1); + s0 = s0*4 + tload->rect.s0fx; + t0 = t0*4 + tload->rect.t0fx; + s1 = s1*4 + tload->rect.s1fx; + t1 = t1*4 + tload->rect.t1fx; + rdpq_set_tile_size_fx(tload->tile, s0, t0, s1, t1); +} + + +static void texload_settile(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + tex_format_t fmt = surface_get_format(tload->tex); + + rdpq_set_tile(tload->tile, fmt, tload->tmem_addr, tload->rect.tmem_pitch, &(tload->tileparms)); + + s0 = s0*4 + tload->rect.s0fx; + t0 = t0*4 + tload->rect.t0fx; 
+ s1 = s1*4 + tload->rect.s1fx; + t1 = t1*4 + tload->rect.t1fx; + rdpq_set_tile_size_fx(tload->tile, s0, t0, s1, t1); +} + +///@cond +// Tex loader API, not yet documented +int tex_loader_load(tex_loader_t *tload, int s0, int t0, int s1, int t1) +{ + assertf(s0 <= s1, "Invalid texture load: s0:%d s1:%d", s0, s1); + assertf(t0 <= t1, "Invalid texture load: t0:%d t1:%d", t0, t1); + int mem = texload_set_rect(tload, s0, t0, s1, t1); + if (tload->rect.can_load_block && (t0 & 1) == 0) + tload->load_block(tload, s0, t0, s1, t1); + else + tload->load_tile(tload, s0, t0, s1, t1); + return mem; +} + +tex_loader_t tex_loader_init(rdpq_tile_t tile, const surface_t *tex) { + int bpp = TEX_FORMAT_BITDEPTH(surface_get_format(tex)); + bool is_4bpp = bpp == 4; + bool is_8bpp = bpp == 8; + return (tex_loader_t){ + .tex = tex, + .tile = tile, + .load_block = is_4bpp ? texload_block_4bpp : (is_8bpp ? texload_block_8bpp : texload_block), + .load_tile = is_4bpp ? texload_tile_4bpp : texload_tile, + }; +} + +void tex_loader_set_texparms(tex_loader_t *tload, const rdpq_texparms_t *parms) +{ + tload->texparms = parms; + tload->rect.width = tload->rect.height = 0; // Force recalculation of rect-dependent paramaters +} + +void tex_loader_set_tmem_addr(tex_loader_t *tload, int tmem_addr) +{ + tload->tmem_addr = tmem_addr; + tload->load_mode = TEX_LOAD_UNKNOWN; +} + +int tex_loader_calc_max_height(tex_loader_t *tload, int width) +{ + texload_set_rect(tload, 0, 0, width, 1); + + tex_format_t fmt = surface_get_format(tload->tex); + int tmem_size = (fmt == FMT_RGBA32 || fmt == FMT_CI4 || fmt == FMT_CI8) ? 
2048 : 4096; + return tmem_size / tload->rect.tmem_pitch; +} + +///@endcond + +int rdpq_tex_upload_sub(rdpq_tile_t tile, const surface_t *tex, const rdpq_texparms_t *parms, int s0, int t0, int s1, int t1) +{ + last_tload = tex_loader_init(tile, tex); + if (parms) tex_loader_set_texparms(&last_tload, parms); + + if (multi_upload.used) { + assertf(parms == NULL || parms->tmem_addr == 0, "Do not specify a TMEM address while doing a multi-texture upload"); + tex_loader_set_tmem_addr(&last_tload, RDPQ_AUTOTMEM); + } else { + tex_loader_set_tmem_addr(&last_tload, parms ? parms->tmem_addr : 0); + } + + int nbytes = tex_loader_load(&last_tload, s0, t0, s1, t1); + + if (multi_upload.used) { + rdpq_set_tile_autotmem(nbytes); + multi_upload.bytes += nbytes; + + #ifndef NDEBUG + // Do a best-effort check to make sure we don't exceed TMEM size. This is not 100% + // guaranteed to catch all cases: if a texture is uploaded via block playback, we will + // not know about its size. Anyway, the RSP will also do check and trigger a RSP assert, + // with the only gotcha that there will be no traceback for it. 
+ tex_format_t fmt = surface_get_format(tex); + if (fmt == FMT_CI4 || fmt == FMT_CI8 || fmt == FMT_RGBA32 || fmt == FMT_YUV16) + multi_upload.limit = 2048; + assertf(multi_upload.bytes <= multi_upload.limit, "Multi-texture upload exceeded TMEM size"); + #endif + } + + return nbytes; +} + +int rdpq_tex_upload(rdpq_tile_t tile, const surface_t *tex, const rdpq_texparms_t *parms) +{ + return rdpq_tex_upload_sub(tile, tex, parms, 0, 0, tex->width, tex->height); +} + +int rdpq_tex_reuse_sub(rdpq_tile_t tile, const rdpq_texparms_t *parms, int s0, int t0, int s1, int t1) +{ + assertf(multi_upload.used, "Reusing existing texture needs to be done through multi-texture upload"); + assertf(last_tload.tex, "Reusing existing texture is not possible without uploading at least one texture first"); + assertf(parms == NULL || parms->tmem_addr == 0, "Do not specify a TMEM address while reusing an existing texture"); + + // Check if just copying a tile descriptor is enough + if(!s0 && !t0 && s1 == last_tload.rect.width && t1 == last_tload.rect.height){ + if(!parms){ + last_tload.tile = tile; + last_tload.tmem_addr = RDPQ_AUTOTMEM_REUSE(0); + texload_settile(&last_tload, s0, t0, s1, t1); + return 0; + } + } + + // Make a new texloader to a new sub-rect + tex_loader_t tload = last_tload; + + assertf(s0 >= 0 && t0 >= 0 && s1 <= tload.rect.width && t1 <= tload.rect.height, "Sub coordinates (%i,%i)-(%i,%i) must be within bounds of the texture reused (%ix%i)", s0, t0, s1, t1, tload.rect.width, tload.rect.height); + assertf(t0 % 2 == 0, "t0=%i must be in multiples of 2 pixels", t0); + + tex_format_t fmt = surface_get_format(tload.tex); + int tmem_offset = TEX_FORMAT_PIX2BYTES(fmt, s0); + + assertf(tmem_offset % 8 == 0, "Due to 8-byte texture alignment, for %s format, s0=%i must be in multiples of %i pixels", tex_format_name(fmt), s0, TEX_FORMAT_BYTES2PIX(fmt, 8)); + + tmem_offset += tload.rect.tmem_pitch*t0; + tload.tmem_addr = RDPQ_AUTOTMEM_REUSE(tmem_offset); + + if(parms) tload.texparms 
= parms; + int subwidth = s1 - s0, subheight = t1 - t0; + tload.rect.width = subwidth; + tload.rect.height = subheight; + texload_recalc_tileparms(&tload); + + tload.tile = tile; + texload_settile(&tload, 0, 0, subwidth, subheight); + + return 0; +} + +int rdpq_tex_reuse(rdpq_tile_t tile, const rdpq_texparms_t *parms) +{ + return rdpq_tex_reuse_sub(tile, parms, 0, 0, last_tload.rect.width, last_tload.rect.height); +} + +/** + * @brief Implement large_tex_draw protocol via the texloader + * + * This is the most generic implementation, as using the texloader allows to + * support any texture of any size and any format. + */ +static void ltd_texloader(rdpq_tile_t tile, const surface_t *tex, int s0, int t0, int s1, int t1, + void (*draw_cb)(rdpq_tile_t tile, int s0, int t0, int s1, int t1), bool filtering) +{ + // The most efficient way to split a large surface is to load it in horizontal strips, + // whose height maximizes TMEM usage. The last strip might be smaller than the others. + + // Initial configuration of texloader + tex_loader_t tload = tex_loader_init(tile, tex); + + // Calculate the optimal height for a strip, based on strips of maximum length. + int tile_h = tex_loader_calc_max_height(&tload, tex->width); + + // Go through the surface + while (t0 < t1) + { + // Calculate the height of the current strip + int tm = filtering ? MAX(t0 - 1, 0) : t0; + int tn = MIN(tm + tile_h, t1); + + // Load the current strip + tex_loader_load(&tload, s0, tm, s1, tn); + + // Call the draw callback for this strip + int tx = (!filtering || tn == t1) ? tn : tn - 1; + draw_cb(tile, s0, t0, s1, tx); + + // Move to the next strip + t0 = tx; + } +} + +__attribute__((noinline)) +static void tex_xblit_norotate_noscale(const surface_t *surf, float x0, float y0, const rdpq_blitparms_t *parms, large_tex_draw ltd) +{ + rdpq_tile_t tile = parms->tile; + int src_width = parms->width ? parms->width : surf->width; + int src_height = parms->height ? 
parms->height : surf->height; + int s0 = parms->s0; + int t0 = parms->t0; + int cx = parms->cx + s0; + int cy = parms->cy + t0; + bool flip_x = parms->flip_x; + bool flip_y = parms->flip_y; + + void draw_cb(rdpq_tile_t tile, int s0, int t0, int s1, int t1) + { + int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; + + if (flip_x) { ks0 = src_width - s0 - 1; ks1 = src_width - s1 - 1; } + if (flip_y) { kt0 = src_height - t0 - 1; kt1 = src_height - t1 - 1; } + + rdpq_texture_rectangle(tile, x0 + ks0 - cx, y0 + kt0 - cy, x0 + ks1 - cx, y0 + kt1 - cy, s0, t0); + } + + (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb, parms->filtering); +} + +__attribute__((noinline)) +static void tex_xblit_norotate(const surface_t *surf, float x0, float y0, const rdpq_blitparms_t *parms, large_tex_draw ltd) +{ + rdpq_tile_t tile = parms->tile; + int src_width = parms->width ? parms->width : surf->width; + int src_height = parms->height ? parms->height : surf->height; + int s0 = parms->s0; + int t0 = parms->t0; + int cx = parms->cx + s0; + int cy = parms->cy + t0; + float scalex = parms->scale_x == 0 ? 1.0f : parms->scale_x; + float scaley = parms->scale_y == 0 ? 
1.0f : parms->scale_y; + bool flip_x = (scalex < 0) ^ parms->flip_x; + bool flip_y = (scaley < 0) ^ parms->flip_y; + + float mtx[3][2] = { + { scalex, 0 }, + { 0, scaley }, + { x0 - cx * scalex, + y0 - cy * scaley } + }; + + void draw_cb(rdpq_tile_t tile, int s0, int t0, int s1, int t1) + { + int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; + + if (flip_x) { ks0 = src_width - s0 - 1; ks1 = src_width - s1 - 1; } + if (flip_y) { kt0 = src_height - t0 - 1; kt1 = src_height - t1 - 1; } + + float k0x = mtx[0][0] * ks0 + mtx[1][0] * kt0 + mtx[2][0]; + float k0y = mtx[0][1] * ks0 + mtx[1][1] * kt0 + mtx[2][1]; + float k2x = mtx[0][0] * ks1 + mtx[1][0] * kt1 + mtx[2][0]; + float k2y = mtx[0][1] * ks1 + mtx[1][1] * kt1 + mtx[2][1]; + + rdpq_texture_rectangle_scaled(tile, k0x, k0y, k2x, k2y, s0, t0, s1, t1); + } + + (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb, parms->filtering); +} + +__attribute__((noinline)) +static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blitparms_t *parms, large_tex_draw ltd) +{ + rdpq_tile_t tile = parms->tile; + int src_width = parms->width ? parms->width : surf->width; + int src_height = parms->height ? parms->height : surf->height; + int s0 = parms->s0; + int t0 = parms->t0; + int cx = parms->cx + s0; + int cy = parms->cy + t0; + int nx = parms->nx; + int ny = parms->ny; + float scalex = parms->scale_x == 0 ? 1.0f : parms->scale_x; + float scaley = parms->scale_y == 0 ? 
1.0f : parms->scale_y; + + float sin_theta, cos_theta; + sincosf(parms->theta, &sin_theta, &cos_theta); + + float mtx[3][2] = { + { cos_theta * scalex, -sin_theta * scaley }, + { sin_theta * scalex, cos_theta * scaley }, + { x0 - cx * cos_theta * scalex - cy * sin_theta * scaley, + y0 + cx * sin_theta * scalex - cy * cos_theta * scaley } + }; + + void draw_cb(rdpq_tile_t tile, int s0, int t0, int s1, int t1) + { + int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; + + if (parms->flip_x) { ks0 = src_width - ks0; ks1 = src_width - ks1; } + if (parms->flip_y) { kt0 = src_height - kt0; kt1 = src_height - kt1; } + + float k0x = mtx[0][0] * ks0 + mtx[1][0] * kt0 + mtx[2][0]; + float k0y = mtx[0][1] * ks0 + mtx[1][1] * kt0 + mtx[2][1]; + float k2x = mtx[0][0] * ks1 + mtx[1][0] * kt1 + mtx[2][0]; + float k2y = mtx[0][1] * ks1 + mtx[1][1] * kt1 + mtx[2][1]; + float k1x = mtx[0][0] * ks1 + mtx[1][0] * kt0 + mtx[2][0]; + float k1y = mtx[0][1] * ks1 + mtx[1][1] * kt0 + mtx[2][1]; + float k3x = mtx[0][0] * ks0 + mtx[1][0] * kt1 + mtx[2][0]; + float k3y = mtx[0][1] * ks0 + mtx[1][1] * kt1 + mtx[2][1]; + + float v0[5] = { k0x, k0y, s0, t0, 1.0f }; + float v1[5] = { k1x, k1y, s1, t0, 1.0f }; + float v2[5] = { k2x, k2y, s1, t1, 1.0f }; + float v3[5] = { k3x, k3y, s0, t1, 1.0f }; + rdpq_triangle(&TRIFMT_TEX, v0, v1, v2); + rdpq_triangle(&TRIFMT_TEX, v0, v2, v3); + } + + void draw_cb_multi_rot(rdpq_tile_t tile, int s0, int t0, int s1, int t1) + { + int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; + if (parms->flip_x) { ks0 = src_width - ks0; ks1 = src_width - ks1; } + if (parms->flip_y) { kt0 = src_height - kt0; kt1 = src_height - kt1; } + + assert(s1-s0 == src_width); + + for (int j=0; jfiltering); + } else { + (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb, parms->filtering); + } +} + +/** @brief Internal implementation of #rdpq_tex_blit, using a custom large tex loader callback function */ +void __rdpq_tex_blit(const surface_t *surf, float x0, float y0, const 
rdpq_blitparms_t *parms, large_tex_draw ltd) +{ + static const rdpq_blitparms_t default_parms = {0}; + if (!parms) parms = &default_parms; + + // Check which implementation to use, depending on the requested features. + if (F2I(parms->theta) == 0) { + if (F2I(parms->scale_x) == 0 && F2I(parms->scale_y) == 0) + tex_xblit_norotate_noscale(surf, x0, y0, parms, ltd); + else + tex_xblit_norotate(surf, x0, y0, parms, ltd); + } else { + tex_xblit(surf, x0, y0, parms, ltd); + } +} + +void rdpq_tex_blit(const surface_t *surf, float x0, float y0, const rdpq_blitparms_t *parms) +{ + __rdpq_tex_blit(surf, x0, y0, parms, ltd_texloader); +} + +void rdpq_tex_upload_tlut(uint16_t *tlut, int color_idx, int num_colors) +{ + rdpq_set_texture_image_raw(0, PhysicalAddr(tlut), FMT_RGBA16, num_colors, 1); + rdpq_set_tile(RDPQ_TILE_INTERNAL, FMT_I4, TMEM_PALETTE_ADDR + color_idx*2*4, num_colors, NULL); + rdpq_load_tlut_raw(RDPQ_TILE_INTERNAL, 0, num_colors); +} + +void rdpq_tex_multi_begin(void) +{ + // Initialize autotmem engine + rdpq_set_tile_autotmem(0); + if (multi_upload.used++ == 0) { + multi_upload.bytes = 0; + multi_upload.limit = 4096; + last_tload.tex = 0; + } +} + +int rdpq_tex_multi_end(void) +{ + rdpq_set_tile_autotmem(-1); + --multi_upload.used; + assert(multi_upload.used >= 0); + return 0; +} diff --git a/src/rdpq/rdpq_tex_internal.h b/src/rdpq/rdpq_tex_internal.h new file mode 100644 index 0000000000..414d76e9f1 --- /dev/null +++ b/src/rdpq/rdpq_tex_internal.h @@ -0,0 +1,32 @@ +#ifndef LIBDRAGON_RDPQ_TEX_INTERNAL_H +#define LIBDRAGON_RDPQ_TEX_INTERNAL_H + +/** + * @brief Helper function to draw a large surface that doesn't fit in TMEM. + * + * This function analyzes the surface, finds the optimal splitting strategy to + * divided into rectangles that fit TMEM, and then go through them one of by one, + * loading them into TMEM and drawing them. + * + * The actual drawing is done by the caller, through the draw_cb function. 
This + * function will just call it with the information on the current rectangle + * within the original surface. + * + * @param tile Hint of the tile to use. Note that this function is free to use + * other tiles to perform its job. + * @param tex Surface to draw + * @param s0 Starting X coordinate in the texture to draw + * @param t0 Starting Y coordinate in the texture to draw + * @param s1 Ending X coordinate in the texture to draw + * @param t1 Ending Y coordinate in the texture to draw + * @param draw_cb Callback function to draw rectangle by rectangle. It will be called + * with the tile to use for drawing, and the rectangle of the original + * surface that has been loaded into TMEM. + * @param filtering Enable texture filtering workaround + */ +typedef void (*large_tex_draw)(rdpq_tile_t tile, const surface_t *tex, int s0, int t0, int s1, int t1, + void (*draw_cb)(rdpq_tile_t tile, int s0, int t0, int s1, int t1), bool filtering); + +void __rdpq_tex_blit(const surface_t *surf, float x0, float y0, const rdpq_blitparms_t *parms, large_tex_draw ltd); + +#endif diff --git a/src/rdpq/rdpq_tri.c b/src/rdpq/rdpq_tri.c new file mode 100644 index 0000000000..feed141120 --- /dev/null +++ b/src/rdpq/rdpq_tri.c @@ -0,0 +1,540 @@ +/** + * @file rdpq_tri.c + * @brief RDP Command queue: triangle drawing routine + * @ingroup rdp + * + * This file contains the implementation of a single function: #rdpq_triangle. + * + * The RDP triangle commands are complex to assemble because they are designed + * for the hardware that will be drawing them, rather than for the programmer + * that needs to create them. Specifically, they contain explicit gradients + * (partial derivatives aka horizontal and vertical per-pixel increments) + * for all attributes that need to be interpolated. 
Moreover, the RDP is able + * to draw triangles with subpixel precision, so input coordinates are fixed + * point and the setup code must take into account exactly how the rasterizer + * will handle fractional values. + */ + +#include <float.h> +#include <math.h> +#include "rdpq.h" +#include "rdpq_tri.h" +#include "rspq.h" +#include "rdpq_internal.h" +#include "rdpq_constants.h" +#include "utils.h" +#include "debug.h" + +/** @brief Set to 1 to activate tracing of all parameters of all triangles. */ +#define TRIANGLE_TRACE 0 + +#if TRIANGLE_TRACE +/** @brief like debugf(), but writes only if #TRIANGLE_TRACE is not 0 */ +#define tracef(fmt, ...) debugf(fmt, ##__VA_ARGS__) +#else +/** @brief like debugf(), but writes only if #TRIANGLE_TRACE is not 0 */ +#define tracef(fmt, ...) ({ }) +#endif + +const rdpq_trifmt_t TRIFMT_FILL = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = -1, .tex_offset = -1, .z_offset = -1, +}; + +const rdpq_trifmt_t TRIFMT_SHADE = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = 2, .tex_offset = -1, .z_offset = -1, +}; + +const rdpq_trifmt_t TRIFMT_TEX = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = -1, .tex_offset = 2, .z_offset = -1, +}; + +const rdpq_trifmt_t TRIFMT_SHADE_TEX = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = 2, .tex_offset = 6, .z_offset = -1, +}; + +const rdpq_trifmt_t TRIFMT_ZBUF = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = -1, .tex_offset = -1, .z_offset = 2, +}; + +const rdpq_trifmt_t TRIFMT_ZBUF_SHADE = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = 3, .tex_offset = -1, .z_offset = 2, +}; + +const rdpq_trifmt_t TRIFMT_ZBUF_TEX = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = -1, .tex_offset = 3, .z_offset = 2, +}; + +const rdpq_trifmt_t TRIFMT_ZBUF_SHADE_TEX = (rdpq_trifmt_t){ + .pos_offset = 0, .shade_offset = 3, .tex_offset = 7, .z_offset = 2, +}; + +/** @brief Converts a float to a s16.16 fixed point number */ +static int32_t float_to_s16_16(float f) +{ + // Currently the float must be clamped to this
range because + // otherwise the trunc.w.s instruction can potentially trigger + // an unimplemented operation exception due to integer overflow. + // TODO: maybe handle the exception? Clamp the value in the exception handler? + if (f >= 32768.f) { + return 0x7FFFFFFF; + } + + if (f < -32768.f) { + return 0x80000000; + } + + return floor(f * 65536.f); +} + +/** @brief Precomputed information about edges and slopes. */ +typedef struct { + float hx; ///< High edge (X) + float hy; ///< High edge (Y) + float mx; ///< Middle edge (X) + float my; ///< Middle edge (Y) + float fy; ///< Fractional part of Y1 (top vertex) + float ish; ///< Inverse slope of higher edge + float attr_factor; ///< Inverse triangle normal (used to calculate gradients) +} rdpq_tri_edge_data_t; + +__attribute__((always_inline)) +static inline void __rdpq_write_edge_coeffs(rspq_write_t *w, rdpq_tri_edge_data_t *data, uint8_t tile, uint8_t mipmaps, const float *v1, const float *v2, const float *v3) +{ + const float x1 = v1[0]; + const float x2 = v2[0]; + const float x3 = v3[0]; + const float y1 = floorf(v1[1]*4)/4; + const float y2 = floorf(v2[1]*4)/4; + const float y3 = floorf(v3[1]*4)/4; + + const float to_fixed_11_2 = 4.0f; + int32_t y1f = CLAMP((int32_t)floorf(v1[1]*to_fixed_11_2), -4096*4, 4095*4); + int32_t y2f = CLAMP((int32_t)floorf(v2[1]*to_fixed_11_2), -4096*4, 4095*4); + int32_t y3f = CLAMP((int32_t)floorf(v3[1]*to_fixed_11_2), -4096*4, 4095*4); + + data->hx = x3 - x1; + data->hy = y3 - y1; + data->mx = x2 - x1; + data->my = y2 - y1; + float lx = x3 - x2; + float ly = y3 - y2; + + const float nz = (data->hx*data->my) - (data->hy*data->mx); + data->attr_factor = (fabs(nz) > FLT_MIN) ? (-1.0f / nz) : 0; + const uint32_t lft = nz < 0; + + data->ish = (fabs(data->hy) > FLT_MIN) ? (data->hx / data->hy) : 0; + float ism = (fabs(data->my) > FLT_MIN) ? (data->mx / data->my) : 0; + float isl = (fabs(ly) > FLT_MIN) ? 
(lx / ly) : 0; + data->fy = floorf(y1) - y1; + + const float xh = x1 + data->fy * data->ish; + const float xm = x1 + data->fy * ism; + const float xl = x2; + + rspq_write_arg(w, _carg(lft, 0x1, 23) | _carg(mipmaps ? mipmaps-1 : 0, 0x7, 19) | _carg(tile, 0x7, 16) | _carg(y3f, 0x3FFF, 0)); + rspq_write_arg(w, _carg(y2f, 0x3FFF, 16) | _carg(y1f, 0x3FFF, 0)); + rspq_write_arg(w, float_to_s16_16(xl)); + rspq_write_arg(w, float_to_s16_16(isl)); + rspq_write_arg(w, float_to_s16_16(xh)); + rspq_write_arg(w, float_to_s16_16(data->ish)); + rspq_write_arg(w, float_to_s16_16(xm)); + rspq_write_arg(w, float_to_s16_16(ism)); + + tracef("x1: %f (%08lx)\n", x1, (int32_t)(x1 * 4.0f)); + tracef("x2: %f (%08lx)\n", x2, (int32_t)(x2 * 4.0f)); + tracef("x3: %f (%08lx)\n", x3, (int32_t)(x3 * 4.0f)); + tracef("y1: %f (%08lx)\n", y1, (int32_t)(y1 * 4.0f)); + tracef("y2: %f (%08lx)\n", y2, (int32_t)(y2 * 4.0f)); + tracef("y3: %f (%08lx)\n", y3, (int32_t)(y3 * 4.0f)); + + tracef("hx: %f (%08lx)\n", data->hx, (int32_t)(data->hx * 4.0f)); + tracef("hy: %f (%08lx)\n", data->hy, (int32_t)(data->hy * 4.0f)); + tracef("mx: %f (%08lx)\n", data->mx, (int32_t)(data->mx * 4.0f)); + tracef("my: %f (%08lx)\n", data->my, (int32_t)(data->my * 4.0f)); + tracef("lx: %f (%08lx)\n", lx, (int32_t)(lx * 4.0f)); + tracef("ly: %f (%08lx)\n", ly, (int32_t)(ly * 4.0f)); + + tracef("p1: %f (%08lx)\n", (data->hx*data->my), (int32_t)(data->hx*data->my*16.0f)); + tracef("p2: %f (%08lx)\n", (data->hy*data->mx), (int32_t)(data->hy*data->mx*16.0f)); + tracef("nz: %f (%08lx)\n", nz, (int32_t)(nz * 16.0f)); + tracef("-nz: %f (%08lx)\n", -nz, -(int32_t)(nz * 16.0f)); + tracef("inv_nz: %f (%08lx)\n", data->attr_factor, (int32_t)(data->attr_factor * 65536.0f / 2.0f / 16.0f)); + + tracef("fy: %f (%08lx)\n", data->fy, (int32_t)(data->fy * 65536.0f)); + tracef("ish: %f (%08lx)\n", data->ish, (int32_t)(data->ish * 65536.0f)); + tracef("ism: %f (%08lx)\n", ism, (int32_t)(ism * 65536.0f)); + tracef("isl: %f (%08lx)\n", isl, 
(int32_t)(isl * 65536.0f)); + + tracef("xh: %f (%08lx)\n", xh, (int32_t)(xh * 65536.0f)); + tracef("xm: %f (%08lx)\n", xm, (int32_t)(xm * 65536.0f)); + tracef("xl: %f (%08lx)\n", xl, (int32_t)(xl * 65536.0f)); +} + +__attribute__((always_inline)) +static inline void __rdpq_write_shade_coeffs(rspq_write_t *w, rdpq_tri_edge_data_t *data, const float *v1, const float *v2, const float *v3) +{ + const float mr = (v2[0] - v1[0]) * 255.f; + const float mg = (v2[1] - v1[1]) * 255.f; + const float mb = (v2[2] - v1[2]) * 255.f; + const float ma = (v2[3] - v1[3]) * 255.f; + const float hr = (v3[0] - v1[0]) * 255.f; + const float hg = (v3[1] - v1[1]) * 255.f; + const float hb = (v3[2] - v1[2]) * 255.f; + const float ha = (v3[3] - v1[3]) * 255.f; + + const float nxR = data->hy*mr - data->my*hr; + const float nxG = data->hy*mg - data->my*hg; + const float nxB = data->hy*mb - data->my*hb; + const float nxA = data->hy*ma - data->my*ha; + const float nyR = data->mx*hr - data->hx*mr; + const float nyG = data->mx*hg - data->hx*mg; + const float nyB = data->mx*hb - data->hx*mb; + const float nyA = data->mx*ha - data->hx*ma; + + const float DrDx = nxR * data->attr_factor; + const float DgDx = nxG * data->attr_factor; + const float DbDx = nxB * data->attr_factor; + const float DaDx = nxA * data->attr_factor; + const float DrDy = nyR * data->attr_factor; + const float DgDy = nyG * data->attr_factor; + const float DbDy = nyB * data->attr_factor; + const float DaDy = nyA * data->attr_factor; + + const float DrDe = DrDy + DrDx * data->ish; + const float DgDe = DgDy + DgDx * data->ish; + const float DbDe = DbDy + DbDx * data->ish; + const float DaDe = DaDy + DaDx * data->ish; + + const int32_t final_r = float_to_s16_16(v1[0] * 255.f + data->fy * DrDe); + const int32_t final_g = float_to_s16_16(v1[1] * 255.f + data->fy * DgDe); + const int32_t final_b = float_to_s16_16(v1[2] * 255.f + data->fy * DbDe); + const int32_t final_a = float_to_s16_16(v1[3] * 255.f + data->fy * DaDe); + + const 
int32_t DrDx_fixed = float_to_s16_16(DrDx); + const int32_t DgDx_fixed = float_to_s16_16(DgDx); + const int32_t DbDx_fixed = float_to_s16_16(DbDx); + const int32_t DaDx_fixed = float_to_s16_16(DaDx); + + const int32_t DrDe_fixed = float_to_s16_16(DrDe); + const int32_t DgDe_fixed = float_to_s16_16(DgDe); + const int32_t DbDe_fixed = float_to_s16_16(DbDe); + const int32_t DaDe_fixed = float_to_s16_16(DaDe); + + const int32_t DrDy_fixed = float_to_s16_16(DrDy); + const int32_t DgDy_fixed = float_to_s16_16(DgDy); + const int32_t DbDy_fixed = float_to_s16_16(DbDy); + const int32_t DaDy_fixed = float_to_s16_16(DaDy); + + rspq_write_arg(w, (final_r&0xffff0000) | (0xffff&(final_g>>16))); + rspq_write_arg(w, (final_b&0xffff0000) | (0xffff&(final_a>>16))); + rspq_write_arg(w, (DrDx_fixed&0xffff0000) | (0xffff&(DgDx_fixed>>16))); + rspq_write_arg(w, (DbDx_fixed&0xffff0000) | (0xffff&(DaDx_fixed>>16))); + rspq_write_arg(w, (final_r<<16) | (final_g&0xffff)); + rspq_write_arg(w, (final_b<<16) | (final_a&0xffff)); + rspq_write_arg(w, (DrDx_fixed<<16) | (DgDx_fixed&0xffff)); + rspq_write_arg(w, (DbDx_fixed<<16) | (DaDx_fixed&0xffff)); + rspq_write_arg(w, (DrDe_fixed&0xffff0000) | (0xffff&(DgDe_fixed>>16))); + rspq_write_arg(w, (DbDe_fixed&0xffff0000) | (0xffff&(DaDe_fixed>>16))); + rspq_write_arg(w, (DrDy_fixed&0xffff0000) | (0xffff&(DgDy_fixed>>16))); + rspq_write_arg(w, (DbDy_fixed&0xffff0000) | (0xffff&(DaDy_fixed>>16))); + rspq_write_arg(w, (DrDe_fixed<<16) | (DgDe_fixed&0xffff)); + rspq_write_arg(w, (DbDe_fixed<<16) | (DaDe_fixed&0xffff)); + rspq_write_arg(w, (DrDy_fixed<<16) | (DgDy_fixed&0xffff)); + rspq_write_arg(w, (DbDy_fixed<<16) | (DaDy_fixed&0xffff)); + + tracef("b1: %f (%08lx)\n", v1[2], (uint32_t)(v1[2]*255.0f)); + tracef("b2: %f (%08lx)\n", v2[2], (uint32_t)(v2[2]*255.0f)); + tracef("b3: %f (%08lx)\n", v3[2], (uint32_t)(v3[2]*255.0f)); + tracef("mb: %f (%08lx)\n", mb, (uint32_t)(int32_t)mb); + tracef("hb: %f (%08lx)\n", hb, (uint32_t)(int32_t)hb); + tracef("nxB: 
%f (%08lx)\n", nxB, (int32_t)(nxB * 4.0f)); + tracef("DbDx: %f (%08lx)\n", DbDx, (uint32_t)(DbDx * 65536.0f)); + tracef("DbDx_fixed: (%08lx)\n", DbDx_fixed); +} + +__attribute__((always_inline)) +static inline void __rdpq_write_tex_coeffs(rspq_write_t *w, rdpq_tri_edge_data_t *data, const float *v1, const float *v2, const float *v3) +{ + float s1 = v1[0] * 32.f, t1 = v1[1] * 32.f, invw1 = v1[2]; + float s2 = v2[0] * 32.f, t2 = v2[1] * 32.f, invw2 = v2[2]; + float s3 = v3[0] * 32.f, t3 = v3[1] * 32.f, invw3 = v3[2]; + + const float minw = 1.0f / MAX(MAX(invw1, invw2), invw3); + + tracef("s1: %f (%04x)\n", s1, (int16_t)s1); + tracef("t1: %f (%04x)\n", t1, (int16_t)t1); + tracef("s2: %f (%04x)\n", s2, (int16_t)s2); + tracef("t2: %f (%04x)\n", t2, (int16_t)t2); + + tracef("invw1: %f (%08lx)\n", invw1, (int32_t)(invw1*65536)); + tracef("invw2: %f (%08lx)\n", invw2, (int32_t)(invw2*65536)); + tracef("invw3: %f (%08lx)\n", invw3, (int32_t)(invw3*65536)); + tracef("minw: %f (%08lx)\n", minw, (int32_t)(minw*65536)); + + invw1 *= minw; + invw2 *= minw; + invw3 *= minw; + + s1 *= invw1; + t1 *= invw1; + s2 *= invw2; + t2 *= invw2; + s3 *= invw3; + t3 *= invw3; + + invw1 *= 0x7FFF; + invw2 *= 0x7FFF; + invw3 *= 0x7FFF; + + const float ms = s2 - s1; + const float mt = t2 - t1; + const float mw = invw2 - invw1; + const float hs = s3 - s1; + const float ht = t3 - t1; + const float hw = invw3 - invw1; + + const float nxS = data->hy*ms - data->my*hs; + const float nxT = data->hy*mt - data->my*ht; + const float nxW = data->hy*mw - data->my*hw; + const float nyS = data->mx*hs - data->hx*ms; + const float nyT = data->mx*ht - data->hx*mt; + const float nyW = data->mx*hw - data->hx*mw; + + const float DsDx = nxS * data->attr_factor; + const float DtDx = nxT * data->attr_factor; + const float DwDx = nxW * data->attr_factor; + const float DsDy = nyS * data->attr_factor; + const float DtDy = nyT * data->attr_factor; + const float DwDy = nyW * data->attr_factor; + + const float DsDe = DsDy 
+ DsDx * data->ish; + const float DtDe = DtDy + DtDx * data->ish; + const float DwDe = DwDy + DwDx * data->ish; + + const int32_t final_s = float_to_s16_16(s1 + data->fy * DsDe); + const int32_t final_t = float_to_s16_16(t1 + data->fy * DtDe); + const int32_t final_w = float_to_s16_16(invw1 + data->fy * DwDe); + + const int32_t DsDx_fixed = float_to_s16_16(DsDx); + const int32_t DtDx_fixed = float_to_s16_16(DtDx); + const int32_t DwDx_fixed = float_to_s16_16(DwDx); + + const int32_t DsDe_fixed = float_to_s16_16(DsDe); + const int32_t DtDe_fixed = float_to_s16_16(DtDe); + const int32_t DwDe_fixed = float_to_s16_16(DwDe); + + const int32_t DsDy_fixed = float_to_s16_16(DsDy); + const int32_t DtDy_fixed = float_to_s16_16(DtDy); + const int32_t DwDy_fixed = float_to_s16_16(DwDy); + + rspq_write_arg(w, (final_s&0xffff0000) | (0xffff&(final_t>>16))); + rspq_write_arg(w, (final_w&0xffff0000)); + rspq_write_arg(w, (DsDx_fixed&0xffff0000) | (0xffff&(DtDx_fixed>>16))); + rspq_write_arg(w, (DwDx_fixed&0xffff0000)); + rspq_write_arg(w, (final_s<<16) | (final_t&0xffff)); + rspq_write_arg(w, (final_w<<16)); + rspq_write_arg(w, (DsDx_fixed<<16) | (DtDx_fixed&0xffff)); + rspq_write_arg(w, (DwDx_fixed<<16)); + rspq_write_arg(w, (DsDe_fixed&0xffff0000) | (0xffff&(DtDe_fixed>>16))); + rspq_write_arg(w, (DwDe_fixed&0xffff0000)); + rspq_write_arg(w, (DsDy_fixed&0xffff0000) | (0xffff&(DtDy_fixed>>16))); + rspq_write_arg(w, (DwDy_fixed&0xffff0000)); + rspq_write_arg(w, (DsDe_fixed<<16) | (DtDe_fixed&0xffff)); + rspq_write_arg(w, (DwDe_fixed<<16)); + rspq_write_arg(w, (DsDy_fixed<<16) | (DtDy_fixed&0xffff)); /* low 16 (fractional) bits of DsDy and DtDy */ + rspq_write_arg(w, (DwDy_fixed<<16)); + + tracef("invw1-mul: %f (%08lx)\n", invw1, (int32_t)(invw1*65536)); + tracef("invw2-mul: %f (%08lx)\n", invw2, (int32_t)(invw2*65536)); + tracef("invw3-mul: %f (%08lx)\n", invw3, (int32_t)(invw3*65536)); + + tracef("s1w: %f (%04x)\n", s1, (int16_t)s1); + tracef("t1w: %f (%04x)\n", t1, (int16_t)t1); + tracef("s2w: %f (%04x)\n", s2, (int16_t)s2);
+ tracef("t2w: %f (%04x)\n", t2, (int16_t)t2); + + tracef("ms: %f (%04x)\n", ms, (int16_t)(ms)); + tracef("mt: %f (%04x)\n", mt, (int16_t)(mt)); + tracef("hs: %f (%04x)\n", hs, (int16_t)(hs)); + tracef("ht: %f (%04x)\n", ht, (int16_t)(ht)); + + tracef("nxS: %f (%04x)\n", nxS, (int16_t)(nxS / 65536.0f)); + tracef("nxT: %f (%04x)\n", nxT, (int16_t)(nxT / 65536.0f)); + tracef("nyS: %f (%04x)\n", nyS, (int16_t)(nyS / 65536.0f)); + tracef("nyT: %f (%04x)\n", nyT, (int16_t)(nyT / 65536.0f)); +} + +__attribute__((always_inline)) +static inline void __rdpq_write_zbuf_coeffs(rspq_write_t *w, rdpq_tri_edge_data_t *data, const float *v1, const float *v2, const float *v3) +{ + const float z1 = v1[0] * 0x7FFF; + const float z2 = v2[0] * 0x7FFF; + const float z3 = v3[0] * 0x7FFF; + + const float mz = z2 - z1; + const float hz = z3 - z1; + + const float nxz = data->hy*mz - data->my*hz; + const float nyz = data->mx*hz - data->hx*mz; + + const float DzDx = nxz * data->attr_factor; + const float DzDy = nyz * data->attr_factor; + const float DzDe = DzDy + DzDx * data->ish; + + const int32_t final_z = float_to_s16_16(z1 + data->fy * DzDe); + const int32_t DzDx_fixed = float_to_s16_16(DzDx); + const int32_t DzDe_fixed = float_to_s16_16(DzDe); + const int32_t DzDy_fixed = float_to_s16_16(DzDy); + + rspq_write_arg(w, final_z); + rspq_write_arg(w, DzDx_fixed); + rspq_write_arg(w, DzDe_fixed); + rspq_write_arg(w, DzDy_fixed); + + tracef("z1: %f (%04x)\n", v1[0], (uint16_t)(z1)); + tracef("z2: %f (%04x)\n", v2[0], (uint16_t)(z2)); + tracef("z3: %f (%04x)\n", v3[0], (uint16_t)(z3)); + + tracef("mz: %f (%04x)\n", mz, (uint16_t)(mz)); + tracef("hz: %f (%04x)\n", hz, (uint16_t)(hz)); + + tracef("nxz: %f (%08lx)\n", nxz, (uint32_t)(nxz * 4.0f)); + tracef("nyz: %f (%08lx)\n", nyz, (uint32_t)(nyz * 4.0f)); + + tracef("dzdx: %f (%08llx)\n", DzDx, (uint64_t)(DzDx * 65536.0f)); + tracef("dzdy: %f (%08llx)\n", DzDy, (uint64_t)(DzDy * 65536.0f)); + tracef("dzde: %f (%08llx)\n", DzDe, (uint64_t)(DzDe * 
65536.0f)); +} + +/** @brief RDP triangle primitive assembled on the CPU */ +void rdpq_triangle_cpu(const rdpq_trifmt_t *fmt, const float *v1, const float *v2, const float *v3) +{ + uint32_t res = AUTOSYNC_PIPE; + if (fmt->tex_offset >= 0) { + // FIXME: this can be using multiple tiles depending on color combiner and texture + // effects such as detail and sharpen. Figure it out a way to handle these in the + // autosync engine. + res |= AUTOSYNC_TILE(fmt->tex_tile); + res |= AUTOSYNC_TMEMS; + } + __rdpq_autosync_use(res); + + uint32_t cmd_id = RDPQ_CMD_TRI; + + uint32_t size = 8; + if (fmt->shade_offset >= 0) { + size += 16; + cmd_id |= 0x4; + } + if (fmt->tex_offset >= 0) { + size += 16; + cmd_id |= 0x2; + } + if (fmt->z_offset >= 0) { + size += 4; + cmd_id |= 0x1; + } + + rspq_write_t w = rspq_write_begin(RDPQ_OVL_ID, cmd_id, size); + + if( v1[fmt->pos_offset + 1] > v2[fmt->pos_offset + 1] ) { SWAP(v1, v2); } + if( v2[fmt->pos_offset + 1] > v3[fmt->pos_offset + 1] ) { SWAP(v2, v3); } + if( v1[fmt->pos_offset + 1] > v2[fmt->pos_offset + 1] ) { SWAP(v1, v2); } + + rdpq_tri_edge_data_t data; + __rdpq_write_edge_coeffs(&w, &data, fmt->tex_tile, fmt->tex_mipmaps, v1 + fmt->pos_offset, v2 + fmt->pos_offset, v3 + fmt->pos_offset); + + if (fmt->shade_offset >= 0) { + const float *shade_v2 = fmt->shade_flat ? v1 : v2; + const float *shade_v3 = fmt->shade_flat ? 
v1 : v3; + __rdpq_write_shade_coeffs(&w, &data, v1 + fmt->shade_offset, shade_v2 + fmt->shade_offset, shade_v3 + fmt->shade_offset); + } + + if (fmt->tex_offset >= 0) { + __rdpq_write_tex_coeffs(&w, &data, v1 + fmt->tex_offset, v2 + fmt->tex_offset, v3 + fmt->tex_offset); + } + + if (fmt->z_offset >= 0) { + __rdpq_write_zbuf_coeffs(&w, &data, v1 + fmt->z_offset, v2 + fmt->z_offset, v3 + fmt->z_offset); + } + + rspq_write_end(&w); +} + +/** @brief RDP triangle primitive assembled on the RSP */ +void rdpq_triangle_rsp(const rdpq_trifmt_t *fmt, const float *v1, const float *v2, const float *v3) +{ + uint32_t res = AUTOSYNC_PIPE; + if (fmt->tex_offset >= 0) { + // FIXME: this can be using multiple tiles depending on color combiner and texture + // effects such as detail and sharpen. Figure it out a way to handle these in the + // autosync engine. + res |= AUTOSYNC_TILE(fmt->tex_tile); + res |= AUTOSYNC_TMEM(0); + } + __rdpq_autosync_use(res); + + uint32_t cmd_id = RDPQ_CMD_TRI; + if (fmt->shade_offset >= 0) cmd_id |= 0x4; + if (fmt->tex_offset >= 0) cmd_id |= 0x2; + if (fmt->z_offset >= 0) cmd_id |= 0x1; + + const int TRI_DATA_LEN = ROUND_UP((2+1+1+3)*4, 16); + + const float *vtx[3] = {v1, v2, v3}; + for (int i=0;i<3;i++) { + const float *v = vtx[i]; + + // X,Y: s13.2 + int16_t x = floorf(v[fmt->pos_offset+0] * 4.0f); + int16_t y = floorf(v[fmt->pos_offset+1] * 4.0f); + + int16_t z = 0; + if (fmt->z_offset >= 0) { + z = v[fmt->z_offset+0] * 0x7FFF; + } + + int32_t rgba = 0; + if (fmt->shade_offset >= 0) { + const float *v_shade = fmt->shade_flat ? 
v1 : v; + uint32_t r = v_shade[fmt->shade_offset+0] * 255.0; + uint32_t g = v_shade[fmt->shade_offset+1] * 255.0; + uint32_t b = v_shade[fmt->shade_offset+2] * 255.0; + uint32_t a = v_shade[fmt->shade_offset+3] * 255.0; + rgba = (r << 24) | (g << 16) | (b << 8) | a; + } + + int16_t s=0, t=0; + int32_t w=0, inv_w=0; + if (fmt->tex_offset >= 0) { + s = v[fmt->tex_offset+0] * 32.0f; + t = v[fmt->tex_offset+1] * 32.0f; + w = float_to_s16_16(1.0f / v[fmt->tex_offset+2]); + inv_w = float_to_s16_16( v[fmt->tex_offset+2]); + } + + rspq_write(RDPQ_OVL_ID, RDPQ_CMD_TRIANGLE_DATA, + TRI_DATA_LEN * i, + (x << 16) | (y & 0xFFFF), + (z << 16), + rgba, + (s << 16) | (t & 0xFFFF), + w, + inv_w); + } + + rspq_write(RDPQ_OVL_ID, RDPQ_CMD_TRIANGLE, + 0xC000 | (cmd_id << 8) | + (fmt->tex_mipmaps ? (fmt->tex_mipmaps-1) << 3 : 0) | + (fmt->tex_tile & 7)); +} + +void rdpq_triangle(const rdpq_trifmt_t *fmt, const float *v1, const float *v2, const float *v3) +{ +#if RDPQ_TRIANGLE_REFERENCE + rdpq_triangle_cpu(fmt, v1, v2, v3); +#else + rdpq_triangle_rsp(fmt, v1, v2, v3); +#endif +} diff --git a/src/rdpq/rsp_rdpq.S b/src/rdpq/rsp_rdpq.S new file mode 100644 index 0000000000..5aef4585c2 --- /dev/null +++ b/src/rdpq/rsp_rdpq.S @@ -0,0 +1,807 @@ +#include +#include "rdpq_constants.h" +#include "rdpq_macros.h" + +#define rdpq_write_ptr s7 + + .data + + RSPQ_BeginOverlayHeader + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xC0 NOOP + RSPQ_DefineCommand RDPQCmd_SetLookupAddress, 8 # 0xC1 Set lookup address + RSPQ_DefineCommand RDPQCmd_RectEx, 8 # 0xC2 Fill Rectangle (esclusive bounds) + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xC3 + RSPQ_DefineCommand RDPQCmd_ResetMode, 16 # 0xC4 Reset Mode (set mode standard) + RSPQ_DefineCommand RDPQCmd_SetCombineMode_2Pass, 8 # 0xC5 SET_COMBINE_MODE (two pass) + RSPQ_DefineCommand RDPQCmd_PushMode, 8 # 0xC6 Push Mode + RSPQ_DefineCommand RDPQCmd_PopMode, 8 # 0xC7 Pop Mode + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 32 # 0xC8 Filled + RSPQ_DefineCommand 
RDPQCmd_PassthroughTriangle, 48 # 0xC9 Filled ZBuffered + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 96 # 0xCA Textured + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 112 # 0xCB Textured ZBuffered + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 96 # 0xCC Shaded + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 112 # 0xCD Shaded ZBuffered + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 160 # 0xCE Shaded Textured + RSPQ_DefineCommand RDPQCmd_PassthroughTriangle, 176 # 0xCF Shaded Textured ZBuffered + + RSPQ_DefineCommand RDPQCmd_RectEx, 16 # 0xD0 Texture Rectangle (exclusive bounds) + RSPQ_DefineCommand RDPQCmd_SetDebugMode, 4 # 0xD1 Set Debug mode + RSPQ_DefineCommand RDPQCmd_SetScissorEx, 8 # 0xD2 Set Scissor (exclusive bounds) + RSPQ_DefineCommand RDPQCmd_SetPrimColorComponent, 8 # 0xD3 Set Primitive Color Component (minlod or primlod or rgba) + RSPQ_DefineCommand RDPQCmd_ModifyOtherModes, 12 # 0xD4 Modify SOM + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xD5 + RSPQ_DefineCommand RDPQCmd_SetFillColor32, 8 # 0xD6 + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xD7 + RSPQ_DefineCommand RDPQCmd_SetBlendingMode, 8 # 0xD8 Set Blending Mode + RSPQ_DefineCommand RDPQCmd_SetFogMode, 8 # 0xD9 Set Fog Mode + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xDA + RSPQ_DefineCommand RDPQCmd_SetCombineMode_1Pass, 16 # 0xDB SET_COMBINE_MODE (one pass) + RSPQ_DefineCommand RDPQCmd_AutoTmem_SetAddr, 4 # 0xDC AutoTmem_SetAddr + RSPQ_DefineCommand RDPQCmd_AutoTmem_SetTile, 8 # 0xDD AutoTmem_SetTile + RSPQ_DefineCommand RDPQCmd_Triangle, 4 # 0xDE Triangle (assembled by RSP) + RSPQ_DefineCommand RDPQCmd_TriangleData, 28 # 0xDF Set Triangle Data + + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xE0 + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xE1 + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xE2 + RSPQ_DefineCommand RSPQCmd_Noop, 8 # 0xE3 + RSPQ_DefineCommand RDPQCmd_Passthrough16, 16 # 0xE4 TEXTURE_RECTANGLE + RSPQ_DefineCommand RDPQCmd_Passthrough16, 16 # 0xE5 TEXTURE_RECTANGLE_FLIP + RSPQ_DefineCommand
RDPQCmd_Passthrough8, 8 # 0xE6 SYNC_LOAD + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xE7 SYNC_PIPE + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xE8 SYNC_TILE + RSPQ_DefineCommand RDPQCmd_SyncFull, 8 # 0xE9 SYNC_FULL + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xEA SET_KEY_GB + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xEB SET_KEY_R + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xEC SET_CONVERT + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xED SET_SCISSOR + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xEE SET_PRIM_DEPTH + RSPQ_DefineCommand RDPQCmd_SetOtherModes, 8 # 0xEF SET_OTHER_MODES + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF0 LOAD_TLUT + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF1 RDPQ_DEBUG (debugging command) + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF2 SET_TILE_SIZE + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF3 LOAD_BLOCK + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF4 LOAD_TILE + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF5 SET_TILE + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF6 FILL_RECTANGLE + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF7 SET_FILL_COLOR + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF8 SET_FOG_COLOR + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xF9 SET_BLEND_COLOR + RSPQ_DefineCommand RDPQCmd_SetPrimColor, 8 # 0xFA SET_PRIM_COLOR + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xFB SET_ENV_COLOR + RSPQ_DefineCommand RDPQCmd_Passthrough8, 8 # 0xFC SET_COMBINE_MODE + RSPQ_DefineCommand RDPQCmd_SetFixupImage, 8 # 0xFD SET_TEXTURE_IMAGE + RSPQ_DefineCommand RDPQCmd_SetFixupImage, 8 # 0xFE SET_Z_IMAGE + RSPQ_DefineCommand RDPQCmd_SetColorImage, 8 # 0xFF SET_COLOR_IMAGE + RSPQ_EndOverlayHeader + + .align 4 + .ascii "Dragon RDP Queue" + .ascii "Rasky & Snacchus" + + # RDPQ Overlay state + # NOTE: keep this in sync with rdpq_state_t in rdpq.c + .align 4 + RSPQ_BeginSavedState +RDPQ_SYNCFULL: .quad 0 # Last syncfull command (includes callback). 
NOTE: this must stay as first variable in the state +RDPQ_SYNCPOINT_ID: .long 0 # Syncpoint ID for the last syncfull command +_PADDING: .long 0 + +RDPQ_RDRAM_STATE_ADDR: .word 0 +RDPQ_RDRAM_SYNCPOINT_ADDR: .word 0 + +RDPQ_ADDRESS_TABLE: .ds.l RDPQ_ADDRESS_TABLE_SIZE + +RDPQ_AUTOTMEM_ADDR: .half 0 +RDPQ_AUTOTMEM_ADDR_PREV:.half 0 +RDPQ_AUTOTMEM_LIMIT: .half 0 +RDPQ_AUTOTMEM_ENABLED: .byte 0 + +# Store individual components of the complex Prim Color structure for sync between commands +# Used in SetPrimColorComponent and SetPrimColor + .align 4 +RDPQ_PRIM_COLOR_EX: .word 0 +RDPQ_PRIM_COLOR_RGBA: .word 0 + + .align 4 +# Stack slots for 3 saved RDP modes +RDPQ_MODE_STACK: .ds.b (RDPQ_MODE_END - RDPQ_MODE)*3 + + .align 4 +RDPQ_TRI_DATA0: .dcb.l 7 + .align 4 +RDPQ_TRI_DATA1: .dcb.l 7 + .align 4 +RDPQ_TRI_DATA2: .dcb.l 7 + + + RSPQ_EndSavedState + + .bss + + .text + + ############################################################# + # RDPQCmd_Passthrough8 + # + # Forwards the RDP command contained in a0 and a1 to the RDP stream. + ############################################################# + .func RDPQCmd_Passthrough8 +RDPQCmd_Passthrough8: + jal_and_j RDPQ_Write8, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_Passthrough16 + # + # Forwards the RDP command contained in a0-a3 to the RDP stream. + ############################################################# + .func RDPQCmd_Passthrough16 +RDPQCmd_Passthrough16: + jal_and_j RDPQ_Write16, RDPQ_Finalize + .endfunc + + + ############################################################# + # RDPQCmd_SetPrimColor + # + # Forwards the RDP command contained in a0 and a1 to the RDP stream. 
+ # and saves the PRIM color data into DMEM for sync + ############################################################# + .func RDPQCmd_SetPrimColor +RDPQCmd_SetPrimColor: + sw a0, %lo(RDPQ_PRIM_COLOR_EX) + j RDPQCmd_Passthrough8 + sw a1, %lo(RDPQ_PRIM_COLOR_RGBA) + .endfunc + + ############################################################# + # RDPQCmd_SetPrimColorComponent + # + # Allows for partial setup of data through the 2 bits in a0 + # Forwards the RDP command contained in a0 and a1 to the RDP stream. + # and saves the PRIM color data into DMEM for sync + ############################################################# + .func RDPQCmd_SetPrimColorComponent +RDPQCmd_SetPrimColorComponent: + lui t0, 0xD300 ^ 0xFA00 + xor a0, t0 + + srl t3, a0, 16 # the selection is stored in the upper half of a0, in the lowest 2 bits + andi t3, 3 + beqz t3, set_rgba + addi t3, -1 + beqz t3, set_primlod +set_minlod: + lw t1, %lo(RDPQ_PRIM_COLOR_EX) # restore current prim color data + and t1, 0x000000FF + or a0, t1 + j setprimcolor_finalize +set_primlod: + lw t1, %lo(RDPQ_PRIM_COLOR_EX) # restore current prim color data + and t1, 0x0000FF00 + or a0, t1 + j setprimcolor_finalize +set_rgba: + lw t1, %lo(RDPQ_PRIM_COLOR_EX) # restore current prim color data + and t1, 0x0000FFFF + or a0, t1 + sw a1, %lo(RDPQ_PRIM_COLOR_RGBA) +setprimcolor_finalize: + sw a0, %lo(RDPQ_PRIM_COLOR_EX) + li s4, %lo(RDPQ_PRIM_COLOR_EX) + j RDPQ_Send + li s3, %lo(RDPQ_PRIM_COLOR_EX)+8 + .endfunc + + + + + ############################################################# + # RDPQCmd_SetOtherModes + # + # Completely overwrites the internal cache of the RDP other modes with the + # values provided in a0 and a1 and submits the command to the RDP. + # It will also re-evaluate any commands that depend on the other modes + # and submit them at the same time. 
+ # + # ARGS: + # a0: Command id and upper word of other modes + # a1: Lower word of other modes + ############################################################# + .func RDPQCmd_SetOtherModes +RDPQCmd_SetOtherModes: + # Save the other modes to internal cache. The MSB of a0 contains + # the 0xEF command ID, but we use that byte for extended SOM flags + # (SOMX_*) in the mode API, so reset it to zero as initial state. + sw a0, %lo(RDPQ_OTHER_MODES) + 0x0 + sb zero, %lo(RDPQ_OTHER_MODES) + 0x0 + j RDPQ_WriteOtherModes + sw a1, %lo(RDPQ_OTHER_MODES) + 0x4 + .endfunc + + ############################################################# + # RDPQCmd_ModifyOtherModes + # + # Modifies a specific part of the other modes and sends the updated + # value to the RDP. + # This function can be used as a standard fixup (in which case, + # it will potentially emit a SET_SCISSOR in case the cycle type + # changed between COPY/FILL and 1CYC/2CYC), or as a mode fixup, + # part of the mode API, in which case it must call RDPQ_UpdateRenderMode + # to regenerate the new render mode. + # + # NOTE: The new value in a2 should never have bits set outside of + # the inverse bitmask to ensure correct results. + # + # ARGS: + # a0: Bit 24..31: Command id + # Bit 15: If 1, call RDPQ_UpdateRenderMode.
+ # Bit 12-0: Word offset into other modes (0 or 4) + # a1: Inverse bit mask of the value to be written + # a2: New value + ############################################################# + .func RDPQCmd_ModifyOtherModes +RDPQCmd_ModifyOtherModes: + lw t1, %lo(RDPQ_OTHER_MODES)(a0) + and t1, a1 + or t1, a2 + sw t1, %lo(RDPQ_OTHER_MODES)(a0) + sll a0, 16 + bltz a0, RDPQ_UpdateRenderMode + + # Prepare the updated command in a0-a1 for the following steps + lw a0, %lo(RDPQ_OTHER_MODES) + 0x0 + lw a1, %lo(RDPQ_OTHER_MODES) + 0x4 + # fallthrough + .endfunc + + ############################################################# + # RDPQ_WriteOtherModes + # + # Appends the other modes command in a0-a1 to the staging area + # and falls through to #RDPQ_FinalizeOtherModes. + ############################################################# + .func RDPQ_WriteOtherModes +RDPQ_WriteOtherModes: + # Write other modes command to staging area, then overwrite + # top byte with 0xEF which is not part of RDPQ_OTHER_MODES but must + # be present when sending to RDP. + jal RDPQ_Write8 + li t1, 0xEF + sb t1, -8(s0) + # fallthrough! + .endfunc + + ############################################################# + # RDPQ_FinalizeOtherModes + # + # Re-evaluates any commands that depend on the other modes, + # appends them to the staging area, and finally calls #RDPQ_Finalize, + # finishing the current command. 
+ ############################################################# + .func RDPQ_FinalizeOtherModes +RDPQ_FinalizeOtherModes: + # Update other commands that need to change some state depending on the other modes + + # SetScissor: + # load the cached command first + lw a0, %lo(RDPQ_SCISSOR_RECT) + 0x0 + lw a1, %lo(RDPQ_SCISSOR_RECT) + 0x4 + # Append the fixed up SetScissor command to staging area and then finalize + jal_and_j RDPQ_WriteSetScissor, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_SetFillColor32 + # + # The RDP command SetFillColor expects a 32-bit value which + # is a "packed color", that is the 32-bit value that must be + # blindly repeated in the framebuffer. Semantically, for 32-bit + # framebuffers, this is the standard RGBA8888 format. For 16-bit + # framebuffers, it must be RGBA5551 repeated two times. + # + # To allow a more flexible approach where the same fill color + # command can be used irrespective of the target framebuffer, + # we create our own SetFillColor32 that only accepts a + # RGBA8888 color but convert it automatically to RGBA5551 + # depending on the target bitdepth (using the last value stored + # by SetColorImage). + ############################################################# + .func RDPQCmd_SetFillColor32 +RDPQCmd_SetFillColor32: + jal_and_j RDPQ_WriteSetFillColor, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_SetScissorEx + # + # The RDP command SetScissor has slightly different behavior + # for rectangles depending on the current cycle mode. In 1cycle/2cycle + # mode the right edge works as an "exclusive" range, meaning + # that pixels with the same x-coordinate as the edge are not + # rendered to. In fill/copy mode on the other hand, it's an inclusive + # range, so one additional column of pixels is included. 
+ # + # To make the API more consistent across all cycle modes, this + # command will adjust the coordinate of the right edge to compensate + # for this behavior depending on the current cycle mode. + ############################################################# + .func RDPQCmd_SetScissorEx +RDPQCmd_SetScissorEx: + lui t1, 0xED00 ^ 0xD200 # SetScissorEx -> SET_SCISSOR + xor a0, t1 + jal_and_j RDPQ_WriteSetScissor, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_SetFixupImage + # + # Acts as passthrough for Set*Image commands with the additional + # address lookup capability. The most significant 6 bits of a1 + # are interpreted as an index into an address table. This command + # will load the value from the table at that index and add it + # to the address in the lower 26 bits. + # To set entries in the table, see #RDPQCmd_SetLookupAddress. + ############################################################# + .func RDPQCmd_SetFixupImage +RDPQCmd_SetFixupImage: + # fixup DRAM address using address slots + jal RDPQ_FixupAddress + lui t3, 0xF000 + or a0, t3 + jal_and_j RDPQ_Write8, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_SetColorImage + # + # Wrapper around RDP Set Color Image, that records + # the target bitdepth. Works with address lookup (see #RDPQCmd_SetFixupImage). + # Will also re-execute any fixup commands that depend on the bitdepth. 
+ ############################################################# + .func RDPQCmd_SetColorImage +RDPQCmd_SetColorImage: + srl t0, a0, 19 + andi t0, 3 + # fixup DRAM address using address slots + jal RDPQ_FixupAddress + sb t0, %lo(RDPQ_TARGET_BITDEPTH) + lui t1, 0xF000 + # Append this command to staging area + jal RDPQ_Write8 + or a0, t1 + + # Append SetFillColor next, then submit everything + lw a1, %lo(RDPQ_FILL_COLOR) + jal_and_j RDPQ_WriteSetFillColor, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_RectEx + # + # Provides a consistent API for the FillRectangle/TextureRectangle + # command that always uses exclusive ranges across all cycle modes. + ############################################################# + .func RDPQCmd_RectEx +RDPQCmd_RectEx: + lb t0, %lo(RDPQ_OTHER_MODES) + 0x1 + # Bit 21 of the first word is set if FILL or COPY mode is active + andi t0, 0x1 << 5 + # Leave unchanged when not in FILL or COPY mode + beqz t0, rect_substitute + lui t1, 0xD000 ^ 0xE400 # TextureRectEx -> TEXTURE_RECTANGLE (or FillRectEx -> FILL_RECTANGLE) + + # Subtract 1 pixel from XL and YL + addiu a0, -((4 << 12) + 4) + # Multiply DsDx by 4 + lui t0, 0xFFFF + and t2, a3, t0 + sll t2, 2 + andi a3, 0xFFFF + or a3, t2 + +rect_substitute: + # Substitute command ID + xor a0, t1 + # Call Write16 for TexRect and Write8 for FillRect, and then RDPQ_Finalize in tailcall + sll t0, a0, 3 + bltz t0, rect_end + li s0, RDPQ_Write8 + li s0, RDPQ_Write16 +rect_end: + jr s0 + li ra, RDPQ_Finalize + .endfunc + + ############################################################# + # RDPQCmd_PassthroughTriangle + # + # Forwards the RDP command in the input stream to the RDP stream.
+ # The size of the command is automatically detected by reading #rspq_cmd_size + ############################################################# + .func RDPQCmd_PassthroughTriangle +RDPQCmd_PassthroughTriangle: + addi s1, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) # End of command in RSPQ buffer + sub s0, s1, rspq_cmd_size # Start of command in RSPQ buffer + li s4, %lo(RDPQ_CMD_STAGING) # Pointer into RDPQ buffer + # Add num mipmap levels to the triangle + lbu t1, %lo(RDPQ_OTHER_MODES) + 0 + lbu t0, 1(s0) + andi t1, 0x38 # Isolate bits 3-5 (aka 59-61 of SOM) + or t0, t1 + sb t0, 1(s0) +passthrough_copy_loop: + lqv $v00,0, 0x00,s0 + lrv $v00,0, 0x10,s0 + sqv $v00,0, 0x00,s4 + addi s0, 0x10 + blt s0, s1, passthrough_copy_loop + addi s4, 0x10 + + j RDPQ_Finalize + sw s4, %lo(RDPQ_CMD_PTR) + .endfunc + + ############################################################# + # RDPQCmd_SetLookupAddress + # + # Sets an entry in the lookup address table. Note that index + # zero should never be changed, so it can act as the "identity". + # + # ARGS: + # a0: Command ID + # a1: Table entry index and address to set the entry to + ############################################################# + .func RDPQCmd_SetLookupAddress +RDPQCmd_SetLookupAddress: + jr ra + sw a1, %lo(RDPQ_ADDRESS_TABLE)(a0) + .endfunc + + ############################################################# + # RDPQ_FixupAddress + # + # Looks up an address from the lookup table and adds it to a1. + # + # ARGS: + # a1: Table entry index and offset to the contained address + # OUTPUTS: + # a1: Will contain the looked up address plus the offset + ############################################################# + .func RDPQ_FixupAddress +RDPQ_FixupAddress: + srl t0, a1, 26 + lw t1, %lo(RDPQ_ADDRESS_TABLE)(t0) + jr ra + add a1, t1 + .endfunc + + ############################################################# + # RDPQCmd_SyncFull + # + # SyncFull command in both static and dynamic mode.
The command + # encodes the callback in the lower + ############################################################# + .func RDPQCmd_SyncFull +RDPQCmd_SyncFull: + # Wait until: + # * the previous SYNC_FULL interrupt has been processed by the CPU. + jal SpStatusWait + li t2, SP_STATUS_SIG_RDPSYNCFULL + + # Fetch the current syncpoint ID and put it into the state + lw s0, %lo(RDPQ_RDRAM_SYNCPOINT_ADDR) + li s4, %lo(RDPQ_SYNCPOINT_ID) + jal DMAIn + li t0, DMA_SIZE(8, 1) + + # Set the signal because we're about to schedule a new SYNC_FULL + li t0, SP_WSTATUS_SET_SIG_RDPSYNCFULL + mtc0 t0, COP0_SP_STATUS + + # Store the current SYNC_FULL command in the state. This includes the + # interrupt callback that the CPU will have to run. + sw a0, %lo(RDPQ_SYNCFULL) + 0 + sw a1, %lo(RDPQ_SYNCFULL) + 4 + + # DMA the command plus the syncpoint ID to RDRAM. + li s4, %lo(RDPQ_SYNCFULL) + lw s0, %lo(RDPQ_RDRAM_STATE_ADDR) + jal DMAOut + li t0, DMA_SIZE(16, 1) + + # Actually send the SYNC_FULL command to the RDP + jal RDPQ_Write8 + nop + li s4, %lo(RDPQ_CMD_STAGING) + lw s3, %lo(RDPQ_CMD_PTR) + sw s4, %lo(RDPQ_CMD_PTR) + jal RDPQ_Send + sb zero, %lo(RDPQ_SYNCFULL_ONGOING) + + # Put further command writing on hold. By setting RDPQ_SYNCFULL_ONGOING, + # we prevent further calls to RDPQ_Send / RSPQCmd_RdpSetBuffer / RSPQCmd_RdpAppendBuffer + # to actually start a new RDP DMA transfer, until the SYNC_FULL is done. + # This is done to avoid a hardware bug. + li t0, DP_STATUS_BUSY + sb t0, %lo(RDPQ_SYNCFULL_ONGOING) + + j RSPQ_Loop + nop + + .endfunc + + ############################################################# + # RDPQCmd_PushMode + # + # Execute a push on the RDP mode stack.
The current RDP mode + # (blender+combiner) is pushed one slot deeper in a stack, + # from which it can be recovered later with RDPQCmd_PopMode + ############################################################# + .func RDPQCmd_PushMode +RDPQCmd_PushMode: + li s0, %lo(RDPQ_MODE) + li s1, %lo(RDPQ_MODE_STACK) + +PushPopMode: + lqv $v00,0, 0x00,s0 + lqv $v01,0, 0x10,s0 + lqv $v02,0, 0x00,s1 + lqv $v03,0, 0x10,s1 + lqv $v04,0, 0x20,s1 + lqv $v05,0, 0x30,s1 + + sqv $v00,0, 0x00,s1 + sqv $v01,0, 0x10,s1 + sqv $v02,0, 0x20,s1 + sqv $v03,0, 0x30,s1 + sqv $v04,0, 0x40,s1 + jr ra + sqv $v05,0, 0x50,s1 + .endfunc + + ############################################################# + # RDPQCmd_PopMode + # + # Execute a pop on the RDP mode stack. The current RDP mode + # (blender+combiner) is popped one slot in a stack, recovering + # the mode that was active when RDPQCmd_PushMode was last called. + ############################################################# + .func RDPQCmd_PopMode +RDPQCmd_PopMode: + li s0, %lo(RDPQ_MODE_STACK) + li s1, %lo(RDPQ_MODE) + # Pop from the stack and then reconfigure the cycle type. + # Notice that technically it wouldn't be necessary to run + # the full RDPQ_UpdateRenderMode (it would be sufficient to call + # RDPQ_Write16+RDPQ_Finalize after loading combiner+other_modes) + # but this way we get to reuse the function without adding more + # specialized code.
+ lqv $v00,0, 0x00,s0 + lqv $v01,0, 0x10,s0 + lqv $v02,0, 0x20,s0 + lqv $v03,0, 0x30,s0 + lqv $v04,0, 0x40,s0 + lqv $v05,0, 0x50,s0 + + sqv $v00,0, 0x00,s1 + sqv $v01,0, 0x10,s1 + sqv $v02,0, 0x00,s0 + sqv $v03,0, 0x10,s0 + sqv $v04,0, 0x20,s0 + j RDPQ_UpdateRenderMode + sqv $v05,0, 0x30,s0 + .endfunc + + .func RDPQCmd_SetBlendingMode +RDPQCmd_SetBlendingMode: + j RDPQ_SetBlendingMode + nop + .endfunc + + .func RDPQCmd_SetFogMode +RDPQCmd_SetFogMode: + j RDPQ_SetFogMode + nop + .endfunc + + .func RDPQCmd_SetCombineMode_1Pass +RDPQCmd_SetCombineMode_1Pass: + j RDPQ_SetCombineMode_1Pass + nop + .endfunc + + .func RDPQCmd_SetCombineMode_2Pass +RDPQCmd_SetCombineMode_2Pass: + j RDPQ_SetCombineMode_2Pass + nop + .endfunc + + ############################################################# + # RDPQCmd_ResetMode + # + # Reset the current RDP mode to the default setting. This is + # called by rdpq_mode_standard to configure a base setting. + # + # a0,a1: Color combiner to configure + # a2,a3: SOM to configure + ############################################################# +RDPQCmd_ResetMode: + # Keep SOMX_UPDATE_FREEZE if set in the current state. + # This is the only special state that we keep valid over + # a reset mode, which in general always resets SOM. 
+ lw t3, %lo(RDPQ_OTHER_MODES) + 0 + andi t2, t3, SOMX_UPDATE_FREEZE >> 32 + or a2, t2 + + # Clear RDPQ_MODE + li s0, %lo(RDPQ_MODE) + vxor $v00, $v00, $v00 + sqv $v00,0, 0x00,s0 + sqv $v00,0, 0x10,s0 + + # Save SOM right away + sw a2, %lo(RDPQ_OTHER_MODES) + 0 + sw a3, %lo(RDPQ_OTHER_MODES) + 4 + + # Check if the FILL/COPY bit is changed compared to the current mode + # If so, update scissoring + xor t3, a2 + sll t3, 63 - (SOM_CYCLE_SHIFT+1) + bgez t3, reset_end + + move t0, a0 + move t1, a1 + lw a0, %lo(RDPQ_SCISSOR_RECT) + 0x0 + jal RDPQ_WriteSetScissor + lw a1, %lo(RDPQ_SCISSOR_RECT) + 0x4 + move a0, t0 + move a1, t1 + +reset_end: + # Call RDPQCmd_SetCombineMode_1Pass, which will set the standard CC + # and call RDPQ_UpdateRenderMode once + j RDPQCmd_SetCombineMode_1Pass + nop + + + .func RDPQCmd_TriangleData +RDPQCmd_TriangleData: + sw a1, %lo(RDPQ_TRI_DATA0) + 0(a0) # X/Y + sw a2, %lo(RDPQ_TRI_DATA0) + 4(a0) # Z + sw a3, %lo(RDPQ_TRI_DATA0) + 8(a0) # RGBA + + lw t0, CMD_ADDR(16, 28) + lw t1, CMD_ADDR(20, 28) + lw t2, CMD_ADDR(24, 28) + + sw t0, %lo(RDPQ_TRI_DATA0) + 12(a0) # S/T + sw t1, %lo(RDPQ_TRI_DATA0) + 16(a0) # W + jr ra + sw t2, %lo(RDPQ_TRI_DATA0) + 20(a0) # INV_W + .endfunc + + .func RDPQCmd_Triangle +RDPQCmd_Triangle: +#if RDPQ_TRIANGLE_REFERENCE + assert RDPQ_ASSERT_INVALID_CMD_TRI +#else + li s4, %lo(RDPQ_CMD_STAGING) + move s3, s4 + li v0, 2 # disable culling + li a1, %lo(RDPQ_TRI_DATA0) + li a2, %lo(RDPQ_TRI_DATA1) + jal RDPQ_Triangle + li a3, %lo(RDPQ_TRI_DATA2) + jal_and_j RDPQ_Send, RSPQ_Loop + +#endif /* RDPQ_TRIANGLE_REFERENCE */ + .endfunc + + .func RDPQCmd_SetDebugMode +RDPQCmd_SetDebugMode: + jr ra + sb a0, %lo(RDPQ_DEBUG) + .endfunc + + + ######################################### + # RDPQCmd_AutoTmem_SetAddr + # + # Set the current auto-TMEM address. This is used by the auto-TMEM + # feature of rdpq_tex, that will autoallocate TMEM without requiring + # explicit addresses. 
+ # + # ARGS: + # a0: zero: begin auto-TMEM, set address to 0 + # negative (-1): end auto-TMEM + # positive: increment auto-TMEM address by a0 multiplied by 8 + ######################################### + + .func RDPQCmd_AutoTmem_SetAddr +RDPQCmd_AutoTmem_SetAddr: + sll a0, 16 + sra a0, 16 + bltz a0, autotmem_end # -1 => end + lhu t0, %lo(RDPQ_AUTOTMEM_ADDR) + beqz a0, autotmem_begin # 0 => start + + # Positive value: increment address. NOTE(review): the NDEBUG guard just below looks inverted (the unpaired check is only assembled when NDEBUG is defined) -- confirm intent + lhu t1, %lo(RDPQ_AUTOTMEM_LIMIT) + #ifdef NDEBUG + lbu t2, %lo(RDPQ_AUTOTMEM_ENABLED) + assert_gt t2, 0, RDPQ_ASSERT_AUTOTMEM_UNPAIRED + #endif + sh t0, %lo(RDPQ_AUTOTMEM_ADDR_PREV) + add t0, a0 + assert_le t0, t1, RDPQ_ASSERT_AUTOTMEM_FULL + jr ra + sh t0, %lo(RDPQ_AUTOTMEM_ADDR) + +autotmem_begin: + # Increment enabled counter. Skip the reset below if it was already enabled + lbu t0, %lo(RDPQ_AUTOTMEM_ENABLED) + addiu t1, t0, 1 + bnez t0, JrRa + sb t1, %lo(RDPQ_AUTOTMEM_ENABLED) + + # Set address to zero, and limit to 4096 + li t0, 4096/8 + sh t0, %lo(RDPQ_AUTOTMEM_LIMIT) + sh zero, %lo(RDPQ_AUTOTMEM_ADDR_PREV) + jr ra + sh zero, %lo(RDPQ_AUTOTMEM_ADDR) + +autotmem_end: + lbu t0, %lo(RDPQ_AUTOTMEM_ENABLED) + addiu t0, -1 + assert_ge t0, 0, RDPQ_ASSERT_AUTOTMEM_UNPAIRED + jr ra + sb t0, %lo(RDPQ_AUTOTMEM_ENABLED) + + .endfunc + + ####################################################################### + # RDPQCmd_AutoTmem_SetTile + # + # Identical to SetTile, but will automatically use the current + # auto-TMEM address. + # + # Moreover, it will update the auto-TMEM limit depending on texture format. + # In fact, 32-bit, YUV and CI textures will reduce the available + # allocation size to 2048.
+ ######################################################################## + + .func RDPQCmd_AutoTmem_SetTile +RDPQCmd_AutoTmem_SetTile: + and t1, a0, 1<<18 + beqz t1, 1f + lh t0, %lo(RDPQ_AUTOTMEM_ADDR) + lh t0, %lo(RDPQ_AUTOTMEM_ADDR_PREV) +1: + lui t1, 0xDD00 ^ 0xF500 # AutoTmem_SetTile => SET_TILE + xor a0, t1 + add a0, t0 # Put auto-TMEM address inside the command + + # Check format and see if we need to lower the auto-TMEM limit + # The following formats use the upper half of TMEM in a special way, + # so only the lower half is available for auto-TMEM. + srl t1, a0, 19 + andi t1, 0x1F + li t0, (0<<2)|3 # RGBA32 + beq t1, t0, autotmem_lowerlimit + li t0, (1<<2)|2 # YUV16 + beq t1, t0, autotmem_lowerlimit + li t0, (2<<2)|0 # CI4 + beq t1, t0, autotmem_lowerlimit + li t0, (2<<2)|1 # CI8 + bne t1, t0, RDPQCmd_Passthrough8 +autotmem_lowerlimit: + li t0, 2048/8 + j RDPQCmd_Passthrough8 + sh t0, %lo(RDPQ_AUTOTMEM_LIMIT) + .endfunc + +# Include RDPQ library +#include diff --git a/src/rsp.c b/src/rsp.c index 15f304e3ce..98c423343f 100644 --- a/src/rsp.c +++ b/src/rsp.c @@ -12,11 +12,13 @@ #include #include #include "rsp.h" +#include "rdp.h" #include "debug.h" #include "console.h" #include "regsinternal.h" #include "n64sys.h" #include "interrupt.h" +#include "rdpq/rdpq_debug_internal.h" /** * RSP crash handler ucode (rsp_crash.S) @@ -205,7 +207,7 @@ void __rsp_check_assert(const char *file, int line, const char *func) __attribute__((noreturn, format(printf, 4, 5))) void __rsp_crash(const char *file, int line, const char *func, const char *msg, ...) { - volatile uint32_t *DP_STATUS = (volatile uint32_t*)0xA410000C; + volatile uint32_t *DP_REGS = (volatile uint32_t*)0xA4100000; volatile uint32_t *SP_REGS = (volatile uint32_t*)0xA4040000; rsp_snapshot_t state __attribute__((aligned(8))); @@ -224,25 +226,35 @@ void __rsp_crash(const char *file, int line, const char *func, const char *msg, // Now read all SP registers. 
Most of them are DMA-related so the earlier // we read them the better. We can't freeze the DMA transfer so they might // be slightly incoherent. - uint32_t sp_regs[8]; - for (int i=0;i<8;i++) + uint32_t sp_regs[8], dp_regs[8]; + for (int i=0;i<8;i++) { sp_regs[i] = i==4 ? sp_status : SP_REGS[i]; + dp_regs[i] = i==3 ? dp_status : DP_REGS[i]; + } MEMORY_BARRIER(); - // Freeze the RDP - *DP_STATUS = 1<<3; - - // Initialize the console - console_init(); - console_set_debug(true); - console_set_render_mode(RENDER_MANUAL); - // Forcibly halt the RSP, and wait also for the DMA engine to be idle *SP_STATUS = SP_WSTATUS_SET_HALT; while (!(*SP_STATUS & SP_STATUS_HALTED)) {} while (*SP_STATUS & (SP_STATUS_DMA_BUSY | SP_STATUS_DMA_FULL)) {} MEMORY_BARRIER(); + // We now need to check whether the RDP has crashed. We need to send a + // DMA transfer (unless one is already going) + uint64_t dummy_rdp_command = 0x2700000000000000ull; // sync pipe + if (!(dp_status & (DP_STATUS_DMA_BUSY | DP_STATUS_START_VALID | DP_STATUS_END_VALID))) { + data_cache_hit_writeback_invalidate(&dummy_rdp_command, sizeof(dummy_rdp_command)); + *DP_START = PhysicalAddr(&dummy_rdp_command); + *DP_END = PhysicalAddr(&dummy_rdp_command + 1); + } + // Check if there are any progresses in DP_CURRENT + for (int i=0; i<20 && *DP_CURRENT == dp_regs[2]; i++) + wait_ms(5); + bool rdp_crashed = *DP_CURRENT == dp_regs[2]; + + // Freeze the RDP + *DP_STATUS = 1<<3; + // Read the current PC. This can only be read after the RSP is halted. uint32_t pc = *SP_PC; MEMORY_BARRIER(); @@ -258,14 +270,27 @@ void __rsp_crash(const char *file, int line, const char *func, const char *msg, rsp_read_data(&state, 764, 0); // Overwrite the status register information with the reads we did at - // the beginning of the handler - for (int i=0;i<8;i++) - state.cop0[i] = sp_regs[i]; - state.cop0[11] = dp_status; + // the beginning of the handler. + // FIXME: maybe not read these anymore from the RSP? 
+ for (int i=0;i<8;i++) { + state.cop0[i+0] = sp_regs[i]; + state.cop0[i+8] = dp_regs[i]; + } // Write the PC now so it doesn't get overwritten by the DMA state.pc = pc; + // Initialize the console + console_init(); + console_set_debug(true); + console_set_render_mode(RENDER_MANUAL); + + // If the validator is active, this is a good moment to flush its buffered + // output. This could also trigger a RDP crash (which might be the + // underlying cause for the RSP crash), so better try that before starting + // to fill the output buffer. + if (rdpq_trace) rdpq_trace(); + // Dump information on the current ucode name and CPU point of crash const char *uc_name = uc ? uc->name : "???"; char pcpos[120]; @@ -285,6 +310,25 @@ void __rsp_crash(const char *file, int line, const char *func, const char *msg, printf("\n"); } + // Check if the RDP crashed. If the RDP crashed while the validator was active, + // theoretically it should have caught it before (in the rdpq_trace above), + // so we shouldn't even get here. + // Still, there are a few cases where this can happen: + // * the validator could be disabled + // * some unknown RDP crash conditions not yet handled by the validator + // * some race condition for which the validator missed the command that + // triggered the crash + // * validator asserts could be disabled, in which case we dumped the crash + // condition in the debug output, but we still get here. + // NOTE: unfortunately, RDP doesn't always set the FREEZE bit when it crashes + // (it is unknown why sometimes it doesn't). So this is just a best effort to + // highlight the presence of the important FREEZE bit in DP STATUS that could + // otherwise go unnoticed. + if (rdp_crashed) { + printf("RDP CRASHED: the code triggered a RDP hardware bug.\n"); + printf("Use the rdpq validator (rdpq_debug_start()) to analyze.\n"); + } + // Check if a RSP assert triggered. We check that we reached an // infinite loop with the break instruction in the delay slot.
if (*(uint32_t*)(&state.imem[pc+4]) == 0x00BA000D) { @@ -292,7 +336,7 @@ void __rsp_crash(const char *file, int line, const char *func, const char *msg, uint16_t code = state.gpr[1] >> 16; printf("RSP ASSERTION FAILED (0x%x)", code); - if (uc->assert_handler) { + if (uc && uc->assert_handler) { printf(" - "); uc->assert_handler(&state, code); } else { @@ -391,7 +435,7 @@ void __rsp_crash(const char *file, int line, const char *func, const char *msg, // Invoke ucode-specific crash handler, if defined. This will dump ucode-specific // information (possibly decoded from DMEM). - if (uc->crash_handler) { + if (uc && uc->crash_handler) { printf("-----------------------------------------------Ucode data------\n"); uc->crash_handler(&state); } diff --git a/src/rspq/rspq.c b/src/rspq/rspq.c index c9734b7815..6eb3dcd0ca 100644 --- a/src/rspq/rspq.c +++ b/src/rspq/rspq.c @@ -107,10 +107,10 @@ * * Internally, double buffering is used to implement the queue. The size of * each of the buffers is RSPQ_DRAM_LOWPRI_BUFFER_SIZE. When a buffer is full, - * the queue engine writes a RSPQ_CMD_JUMP command with the address of the + * the queue engine writes a #RSPQ_CMD_JUMP command with the address of the * other buffer, to tell the RSP to jump there when it is done. * - * Moreover, just before the jump, the engine also enqueue a RSPQ_CMD_WRITE_STATUS + * Moreover, just before the jump, the engine also enqueue a #RSPQ_CMD_WRITE_STATUS * command that sets the SP_STATUS_SIG_BUFDONE_LOW signal. This is used to * keep track when the RSP has finished processing a buffer, so that we know * it becomes free again for more commands. @@ -119,16 +119,16 @@ * * ## Blocks * - * Blocks are implemented by redirecting rspq_write to a different memory buffer, + * Blocks are implemented by redirecting #rspq_write to a different memory buffer, * allocated for the block. The starting size for this buffer is * RSPQ_BLOCK_MIN_SIZE. 
If the buffer becomes full, a new buffer is allocated * with double the size (to achieve exponential growth), and it is linked - * to the previous buffer via a RSPQ_CMD_JUMP. So a block can end up being + * to the previous buffer via a #RSPQ_CMD_JUMP. So a block can end up being * defined by multiple memory buffers linked via jumps. * * Calling a block requires some work because of the nesting calls we want * to support. To make the RSP ucode as short as possible, the two internal - * command dedicated to block calls (RSPQ_CMD_CALL and RSPQ_CMD_RET) do not + * command dedicated to block calls (#RSPQ_CMD_CALL and #RSPQ_CMD_RET) do not * manage a call stack by themselves, but only allow to save/restore the * current queue position from a "save slot", whose index must be provided * by the CPU. @@ -151,24 +151,48 @@ * When #rspq_highpri_begin is called, the CPU notifies the RSP that it must * switch to the highpri queues by setting signal SP_STATUS_SIG_HIGHPRI_REQUESTED. * The RSP checks for that signal between each command, and when it sees it, it - * internally calls RSPQ_CMD_SWAP_BUFFERS. This command loads the highpri queue + * internally calls #RSPQ_CMD_SWAP_BUFFERS. This command loads the highpri queue * pointer from a special call slot, saves the current lowpri queue position * in another special save slot, and finally clear SP_STATUS_SIG_HIGHPRI_REQUESTED * and set SP_STATUS_SIG_HIGHPRI_RUNNING instead. * * When the #rspq_highpri_end is called, the opposite is done. The CPU writes - * in the queue a RSPQ_CMD_SWAP_BUFFERS that saves the current highpri pointer + * in the queue a #RSPQ_CMD_SWAP_BUFFERS that saves the current highpri pointer * into its call slot, recover the previous lowpri position, and turns off * SP_STATUS_SIG_HIGHPRI_RUNNING. * * Some careful tricks are necessary to allow multiple highpri queues to be * pending, see #rspq_highpri_begin for details. 
* + * ## rdpq integrations + * + * There are a few places where the rspq code is hooked with rdpq to provide + * for coherent usage of the two peripherals. In particular: + * + * * #rspq_wait automatically calls #rdpq_fence. This means that + * it will also wait for RDP to finish executing all commands, which is + * actually expected for its intended usage of "full sync for debugging + * purposes". + * * All rspq block creation functions call into hooks in rdpq. This is + * necessary because blocks are specially handled by rdpq via static + * buffer, to make sure RDP commands in the block don't pass through + * via RSP, but are DMA'd directly from RDRAM into RDP. + * See rdpq.c documentation for more details. + * * In specific places, we call into the rdpq debugging module to help + * trace the RDP commands. For instance, when switching RDP RDRAM + * buffers, RSP will generate an interrupt to inform the debugging + * code that it needs to finish dumping the previous RDP buffer. + * */ #include "rsp.h" #include "rspq.h" +#include "rspq_internal.h" #include "rspq_constants.h" +#include "rdp.h" +#include "rdpq_constants.h" +#include "rdpq/rdpq_internal.h" +#include "rdpq/rdpq_debug_internal.h" #include "interrupt.h" #include "utils.h" #include "n64sys.h" @@ -179,110 +203,11 @@ #include #include -/** - * RSPQ internal commands (overlay 0) - */ -enum { - /** - * @brief RSPQ command: Invalid - * - * Reserved ID for invalid command. This is used as a marker so that RSP knows - * when it has caught up with CPU and reached an empty portion of the buffer. - */ - RSPQ_CMD_INVALID = 0x00, - - /** - * @brief RSPQ command: No-op - * - * This commands does nothing. It can be useful for debugging purposes. - */ - RSPQ_CMD_NOOP = 0x01, - - /** - * @brief RSPQ command: Jump to another buffer - * - * This commands tells the RSP to start fetching commands from a new address.
- * It is mainly used internally to implement the queue as a ring buffer (jumping - * at the start when we reach the end of the buffer). - */ - RSPQ_CMD_JUMP = 0x02, - - /** - * @brief RSPQ command: Call a block - * - * This command is used by the block functions to implement the execution of - * a block. It tells RSP to starts fetching commands from the block address, - * saving the current address in an internal save slot in DMEM, from which - * it will be recovered by CMD_RET. Using multiple slots allow for nested - * calls. - */ - RSPQ_CMD_CALL = 0x03, - - /** - * @brief RSPQ command: Return from a block - * - * This command tells the RSP to recover the buffer address from a save slot - * (from which it was currently saved by a CALL command) and begin fetching - * commands from there. It is used to finish the execution of a block. - */ - RSPQ_CMD_RET = 0x04, - - /** - * @brief RSPQ command: DMA transfer - * - * This commands runs a DMA transfer (either DRAM to DMEM, or DMEM to DRAM). - * It is used by #rspq_overlay_register to register a new overlay table into - * DMEM while the RSP is already running (to allow for overlays to be - * registered even after boot), and can be used by the users to perform - * manual DMA transfers to and from DMEM without risking a conflict with the - * RSP itself. - */ - RSPQ_CMD_DMA = 0x05, - - /** - * @brief RSPQ Command: write SP_STATUS register - * - * This command asks the RSP to write to the SP_STATUS register. It is normally - * used to set/clear signals or to raise RSP interrupts. - */ - RSPQ_CMD_WRITE_STATUS = 0x06, - - /** - * @brief RSPQ Command: Swap lowpri/highpri buffers - * - * This command is used as part of the highpri feature. It allows to switch - * between lowpri and highpri queue, by saving the current buffer pointer - * in a special save slot, and restoring the buffer pointer of the other - * queue from another slot. 
In addition, it also writes to SP_STATUS, to - * be able to adjust signals: entering highpri mode requires clearing - * SIG_HIGHPRI_REQUESTED and setting SIG_HIGHPRI_RUNNING; exiting highpri - * mode requires clearing SIG_HIGHPRI_RUNNING. - * - * The command is called internally by RSP to switch to highpri when the - * SIG_HIGHPRI_REQUESTED is found set; then it is explicitly enqueued by the - * CPU when the highpri queue is finished to switch back to lowpri - * (see #rspq_highpri_end). - */ - RSPQ_CMD_SWAP_BUFFERS = 0x07, - - /** - * @brief RSPQ Command: Test and write SP_STATUS register - * - * This commands does a test-and-write sequence on the SP_STATUS register: first, - * it waits for a certain mask of bits to become zero, looping on it. Then - * it writes a mask to the register. It is used as part of the syncpoint - * feature to raise RSP interrupts, while waiting for the previous - * interrupt to be processed (coalescing interrupts would cause syncpoints - * to be missed). - */ - RSPQ_CMD_TEST_WRITE_STATUS = 0x08 -}; - +/// @cond // Make sure that RSPQ_CMD_WRITE_STATUS and RSPQ_CMD_TEST_WRITE_STATUS have // an even ID number. This is a small trick used to save one opcode in // rsp_queue.S (see cmd_write_status there for an explanation). -/// @cond _Static_assert((RSPQ_CMD_WRITE_STATUS & 1) == 0); _Static_assert((RSPQ_CMD_TEST_WRITE_STATUS & 1) == 0); /// @endcond @@ -308,9 +233,6 @@ _Static_assert((RSPQ_CMD_TEST_WRITE_STATUS & 1) == 0); ptr += 3; \ }) -/** @brief Write an internal command to the RSP queue */ -#define rspq_int_write(cmd_id, ...) 
rspq_write(0, cmd_id, ##__VA_ARGS__) - static void rspq_crash_handler(rsp_snapshot_t *state); static void rspq_assert_handler(rsp_snapshot_t *state, uint16_t assert_code); @@ -333,61 +255,9 @@ typedef struct rspq_overlay_header_t { uint16_t commands[]; } rspq_overlay_header_t; -/** @brief A pre-built block of commands */ -typedef struct rspq_block_s { - uint32_t nesting_level; ///< Nesting level of the block - uint32_t cmds[]; ///< Block contents (commands) -} rspq_block_t; - /** @brief RSPQ overlays */ rsp_ucode_t *rspq_overlay_ucodes[RSPQ_MAX_OVERLAY_COUNT]; -/** @brief A RSPQ overlay ucode. This is similar to rsp_ucode_t, but is used - * internally to managed it as a RSPQ overlay */ -typedef struct rspq_overlay_t { - uint32_t code; ///< Address of the overlay code in RDRAM - uint32_t data; ///< Address of the overlay data in RDRAM - uint32_t state; ///< Address of the overlay state in RDRAM (within data) - uint16_t code_size; ///< Size of the code in bytes - 1 - uint16_t data_size; ///< Size of the data in bytes - 1 -} rspq_overlay_t; - -/// @cond -_Static_assert(sizeof(rspq_overlay_t) == RSPQ_OVERLAY_DESC_SIZE); -/// @endcond - -/** - * @brief The overlay table in DMEM. - * - * This structure is defined in DMEM by rsp_queue.S, and contains the descriptors - * for the overlays, used by the queue engine to load each overlay when needed. - */ -typedef struct rspq_overlay_tables_s { - /** @brief Table mapping overlay ID to overlay index (used for the descriptors) */ - uint8_t overlay_table[RSPQ_OVERLAY_TABLE_SIZE]; - /** @brief Descriptor for each overlay, indexed by the previous table. */ - rspq_overlay_t overlay_descriptors[RSPQ_MAX_OVERLAY_COUNT]; -} rspq_overlay_tables_t; - -/** - * @brief RSP Queue data in DMEM. - * - * This structure is defined by rsp_queue.S, and represents the - * top portion of DMEM. - */ -typedef struct rsp_queue_s { - rspq_overlay_tables_t tables; ///< Overlay table - /** @brief Pointer stack used by #RSPQ_CMD_CALL and #RSPQ_CMD_RET. 
*/ - uint32_t rspq_pointer_stack[RSPQ_MAX_BLOCK_NESTING_LEVEL]; - uint32_t rspq_dram_lowpri_addr; ///< Address of the lowpri queue (special slot in the pointer stack) - uint32_t rspq_dram_highpri_addr; ///< Address of the highpri queue (special slot in the pointer stack) - uint32_t rspq_dram_addr; ///< Current RDRAM address being processed - int16_t current_ovl; ///< Current overlay index -} __attribute__((aligned(16), packed)) rsp_queue_t; - -/** @brief Address of the RSPQ data header in DMEM (see #rsp_queue_t) */ -#define RSPQ_DATA_ADDRESS 32 - /** * @brief RSP queue building context * @@ -434,6 +304,9 @@ rspq_ctx_t *rspq_ctx; ///< Current context volatile uint32_t *rspq_cur_pointer; ///< Copy of the current write pointer (see #rspq_ctx_t) volatile uint32_t *rspq_cur_sentinel; ///< Copy of the current write sentinel (see #rspq_ctx_t) +/** @brief Buffers that hold outgoing RDP commands (generated via RSP). */ +void *rspq_rdp_dynamic_buffers[2]; + /** @brief RSP queue data in DMEM. */ static rsp_queue_t rspq_data; @@ -441,20 +314,20 @@ static rsp_queue_t rspq_data; static bool rspq_initialized = 0; /** @brief Pointer to the current block being built, or NULL. */ -static rspq_block_t *rspq_block; +rspq_block_t *rspq_block; /** @brief Size of the current block memory buffer (in 32-bit words). */ static int rspq_block_size; /** @brief ID that will be used for the next syncpoint that will be created. */ static int rspq_syncpoints_genid; /** @brief ID of the last syncpoint reached by RSP. */ -static volatile int rspq_syncpoints_done; +volatile int __rspq_syncpoints_done __attribute__((aligned(8))); /** @brief True if the RSP queue engine is running in the RSP. */ static bool rspq_is_running; /** @brief Dummy state used for overlay 0 */ -static uint64_t dummy_overlay_state; +static uint64_t dummy_overlay_state[2]; static void rspq_flush_internal(void); @@ -468,7 +341,13 @@ static void rspq_sp_interrupt(void) // syncpoint done ID and clear the signal. 
if (status & SP_STATUS_SIG_SYNCPOINT) { wstatus |= SP_WSTATUS_CLEAR_SIG_SYNCPOINT; - ++rspq_syncpoints_done; + ++__rspq_syncpoints_done; + // writeback to memory; this is required for RDPQCmd_SyncFull to fetch the correct value + data_cache_hit_writeback(&__rspq_syncpoints_done, sizeof(__rspq_syncpoints_done)); + } + if (status & SP_STATUS_SIG0) { + wstatus |= SP_WSTATUS_CLEAR_SIG0; + if (rdpq_trace_fetch) rdpq_trace_fetch(true); } MEMORY_BARRIER(); @@ -478,14 +357,22 @@ static void rspq_sp_interrupt(void) } /** @brief Extract the current overlay index and name from the RSP queue state */ -static void rspq_get_current_ovl(rsp_queue_t *rspq, int *ovl_idx, const char **ovl_name) +static void rspq_get_current_ovl(rsp_queue_t *rspq, int *ovl_idx, uint8_t *ovl_id, const char **ovl_name) { + *ovl_id = 0xFF; *ovl_idx = rspq->current_ovl / sizeof(rspq_overlay_t); - if (*ovl_idx == 0) + if (*ovl_idx == 0) { *ovl_name = "builtin"; - else if (*ovl_idx < RSPQ_MAX_OVERLAY_COUNT && rspq_overlay_ucodes[*ovl_idx]) + *ovl_id = 0; + } else if (*ovl_idx < RSPQ_MAX_OVERLAY_COUNT && rspq_overlay_ucodes[*ovl_idx]) { *ovl_name = rspq_overlay_ucodes[*ovl_idx]->name; - else + for (int i=0;i<RSPQ_OVERLAY_TABLE_SIZE;i++) { + if (rspq->tables.overlay_table[i] == *ovl_idx * sizeof(rspq_overlay_t)) { + *ovl_id = i; + break; + } + } + } else *ovl_name = "?"; } @@ -494,16 +381,17 @@ static void rspq_crash_handler(rsp_snapshot_t *state) { rsp_queue_t *rspq = (rsp_queue_t*)(state->dmem + RSPQ_DATA_ADDRESS); uint32_t cur = rspq->rspq_dram_addr + state->gpr[28]; - uint32_t dmem_buffer = RSPQ_DEBUG ?
0x160 : 0x100; - int ovl_idx; const char *ovl_name; - rspq_get_current_ovl(rspq, &ovl_idx, &ovl_name); + int ovl_idx; const char *ovl_name; uint8_t ovl_id; + rspq_get_current_ovl(rspq, &ovl_idx, &ovl_id, &ovl_name); printf("RSPQ: Normal DRAM address: %08lx\n", rspq->rspq_dram_lowpri_addr); printf("RSPQ: Highpri DRAM address: %08lx\n", rspq->rspq_dram_highpri_addr); printf("RSPQ: Current DRAM address: %08lx + GP=%lx = %08lx\n", rspq->rspq_dram_addr, state->gpr[28], cur); - printf("RSPQ: Current Overlay: %s (%02x)\n", ovl_name, ovl_idx); + printf("RSPQ: RDP DRAM address: %08lx\n", rspq->rspq_rdp_buffers[1]); + printf("RSPQ: Current Overlay: %s (%x)\n", ovl_name, ovl_id); // Dump the command queue in DMEM. In debug mode, there is a marker to check // if we know the correct address. TODO: find a way to expose the symbols @@ -519,25 +407,35 @@ static void rspq_crash_handler(rsp_snapshot_t *state) } // Dump the command queue in RDRAM (both data before and after the current pointer). - debugf("RSPQ: RDRAM Command queue:\n"); - uint32_t *q = (uint32_t*)(0xA0000000 | (cur & 0xFFFFFF)); + debugf("RSPQ: RDRAM Command queue: %s\n", (cur&3) ? "MISALIGNED" : ""); + uint32_t *q = (uint32_t*)(0xA0000000 | (cur & 0xFFFFFC)); for (int j=0;j<4;j++) { for (int i=0;i<16;i++) debugf("%08lx%c", q[i+j*16-32], i+j*16-32==0 ? '*' : ' '); debugf("\n"); } + + debugf("RSPQ: RDP Command queue: %s\n", (cur&7) ? "MISALIGNED" : ""); + q = (uint32_t*)(0xA0000000 | (state->cop0[10] & 0xFFFFF8)); + for (int j=0;j<4;j++) { + for (int i=0;i<16;i+=2) { + debugf("%08lx", q[i+0+j*16-32]); + debugf("%08lx%c", q[i+1+j*16-32], i+j*16-32==0 ? 
'*' : ' '); + } + debugf("\n"); + } } /** @brief Special RSP assert handler for ASSERT_INVALID_COMMAND */ static void rspq_assert_invalid_command(rsp_snapshot_t *state) { rsp_queue_t *rspq = (rsp_queue_t*)(state->dmem + RSPQ_DATA_ADDRESS); - int ovl_idx; const char *ovl_name; - rspq_get_current_ovl(rspq, &ovl_idx, &ovl_name); + int ovl_idx; const char *ovl_name; uint8_t ovl_id; + rspq_get_current_ovl(rspq, &ovl_idx, &ovl_id, &ovl_name); - uint32_t dmem_buffer = RSPQ_DEBUG ? 0x1A0 : 0x100; + uint32_t dmem_buffer = RSPQ_DEBUG ? 0x160 : 0x100; uint32_t cur = dmem_buffer + state->gpr[28]; - printf("Invalid command\nCommand %02x not found in overlay %s (0x%01x)\n", state->dmem[cur], ovl_name, ovl_idx); + printf("Invalid command\nCommand %02x not found in overlay %s (0x%01x)\n", state->dmem[cur], ovl_name, ovl_id); } /** @brief Special RSP assert handler for ASSERT_INVALID_OVERLAY */ @@ -696,6 +594,10 @@ void rspq_init(void) // Start in low-priority mode rspq_switch_context(&lowpri); + // Allocate the RDP dynamic buffers. 
+ rspq_rdp_dynamic_buffers[0] = malloc_uncached(RDPQ_DYNAMIC_BUFFER_SIZE); + rspq_rdp_dynamic_buffers[1] = malloc_uncached(RDPQ_DYNAMIC_BUFFER_SIZE); + // Verify consistency of state int banner_offset = ROUND_UP(RSPQ_DATA_ADDRESS + sizeof(rsp_queue_t), 16); assertf(!memcmp(rsp_queue.data + banner_offset, "Dragon RSP Queue", 16), @@ -706,13 +608,18 @@ void rspq_init(void) rspq_data.rspq_dram_lowpri_addr = PhysicalAddr(lowpri.cur); rspq_data.rspq_dram_highpri_addr = PhysicalAddr(highpri.cur); rspq_data.rspq_dram_addr = rspq_data.rspq_dram_lowpri_addr; - rspq_data.tables.overlay_descriptors[0].state = PhysicalAddr(&dummy_overlay_state); - rspq_data.tables.overlay_descriptors[0].data_size = sizeof(uint64_t); + rspq_data.rdp_scissor_rect = (0xEDull << 56) | (1 << 12); + rspq_data.rspq_rdp_buffers[0] = PhysicalAddr(rspq_rdp_dynamic_buffers[0]); + rspq_data.rspq_rdp_buffers[1] = PhysicalAddr(rspq_rdp_dynamic_buffers[1]); + rspq_data.rspq_rdp_current = rspq_data.rspq_rdp_buffers[0]; + rspq_data.rspq_rdp_sentinel = rspq_data.rspq_rdp_buffers[0] + RDPQ_DYNAMIC_BUFFER_SIZE; + rspq_data.tables.overlay_descriptors[0].state = PhysicalAddr(dummy_overlay_state); + rspq_data.tables.overlay_descriptors[0].data_size = sizeof(uint64_t)*2; rspq_data.current_ovl = 0; // Init syncpoints rspq_syncpoints_genid = 0; - rspq_syncpoints_done = 0; + __rspq_syncpoints_done = 0; // Init blocks rspq_block = NULL; @@ -724,6 +631,21 @@ void rspq_init(void) rspq_initialized = 1; + // Initialize the RDP + MEMORY_BARRIER(); + *DP_STATUS = DP_WSTATUS_RESET_XBUS_DMEM_DMA | DP_WSTATUS_RESET_FLUSH | DP_WSTATUS_RESET_FREEZE; + MEMORY_BARRIER(); + RSP_WAIT_LOOP(500) { + if (!(*DP_STATUS & (DP_STATUS_START_VALID | DP_STATUS_END_VALID))) { + break; + } + } + MEMORY_BARRIER(); + *DP_START = rspq_data.rspq_rdp_buffers[0]; + MEMORY_BARRIER(); + *DP_END = rspq_data.rspq_rdp_buffers[0]; + MEMORY_BARRIER(); + rspq_start(); } @@ -743,6 +665,9 @@ void rspq_close(void) rspq_initialized = 0; + 
free_uncached(rspq_rdp_dynamic_buffers[0]); + free_uncached(rspq_rdp_dynamic_buffers[1]); + rspq_close_context(&highpri); rspq_close_context(&lowpri); @@ -750,20 +675,63 @@ void rspq_close(void) unregister_SP_handler(rspq_sp_interrupt); } -void* rspq_overlay_get_state(rsp_ucode_t *overlay_ucode) +static void* overlay_get_state(rsp_ucode_t *overlay_ucode, int *state_size) { uint32_t rspq_data_size = rsp_queue_data_end - rsp_queue_data_start; rspq_overlay_header_t *overlay_header = (rspq_overlay_header_t*)(overlay_ucode->data + rspq_data_size); uint32_t state_offset = (overlay_header->state_start & 0xFFF); - assertf(state_offset >= rspq_data_size + sizeof(rspq_overlay_header_t), "Saved overlay state must start after the overlay header!"); + assertf(state_offset >= rspq_data_size + sizeof(rspq_overlay_header_t), + "Saved overlay state must start after the overlay header (overlay: %s)!", overlay_ucode->name); void* state_ptr = overlay_ucode->data + state_offset; - assertf(state_ptr + overlay_header->state_size + 1 <= overlay_ucode->data_end, "Saved overlay state must be completely within the data segment!"); + assertf(state_ptr + overlay_header->state_size + 1 <= overlay_ucode->data_end, + "Saved overlay state must be completely within the data segment (overlay: %s)", overlay_ucode->name); + + if (state_size) + *state_size = overlay_header->state_size; return state_ptr; } +void* rspq_overlay_get_state(rsp_ucode_t *overlay_ucode) +{ + // Get the RDRAM pointers to the overlay state + int state_size; + uint8_t* state_ptr = overlay_get_state(overlay_ucode, &state_size); + + if (rspq_is_running) + { + // Make sure the RSP is idle, otherwise the overlay state could be modified + // at any time causing race conditions. + rspq_wait(); + + // Check if the current overlay is the one that we are requesting the + // state for. If so, read back the latest updated state from DMEM + // manually via DMA, so that the caller finds the latest contents. 
+ int ovl_idx; const char *ovl_name; uint8_t ovl_id; + rsp_queue_t *rspq = (rsp_queue_t*)((uint8_t*)SP_DMEM + RSPQ_DATA_ADDRESS); + rspq_get_current_ovl(rspq, &ovl_idx, &ovl_id, &ovl_name); + + if (ovl_idx && rspq_overlay_ucodes[ovl_idx] == overlay_ucode) { + rsp_read_data(state_ptr, state_size, state_ptr - overlay_ucode->data); + } + } + + return state_ptr; +} + +rsp_queue_t* __rspq_get_state(void) +{ + // Make sure the RSP is idle, otherwise the state could be modified + // at any time causing race conditions. + rspq_wait(); + + // Read the state and return it + rsp_read_data(&rspq_data, sizeof(rsp_queue_t), RSPQ_DATA_ADDRESS); + return &rspq_data; +} + static uint32_t rspq_overlay_get_command_count(rspq_overlay_header_t *header) { for (uint32_t i = 0; i < RSPQ_MAX_OVERLAY_COMMAND_COUNT + 1; i++) @@ -774,6 +742,7 @@ static uint32_t rspq_overlay_get_command_count(rspq_overlay_header_t *header) } assertf(0, "Overlays can only define up to %d commands!", RSPQ_MAX_OVERLAY_COMMAND_COUNT); + return 0; } static uint32_t rspq_find_new_overlay_index(void) @@ -791,26 +760,17 @@ static uint32_t rspq_find_new_overlay_index(void) static uint32_t rspq_find_new_overlay_id(uint32_t slot_count) { uint32_t cur_free_slots = 0; - bool cur_is_reserved = 0; - - uint32_t found_reserved = 0; for (uint32_t i = 1; i <= RSPQ_OVERLAY_ID_COUNT - slot_count; i++) { // If this slot is occupied, reset number of free slots found if (rspq_data.tables.overlay_table[i] != 0) { cur_free_slots = 0; - cur_is_reserved = 0; continue; } ++cur_free_slots; - // These IDs are reserved for RDP commands - if (i == 0x2 || i == 0x3) { - cur_is_reserved = 1; - } - // If required number of slots have not been found, keep searching if (cur_free_slots < slot_count) { continue; @@ -818,27 +778,11 @@ static uint32_t rspq_find_new_overlay_id(uint32_t slot_count) // We have found consecutive free slots uint32_t found_slot = i - slot_count + 1; - - // If none of those slots are reserved, we are done - if 
(!cur_is_reserved) { - return found_slot; - } - - // Otherwise, remember the found slot and keep searching. - // If we have already remembered something, don't overwrite it. - // So if only reserved slots are available, we still return the first one of them. - if (found_reserved == 0) { - found_reserved = found_slot; - } - - // Reset and try again - cur_free_slots = 0; - cur_is_reserved = 0; + return found_slot; } - // If no unreserved slots have been found, return the first free reserved one as fallback. - // If all reserved slots are occupied as well, this returns zero, which means the search failed. - return found_reserved; + // If no free slots have been found, return zero, which means the search failed. + return 0; } static void rspq_update_tables(bool is_highpri) @@ -962,9 +906,9 @@ void rspq_overlay_unregister(uint32_t overlay_id) memset(overlay, 0, sizeof(rspq_overlay_t)); // Remove all registered ids - for (uint32_t i = unshifted_id; i < slot_count; i++) + for (uint32_t i = 0; i < slot_count; i++) { - rspq_data.tables.overlay_table[i] = 0; + rspq_data.tables.overlay_table[unshifted_id + i] = 0; } // Reset the command base in the overlay header @@ -1003,9 +947,15 @@ void rspq_next_buffer(void) { // Terminate the previous chunk with a JUMP op to the new chunk. rspq_append1(prev, RSPQ_CMD_JUMP, PhysicalAddr(rspq2)); + return; } + // We are about to switch buffers. If the debugging engine is active, + // it is a good time to run it, so that it does not accumulate too many + // commands. + if (rdpq_trace) rdpq_trace(); + // Wait until the previous buffer is executed by the RSP. // We cannot write to it if it's still being executed.
// FIXME: this should probably transition to a sync-point, @@ -1069,6 +1019,7 @@ void rspq_flush(void) if (rspq_block) return; rspq_flush_internal(); + if (rdpq_trace) rdpq_trace(); } void rspq_highpri_begin(void) @@ -1078,28 +1029,33 @@ void rspq_highpri_begin(void) rspq_switch_context(&highpri); - // If we're continuing on the same buffer another highpri sequence, - // try to skip the highpri epilog and jump to the buffer continuation. - // This is a small performance gain (the RSP doesn't need to exit and re-enter - // the highpri mode) but it also allows to enqueue more than one highpri - // sequence, since we only have a single SIG_HIGHPRI_REQUESTED and there - // would be no way to tell the RSP "there are 3 sequences pending, so exit - // and re-enter three times". - // - // To skip the epilog we write single atomic words over the epilog, - // changing it with a JUMP to the buffer continuation. This operation - // is completely safe because the RSP either see the memory before the - // change (it sees the epilog) or after the change (it sees the new JUMP). - // - // In the first case, it will run the epilog and then reenter the highpri - // mode soon (as we're turning on SIG_HIGHPRI_REQUESTED anyway). In the - // second case, it's going to see the JUMP, skip the epilog and continue. - // The SIG_HIGHPRI_REQUESTED bit will be set but this function, and reset - // at the beginning of the new segment, but it doesn't matter at this point. - if (rspq_cur_pointer[-3]>>24 == RSPQ_CMD_SWAP_BUFFERS) { - volatile uint32_t *epilog = rspq_cur_pointer-4; - rspq_append1(epilog, RSPQ_CMD_JUMP, PhysicalAddr(rspq_cur_pointer)); - rspq_append1(epilog, RSPQ_CMD_JUMP, PhysicalAddr(rspq_cur_pointer)); + // Check if we're not at the beginning of the buffer. This avoids doing + // OOB reads in the next check. 
+ if (rspq_cur_pointer != rspq_ctx->buffers[rspq_ctx->buf_idx]) { + + // If we're continuing on the same buffer another highpri sequence, + // try to skip the highpri epilog and jump to the buffer continuation. + // This is a small performance gain (the RSP doesn't need to exit and re-enter + // the highpri mode) but it also allows to enqueue more than one highpri + // sequence, since we only have a single SIG_HIGHPRI_REQUESTED and there + // would be no way to tell the RSP "there are 3 sequences pending, so exit + // and re-enter three times". + // + // To skip the epilog we write single atomic words over the epilog, + // changing it with a JUMP to the buffer continuation. This operation + // is completely safe because the RSP either see the memory before the + // change (it sees the epilog) or after the change (it sees the new JUMP). + // + // In the first case, it will run the epilog and then reenter the highpri + // mode soon (as we're turning on SIG_HIGHPRI_REQUESTED anyway). In the + // second case, it's going to see the JUMP, skip the epilog and continue. + // The SIG_HIGHPRI_REQUESTED bit will be set but this function, and reset + // at the beginning of the new segment, but it doesn't matter at this point. + if (rspq_cur_pointer[-3]>>24 == RSPQ_CMD_SWAP_BUFFERS) { + volatile uint32_t *epilog = rspq_cur_pointer-4; + rspq_append1(epilog, RSPQ_CMD_JUMP, PhysicalAddr(rspq_cur_pointer)); + rspq_append1(epilog, RSPQ_CMD_JUMP, PhysicalAddr(rspq_cur_pointer)); + } } // Clear SIG_HIGHPRI_REQUESTED and set SIG_HIGHPRI_RUNNING. This is normally done @@ -1134,6 +1090,9 @@ void rspq_highpri_sync(void) { assertf(rspq_ctx != &highpri, "this function can only be called outside of highpri mode"); + // Make sure the RSP is running, otherwise we might be blocking forever. 
+ rspq_flush_internal(); + RSP_WAIT_LOOP(200) { if (!(*SP_STATUS & (SP_STATUS_SIG_HIGHPRI_REQUESTED | SP_STATUS_SIG_HIGHPRI_RUNNING))) break; @@ -1149,11 +1108,14 @@ void rspq_block_begin(void) rspq_block_size = RSPQ_BLOCK_MIN_SIZE; rspq_block = malloc_uncached(sizeof(rspq_block_t) + rspq_block_size*sizeof(uint32_t)); rspq_block->nesting_level = 0; + rspq_block->rdp_block = NULL; // Switch to the block buffer. From now on, all rspq_writes will // go into the block. rspq_switch_context(NULL); rspq_switch_buffer(rspq_block->cmds, rspq_block_size, true); + + __rdpq_block_begin(); } rspq_block_t* rspq_block_end(void) @@ -1167,6 +1129,9 @@ rspq_block_t* rspq_block_end(void) // Switch back to the normal display list rspq_switch_context(&lowpri); + // Save pointer to rdpq block (if any) + rspq_block->rdp_block = __rdpq_block_end(); + // Return the created block rspq_block_t *b = rspq_block; rspq_block = NULL; @@ -1175,6 +1140,9 @@ rspq_block_t* rspq_block_end(void) void rspq_block_free(rspq_block_t *block) { + // Free RDP blocks first + __rdpq_block_free(block->rdp_block); + // Start from the commands in the first chunk of the block int size = RSPQ_BLOCK_MIN_SIZE; void *start = block; @@ -1215,6 +1183,9 @@ void rspq_block_run(rspq_block_t *block) // mode, but it might be an acceptable limitation. assertf(rspq_ctx != &highpri, "block run is not supported in highpri mode"); + // Notify rdpq engine we are about to run a block + __rdpq_block_run(block->rdp_block); + // Write the CALL op. The second argument is the nesting level // which is used as stack slot in the RSP to save the current // pointer position. 
@@ -1230,6 +1201,16 @@ void rspq_block_run(rspq_block_t *block) } } +void rspq_block_run_rsp(int nesting_level) +{ + __rdpq_block_run(NULL); + if (rspq_block && rspq_block->nesting_level <= nesting_level) { + rspq_block->nesting_level = nesting_level + 1; + assertf(rspq_block->nesting_level < RSPQ_MAX_BLOCK_NESTING_LEVEL, + "reached maximum number of nested block runs"); + } +} + void rspq_noop() { rspq_int_write(RSPQ_CMD_NOOP); @@ -1237,8 +1218,20 @@ void rspq_noop() rspq_syncpoint_t rspq_syncpoint_new(void) { + assertf(rspq_ctx != &highpri, "cannot create syncpoint in highpri mode"); assertf(!rspq_block, "cannot create syncpoint in a block"); assertf(rspq_ctx != &highpri, "cannot create syncpoint in highpri mode"); + + // To create a syncpoint, schedule a CMD_TEST_WRITE_STATUS command that: + // 1. Wait for SP_STATUS_SIG_SYNCPOINT to go zero. This is cleared in + // the RSP interrupt routine and basically make sure that any other + // pending interrupt had been acknowledged. Otherwise, we might + // end up coalescing multiple RSP interrupts, and thus missing + // syncpoints (as we need exactly one handled interrupt per syncpoint). + // 2. Write SP_STATUS with SP_WSTATUS_SET_SIG_SYNCPOINT and SP_WSTATUS_SET_INTR, + // forcing a new RSP interrupt to be generated. The interrupt routine + // (#rspq_sp_interrupt) will notice the SP_STATUS_SIG_SYNCPOINT and know + // that the interrupt has been generated for a syncpoint. 
rspq_int_write(RSPQ_CMD_TEST_WRITE_STATUS, SP_WSTATUS_SET_INTR | SP_WSTATUS_SET_SIG_SYNCPOINT, SP_STATUS_SIG_SYNCPOINT); @@ -1247,7 +1240,7 @@ rspq_syncpoint_t rspq_syncpoint_new(void) bool rspq_syncpoint_check(rspq_syncpoint_t sync_id) { - int difference = (int)((uint32_t)(sync_id) - (uint32_t)(rspq_syncpoints_done)); + int difference = (int)((uint32_t)(sync_id) - (uint32_t)(__rspq_syncpoints_done)); return difference <= 0; } @@ -1271,6 +1264,27 @@ void rspq_syncpoint_wait(rspq_syncpoint_t sync_id) } } +void rspq_wait(void) +{ + // Check if the RDPQ module was initialized. + if (__rdpq_inited) { + // If so, a full sync requires also waiting for RDP to finish. + rdpq_fence(); + + // Also force a buffer switch to go back to dynamic buffer. This is useful + // in the case the RDP is still pointing to a static buffer (after a block + // is just finished). This allows the user to safely free the static buffer + // after rspq_wait(), as intuition would suggest. + rspq_int_write(RSPQ_CMD_RDP_SET_BUFFER, 0, 0, 0); + } + + // Wait until RSP has finished processing the queue + rspq_syncpoint_wait(rspq_syncpoint_new()); + + // Update the tracing engine (if enabled) + if (rdpq_trace) rdpq_trace(); +} + void rspq_signal(uint32_t signal) { const uint32_t allowed_mask = SP_WSTATUS_CLEAR_SIG0|SP_WSTATUS_SET_SIG0|SP_WSTATUS_CLEAR_SIG1|SP_WSTATUS_SET_SIG1; diff --git a/src/rspq/rspq_internal.h b/src/rspq/rspq_internal.h new file mode 100644 index 0000000000..4ea926692c --- /dev/null +++ b/src/rspq/rspq_internal.h @@ -0,0 +1,258 @@ +/** + * @file rspq_internal.h + * @brief RSP Command queue + * @ingroup rsp + */ + +#ifndef __LIBDRAGON_RSPQ_INTERNAL_H +#define __LIBDRAGON_RSPQ_INTERNAL_H + +#include "rsp.h" +#include "rspq_constants.h" + +/** + * RSPQ internal commands (overlay 0) + */ +enum { + /** + * @brief RSPQ command: Invalid + * + * Reserved ID for invalid command. 
This is used as a marker so that RSP knows + * when it has caught up with CPU and reached an empty portion of the buffer. + */ + RSPQ_CMD_INVALID = 0x00, + + /** + * @brief RSPQ command: No-op + * + * This commands does nothing. It can be useful for debugging purposes. + */ + RSPQ_CMD_NOOP = 0x01, + + /** + * @brief RSPQ command: Jump to another buffer + * + * This commands tells the RSP to start fetching commands from a new address. + * It is mainly used internally to implement the queue as a ring buffer (jumping + * at the start when we reach the end of the buffer). + */ + RSPQ_CMD_JUMP = 0x02, + + /** + * @brief RSPQ command: Call a block + * + * This command is used by the block functions to implement the execution of + * a block. It tells RSP to starts fetching commands from the block address, + * saving the current address in an internal save slot in DMEM, from which + * it will be recovered by CMD_RET. Using multiple slots allow for nested + * calls. + */ + RSPQ_CMD_CALL = 0x03, + + /** + * @brief RSPQ command: Return from a block + * + * This command tells the RSP to recover the buffer address from a save slot + * (from which it was currently saved by a CALL command) and begin fetching + * commands from there. It is used to finish the execution of a block. + */ + RSPQ_CMD_RET = 0x04, + + /** + * @brief RSPQ command: DMA transfer + * + * This commands runs a DMA transfer (either DRAM to DMEM, or DMEM to DRAM). + * It is used by #rspq_overlay_register to register a new overlay table into + * DMEM while the RSP is already running (to allow for overlays to be + * registered even after boot), and can be used by the users to perform + * manual DMA transfers to and from DMEM without risking a conflict with the + * RSP itself. + */ + RSPQ_CMD_DMA = 0x05, + + /** + * @brief RSPQ Command: write SP_STATUS register + * + * This command asks the RSP to write to the SP_STATUS register. It is normally + * used to set/clear signals or to raise RSP interrupts. 
+ */ + RSPQ_CMD_WRITE_STATUS = 0x06, + + /** + * @brief RSPQ Command: Swap lowpri/highpri buffers + * + * This command is used as part of the highpri feature. It allows to switch + * between lowpri and highpri queue, by saving the current buffer pointer + * in a special save slot, and restoring the buffer pointer of the other + * queue from another slot. In addition, it also writes to SP_STATUS, to + * be able to adjust signals: entering highpri mode requires clearing + * SIG_HIGHPRI_REQUESTED and setting SIG_HIGHPRI_RUNNING; exiting highpri + * mode requires clearing SIG_HIGHPRI_RUNNING. + * + * The command is called internally by RSP to switch to highpri when the + * SIG_HIGHPRI_REQUESTED is found set; then it is explicitly enqueued by the + * CPU when the highpri queue is finished to switch back to lowpri + * (see #rspq_highpri_end). + */ + RSPQ_CMD_SWAP_BUFFERS = 0x07, + + /** + * @brief RSPQ Command: Test and write SP_STATUS register + * + * This commands does a test-and-write sequence on the SP_STATUS register: first, + * it waits for a certain mask of bits to become zero, looping on it. Then + * it writes a mask to the register. It is used as part of the syncpoint + * feature to raise RSP interrupts, while waiting for the previous + * interrupt to be processed (coalescing interrupts would cause syncpoints + * to be missed). + */ + RSPQ_CMD_TEST_WRITE_STATUS = 0x08, + + /** + * @brief RSPQ command: Wait for RDP to be idle. + * + * This command will let the RSP spin-wait until the RDP is idle (that is, + * the DP_STATUS_BUSY bit in COP0_DP_STATUS goes to 0). Notice that the + * RDP is fully asynchronous, and reading DP_STATUS_BUSY basically makes + * sense only after a RDP SYNC_FULL command (#rdpq_sync_full()), when it + * really does make sure that all previous commands have finished + * running. 
+ */ + RSPQ_CMD_RDP_WAIT_IDLE = 0x09, + + /** + * @brief RSPQ Command: send a new buffer to RDP and/or configure it for new commands + * + * This command configures a new buffer in RSP for RDP commands. A buffer is described + * with three pointers: start, cur, sentinel. + * + * Start is the beginning of the buffer. Cur is the current write pointer in the buffer. + * If start==cur, it means the buffer is currently empty; otherwise, it means it contains + * some RDP commands that will be sent to RDP right away. Sentinel is the end of the + * buffer. If cur==sentinel, the buffer is full and no more commands will be written to it. + */ + RSPQ_CMD_RDP_SET_BUFFER = 0x0A, + + /** + * @brief RSPQ Command: send more data to RDP (appended to the end of the current buffer) + * + * This command basically just sets DP_END to the specified argument, allowing new + * commands appended to the current buffer to be sent to RDP. + */ + RSPQ_CMD_RDP_APPEND_BUFFER = 0x0B, +}; + +/** @brief Write an internal command to the RSP queue */ +#define rspq_int_write(cmd_id, ...) rspq_write(0, cmd_id, ##__VA_ARGS__) + +///@cond +typedef struct rdpq_block_s rdpq_block_t; +///@endcond + +/** + * @brief A rspq block: pre-recorded array of commands + * + * A block (#rspq_block_t) is a prerecorded sequence of RSP commands that can + * be played back. Blocks can be created via #rspq_block_begin / #rspq_block_end, + * and then executed by #rspq_block_run. It is also possible to do nested + * calls (a block can call another block), up to 8 levels deep.
+ */ +typedef struct rspq_block_s { + uint32_t nesting_level; ///< Nesting level of the block + rdpq_block_t *rdp_block; ///< Optional RDP static buffer (with RDP commands) + uint32_t cmds[]; ///< Block contents (commands) +} rspq_block_t; + +/** @brief RDP render mode definition + * + * This is the definition of the current RDP render mode + * + */ +typedef struct __attribute__((packed)) { + uint64_t combiner; + uint64_t combiner_mipmapmask; + uint32_t blend_step0; + uint32_t blend_step1; + uint64_t other_modes; +} rspq_rdp_mode_t; + +// TODO: We could save 4 bytes in the overlay descriptor by assuming that data == code + code_size and that code_size is always a multiple of 8 +/** @brief An RSPQ overlay ucode. This is similar to rsp_ucode_t, but is used + * internally to manage it as an RSPQ overlay */ +typedef struct rspq_overlay_t { + uint32_t code; ///< Address of the overlay code in RDRAM + uint32_t data; ///< Address of the overlay data in RDRAM + uint32_t state; ///< Address of the overlay state in RDRAM (within data) + uint16_t code_size; ///< Size of the code in bytes - 1 + uint16_t data_size; ///< Size of the data in bytes - 1 +} rspq_overlay_t; + +/// @cond +_Static_assert(sizeof(rspq_overlay_t) == RSPQ_OVERLAY_DESC_SIZE); +/// @endcond + +/** + * @brief The overlay table in DMEM. + * + * This structure is defined in DMEM by rsp_queue.S, and contains the descriptors + * for the overlays, used by the queue engine to load each overlay when needed. + */ +typedef struct rspq_overlay_tables_s { + /** @brief Table mapping overlay ID to overlay index (used for the descriptors) */ + uint8_t overlay_table[RSPQ_OVERLAY_TABLE_SIZE]; + /** @brief Descriptor for each overlay, indexed by the previous table. */ + rspq_overlay_t overlay_descriptors[RSPQ_MAX_OVERLAY_COUNT]; +} rspq_overlay_tables_t; + +/** + * @brief RSP Queue data in DMEM. + * + * This structure is defined by rsp_queue.S, and represents the + * top portion of DMEM.
+ */ +typedef struct rsp_queue_s { + rspq_overlay_tables_t tables; ///< Overlay table + /** @brief Pointer stack used by #RSPQ_CMD_CALL and #RSPQ_CMD_RET. */ + uint32_t rspq_pointer_stack[RSPQ_MAX_BLOCK_NESTING_LEVEL]; + uint32_t rspq_dram_lowpri_addr; ///< Address of the lowpri queue (special slot in the pointer stack) + uint32_t rspq_dram_highpri_addr; ///< Address of the highpri queue (special slot in the pointer stack) + uint32_t rspq_dram_addr; ///< Current RDRAM address being processed + uint32_t rspq_rdp_sentinel; ///< Current RDP RDRAM end pointer (when rdp_current reaches this, the buffer is full) + rspq_rdp_mode_t rdp_mode; ///< RDP current render mode definition + uint64_t rdp_scissor_rect; ///< Current RDP scissor rectangle + uint32_t rspq_rdp_buffers[2]; ///< RDRAM Address of dynamic RDP buffers + uint32_t rspq_rdp_current; ///< Current RDP RDRAM write pointer (normally DP_END) + uint32_t rdp_fill_color; ///< Current RDP fill color + uint8_t rdp_target_bitdepth; ///< Current RDP target buffer bitdepth + uint8_t rdp_syncfull_ongoing; ///< True if a SYNC_FULL is currently ongoing + uint8_t rdpq_debug; ///< Debug mode flag + uint8_t __padding0; + int16_t current_ovl; ///< Current overlay index +} __attribute__((aligned(16), packed)) rsp_queue_t; + +/** @brief Address of the RSPQ data header in DMEM (see #rsp_queue_t) */ +#define RSPQ_DATA_ADDRESS 32 + +/** @brief ID of the last syncpoint reached by RSP. */ +extern volatile int __rspq_syncpoints_done; + +/** @brief True if we are currently building a block. */ +static inline bool rspq_in_block(void) { + extern rspq_block_t *rspq_block; + return rspq_block != NULL; +} + +/** + * @brief Return a pointer to a copy of the current RSPQ state. + * + * @note This function forces a full sync by calling #rspq_wait to + * avoid race conditions. 
+ */ +rsp_queue_t *__rspq_get_state(void); + +/** + * @brief Notify that a RSP command is going to run a block + */ +void rspq_block_run_rsp(int nesting_level); + +#endif diff --git a/src/sprite.c b/src/sprite.c index c48536a64a..9fe8d77578 100644 --- a/src/sprite.c +++ b/src/sprite.c @@ -5,6 +5,7 @@ #include "sprite_internal.h" #include "asset.h" #include "utils.h" +#include "rdpq_tex.h" #include #include #include @@ -109,6 +110,45 @@ surface_t sprite_get_lod_pixels(sprite_t *sprite, int num_level) { return surface_make_linear(pixels, fmt, lod->width, lod->height); } +void sprite_get_detail_texparms(sprite_t *sprite, rdpq_texparms_t *parms) { + sprite_ext_t *sx = __sprite_ext(sprite); + if (!sx) + return; + if (parms) { + memset(parms, 0, sizeof(*parms)); + parms->s.translate = sx->detail.texparms.s.translate; + parms->t.translate = sx->detail.texparms.t.translate; + parms->s.scale_log = sx->detail.texparms.s.scale_log; + parms->t.scale_log = sx->detail.texparms.t.scale_log; + parms->s.repeats = sx->detail.texparms.s.repeats; + parms->t.repeats = sx->detail.texparms.t.repeats; + parms->s.mirror = sx->detail.texparms.s.mirror; + parms->t.mirror = sx->detail.texparms.t.mirror; + } +} + +surface_t sprite_get_detail_pixels(sprite_t *sprite, sprite_detail_t *info, rdpq_texparms_t *infoparms) { + // Get access to the extended sprite structure + sprite_ext_t *sx = __sprite_ext(sprite); + if (!sx) + return (surface_t){0}; + + if(!(sx->flags & SPRITE_FLAG_HAS_DETAIL)) + return (surface_t){0}; + + if(info){ + info->use_main_tex = sx->detail.use_main_texture; + info->blend_factor = sx->detail.blend_factor; + } + if(infoparms) + sprite_get_detail_texparms(sprite, infoparms); + + if(sx->detail.use_main_texture) + return sprite_get_lod_pixels(sprite, 0); + // Return the detail texture (LOD7) + return sprite_get_lod_pixels(sprite, 7); +} + uint16_t* sprite_get_palette(sprite_t *sprite) { sprite_ext_t *sx = __sprite_ext(sprite); if(!sx || !sx->pal_file_pos) @@ -135,6 +175,26 @@ 
surface_t sprite_get_tile(sprite_t *sprite, int h, int v) { tile_width, tile_height); } +bool sprite_get_texparms(sprite_t *sprite, rdpq_texparms_t *parms) { + sprite_ext_t *sx = __sprite_ext(sprite); + if (!sx) + return false; + if (!(sx->flags & SPRITE_FLAG_HAS_TEXPARMS)) + return false; + if (parms) { + memset(parms, 0, sizeof(*parms)); + parms->s.translate = sx->texparms.s.translate; + parms->t.translate = sx->texparms.t.translate; + parms->s.scale_log = sx->texparms.s.scale_log; + parms->t.scale_log = sx->texparms.t.scale_log; + parms->s.repeats = sx->texparms.s.repeats; + parms->t.repeats = sx->texparms.t.repeats; + parms->s.mirror = sx->texparms.s.mirror; + parms->t.mirror = sx->texparms.t.mirror; + } + return true; +} + int sprite_get_lod_count(sprite_t *sprite) { sprite_ext_t *sx = __sprite_ext(sprite); if (!sx) From 6d096f38c1fe65ec8ec21e5611a64cb047abf68c Mon Sep 17 00:00:00 2001 From: Dennis Heinze Date: Thu, 25 Jan 2024 23:04:15 +0100 Subject: [PATCH 05/48] rdpq: Add unit tests Co-authored-by: Giovanni Bajo --- tests/Makefile | 14 + tests/assets/grass1.ci8.png | Bin 0 -> 1704 bytes tests/assets/grass1.rgba32.png | Bin 0 -> 1704 bytes tests/assets/grass1sq.rgba32.png | Bin 0 -> 1381 bytes tests/assets/grass2.rgba32.png | Bin 0 -> 1704 bytes tests/rsp_test.S | 34 +- tests/test_rdpq.c | 2089 ++++++++++++++++++++++++++++++ tests/test_rdpq_attach.c | 58 + tests/test_rdpq_sprite.c | 79 ++ tests/test_rdpq_tex.c | 406 ++++++ tests/test_rdpq_tri.c | 194 +++ tests/test_rspq.c | 83 +- tests/testrom.c | 47 + 13 files changed, 3001 insertions(+), 3 deletions(-) create mode 100644 tests/assets/grass1.ci8.png create mode 100644 tests/assets/grass1.rgba32.png create mode 100644 tests/assets/grass1sq.rgba32.png create mode 100644 tests/assets/grass2.rgba32.png create mode 100644 tests/test_rdpq.c create mode 100644 tests/test_rdpq_attach.c create mode 100644 tests/test_rdpq_sprite.c create mode 100644 tests/test_rdpq_tex.c create mode 100644 tests/test_rdpq_tri.c diff 
--git a/tests/Makefile b/tests/Makefile index 742dcfa7ab..ab764addfc 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -3,13 +3,27 @@ include $(N64_INST)/include/n64.mk all: testrom.z64 testrom_emu.z64 + $(BUILD_DIR)/testrom.dfs: $(wildcard filesystem/*) +ASSETS = filesystem/grass1.ci8.sprite \ + filesystem/grass1.rgba32.sprite \ + filesystem/grass1sq.rgba32.sprite \ + filesystem/grass2.rgba32.sprite + OBJS = $(BUILD_DIR)/test_constructors_cpp.o \ $(BUILD_DIR)/rsp_test.o \ $(BUILD_DIR)/rsp_test2.o \ $(BUILD_DIR)/backtrace.o \ +filesystem/grass1sq.rgba32.sprite: MKSPRITE_FLAGS=--texparms 0,0,2,0 +filesystem/grass2.rgba32.sprite: MKSPRITE_FLAGS=--mipmap BOX + +filesystem/%.sprite: assets/%.png + @mkdir -p $(dir $@) + @echo " [SPRITE] $@" + @$(N64_MKSPRITE) $(MKSPRITE_FLAGS) -o filesystem "$<" + $(BUILD_DIR)/testrom.elf: $(BUILD_DIR)/testrom.o $(OBJS) testrom.z64: N64_ROM_TITLE="Libdragon Test ROM" testrom.z64: $(BUILD_DIR)/testrom.dfs diff --git a/tests/assets/grass1.ci8.png b/tests/assets/grass1.ci8.png new file mode 100644 index 0000000000000000000000000000000000000000..24486775c00e98d1ff0504a0dbba041648e70139 GIT binary patch literal 1704 zcmZ{kdo^T8PZy#2V_9VDnQ(W<9O4&9;SXPQbgCcUX1bS19?i z!vg&fNo?M6hCZ7S@jFqVTLQ`2n&*PbG-(Ah!Z#-|PYiWhUr0(aW)G8MT$*YJO+tLN zxhd)9XmAx61yvmBsnfHlnY3KIyS2g7;U)gZ0|why)Ce2Kdmns!$e7tSqAZ>P7o)T` zTcZ>uWS&>+#m=C;p)s^3S?8eDx9#OIY$FfP2-$bG=8F^X)yP|{$HNWEvGUvX8Ex)y z`EfSSw&?R0CfWXllThqcv^GxPUYnIx;A3zjLLl&_xHwh9$Glw3Ae_9&x_8xJUIShE zz|yPvU~b(wHXtKVeCq5i%ni8v#E-v=_You?;}Tyi;~zEnvj+ET8eF3sTwb3S>;C|= zOz%U^X2XnFd63ae#+&x?p<0RSntaq=aq`RQ_vf+gOm95P*R7HrW{^|G@_~I>7uHV>BYHfhmAh+taL{(Q5%f3OrRd-6Y-&XeKc0`?@_LF=jv8tNtkel|=SZb-ToXT^Y@m-G$ z&X}5Zng>6s`0y#I(*X4obRY=&SYKixc`1idru3$3*#1?p@VCpy#9`Pcd4$FR;0arh4f^Ia~h6;XP{L{Q>$O!er8HL2pV zmM*)j1L_A8ZoPF3v|ZZ#LRLSLBGvwe74s!67wR1ec~S_?z3GhU(JH6a6F(43@c(OQ zgDuE-pOw0{mNRZbr0&f4P_@yY$yl$(tc2NgV}4BO5x)a3)eOzOi+(=D1~DxILc1dm z`z~*!dKzUS^P39=q-m{ZEKtI#9)-{r9TUiIGNX!OODC+69)7ZRhuemP+ha;eAS4-gw_k$2Rb0tF-In4>g 
z)ecfHx%{;;q;tCuuZ*2>K$ zQLmQ=s`10AkxiJDI&sf1$NJO-Ma14%M2MZ@OfyRAxt%mP6JWioTYMnxT&x-^bQDh6 zt&gvS$8sEWwXws=qT80FqP8TPp|8uFgxoj;l7%i+MCxkaLF9Wp$agYaYKPM97e(R1 z$B`f>P)9^Z^wC|lo2N72>7FaWY;&31eJLhS)9wq|^|X>QTkFYUcKtDni&bu@C4$l6 z%b0@o({l^*EXMavcZ!k5@TJz{QvU2BC>-p|f0@vS>>~*Ir2M-V{rv%3)muxYo$3$XS<^Ojd>B2NOsAluk|`u_^%eR|1_DPf4MZ dg54x)ufQS#kReO_|{|DFA4O;*J literal 0 HcmV?d00001 diff --git a/tests/assets/grass1.rgba32.png b/tests/assets/grass1.rgba32.png new file mode 100644 index 0000000000000000000000000000000000000000..24486775c00e98d1ff0504a0dbba041648e70139 GIT binary patch literal 1704 zcmZ{kdo^T8PZy#2V_9VDnQ(W<9O4&9;SXPQbgCcUX1bS19?i z!vg&fNo?M6hCZ7S@jFqVTLQ`2n&*PbG-(Ah!Z#-|PYiWhUr0(aW)G8MT$*YJO+tLN zxhd)9XmAx61yvmBsnfHlnY3KIyS2g7;U)gZ0|why)Ce2Kdmns!$e7tSqAZ>P7o)T` zTcZ>uWS&>+#m=C;p)s^3S?8eDx9#OIY$FfP2-$bG=8F^X)yP|{$HNWEvGUvX8Ex)y z`EfSSw&?R0CfWXllThqcv^GxPUYnIx;A3zjLLl&_xHwh9$Glw3Ae_9&x_8xJUIShE zz|yPvU~b(wHXtKVeCq5i%ni8v#E-v=_You?;}Tyi;~zEnvj+ET8eF3sTwb3S>;C|= zOz%U^X2XnFd63ae#+&x?p<0RSntaq=aq`RQ_vf+gOm95P*R7HrW{^|G@_~I>7uHV>BYHfhmAh+taL{(Q5%f3OrRd-6Y-&XeKc0`?@_LF=jv8tNtkel|=SZb-ToXT^Y@m-G$ z&X}5Zng>6s`0y#I(*X4obRY=&SYKixc`1idru3$3*#1?p@VCpy#9`Pcd4$FR;0arh4f^Ia~h6;XP{L{Q>$O!er8HL2pV zmM*)j1L_A8ZoPF3v|ZZ#LRLSLBGvwe74s!67wR1ec~S_?z3GhU(JH6a6F(43@c(OQ zgDuE-pOw0{mNRZbr0&f4P_@yY$yl$(tc2NgV}4BO5x)a3)eOzOi+(=D1~DxILc1dm z`z~*!dKzUS^P39=q-m{ZEKtI#9)-{r9TUiIGNX!OODC+69)7ZRhuemP+ha;eAS4-gw_k$2Rb0tF-In4>g z)ecfHx%{;;q;tCuuZ*2>K$ zQLmQ=s`10AkxiJDI&sf1$NJO-Ma14%M2MZ@OfyRAxt%mP6JWioTYMnxT&x-^bQDh6 zt&gvS$8sEWwXws=qT80FqP8TPp|8uFgxoj;l7%i+MCxkaLF9Wp$agYaYKPM97e(R1 z$B`f>P)9^Z^wC|lo2N72>7FaWY;&31eJLhS)9wq|^|X>QTkFYUcKtDni&bu@C4$l6 z%b0@o({l^*EXMavcZ!k5@TJz{QvU2BC>-p|f0@vS>>~*Ir2M-V{rv%3)muxYo$3$XS<^Ojd>B2NOsAluk|`u_^%eR|1_DPf4MZ dg54x)ufQS#kReO_|{|DFA4O;*J literal 0 HcmV?d00001 diff --git a/tests/assets/grass1sq.rgba32.png b/tests/assets/grass1sq.rgba32.png new file mode 100644 index 0000000000000000000000000000000000000000..d17b3f6e780e64294b3ed370512fae37586bf051 GIT binary 
patch literal 1381 zcmY+D2apqG6vw{ATtNCcEinlU$D6aU5`W zAVt9fcsA^)DE3~#h865$MMbb+K@^=4mtjW6H}il0@4b1yc{AU9-x{l>b=s8qQvd+d z+Drz!;4@EY(s&_@WHTIsEexCNW&rxC0T^5fz=@NB9|oWR0dRx_0N(<@-1H`5PYVF! zj#)dM#-0q?+`WYA*V*Ebub?7%bt0o<`s%VP8bUdZi-Ro@X-7y#WRW0`rq^~5dEA@A z+)2n9lC}{`xj{{$T8 zzL-3aK*BjCRa6Djnp9a&=THx);D#IgMNNEApQ$vaifSqYTSKGrL}ez&97|s zaF8zzhl+48jU>w|nnk%X*5wyD*cw+_MkmzK3>wK{OcpJUn#yZC`E{+1nApw2WK0{Y z)D~A5{ZXlvsAfxAy3*iFXsA-1pT%M&-O$FC(uir*nA4L|cqmo2zqx<8A={_pSJ#(E zT2n(!7E11jz)bgtk_AJns4uQ-XL=g~8QdOK z(iuG4R~O)vOsU=xR9FeIEi9wiT7GqNW?5Z=uMeg)@q)(9NGzn(8-oLkqKlB^28_u{ zlf|Q;3OLoPNe{Qu8EuN!M+-_13zp^n#^wpliF}*cIPuVg$#W;b5~${cz$|DpG&<+)ubNqP^dF%d zH~y@%Crq3)dCJstrcIv#X3jcy_Ic;enLF=-`3o+*=;BMj!b=w|UQ&J8Qc;aqB9+M% z0D=)EimB8ZEncgu)7OKB#-?V2v8C12*51)+wsZlj&F*k^yL#Lnug@O{5`YX*VVa3V zSuPe&BvWaS$>#EWp;+oI_bsdR4=e|RD~49`t5y$>jILRG`MUKRz{V@C+_d?stFO6s z%hqkzja`2Oxbdc&x8HK>ZMWZX=UqGQzGuU|VCQ}J?|R_DhaP_9(Z?Qt;>o9;2G2bE z-19HIxO>k_FTe8YYkOaR1HAdx+wbh#|L%M5fAHZ)A9sCn0DSt{=U+5@`PIR%ztMkt z=)1#5!1q7=_|ws!fBE&dW4|B&@3H6Vl)ihGU2Xf^Z1hh0fpqKOmW=xk&j8fI49eM=>11 xP6K^T8PZy#2V_9VDnQ(W<9O4&9;SXPQbgCcUX1bS19?i z!vg&fNo?M6hCZ7S@jFqVTLQ`2n&*PbG-(Ah!Z#-|PYiWhUr0(aW)G8MT$*YJO+tLN zxhd)9XmAx61yvmBsnfHlnY3KIyS2g7;U)gZ0|why)Ce2Kdmns!$e7tSqAZ>P7o)T` zTcZ>uWS&>+#m=C;p)s^3S?8eDx9#OIY$FfP2-$bG=8F^X)yP|{$HNWEvGUvX8Ex)y z`EfSSw&?R0CfWXllThqcv^GxPUYnIx;A3zjLLl&_xHwh9$Glw3Ae_9&x_8xJUIShE zz|yPvU~b(wHXtKVeCq5i%ni8v#E-v=_You?;}Tyi;~zEnvj+ET8eF3sTwb3S>;C|= zOz%U^X2XnFd63ae#+&x?p<0RSntaq=aq`RQ_vf+gOm95P*R7HrW{^|G@_~I>7uHV>BYHfhmAh+taL{(Q5%f3OrRd-6Y-&XeKc0`?@_LF=jv8tNtkel|=SZb-ToXT^Y@m-G$ z&X}5Zng>6s`0y#I(*X4obRY=&SYKixc`1idru3$3*#1?p@VCpy#9`Pcd4$FR;0arh4f^Ia~h6;XP{L{Q>$O!er8HL2pV zmM*)j1L_A8ZoPF3v|ZZ#LRLSLBGvwe74s!67wR1ec~S_?z3GhU(JH6a6F(43@c(OQ zgDuE-pOw0{mNRZbr0&f4P_@yY$yl$(tc2NgV}4BO5x)a3)eOzOi+(=D1~DxILc1dm z`z~*!dKzUS^P39=q-m{ZEKtI#9)-{r9TUiIGNX!OODC+69)7ZRhuemP+ha;eAS4-gw_k$2Rb0tF-In4>g z)ecfHx%{;;q;tCuuZ*2>K$ zQLmQ=s`10AkxiJDI&sf1$NJO-Ma14%M2MZ@OfyRAxt%mP6JWioTYMnxT&x-^bQDh6 
zt&gvS$8sEWwXws=qT80FqP8TPp|8uFgxoj;l7%i+MCxkaLF9Wp$agYaYKPM97e(R1 z$B`f>P)9^Z^wC|lo2N72>7FaWY;&31eJLhS)9wq|^|X>QTkFYUcKtDni&bu@C4$l6 z%b0@o({l^*EXMavcZ!k5@TJz{QvU2BC>-p|f0@vS>>~*Ir2M-V{rv%3)muxYo$3$XS<^Ojd>B2NOsAluk|`u_^%eR|1_DPf4MZ dg54x)ufQS#kReO_|{|DFA4O;*J literal 0 HcmV?d00001 diff --git a/tests/rsp_test.S b/tests/rsp_test.S index 04b9f9d9b3..3b1c40b5cb 100644 --- a/tests/rsp_test.S +++ b/tests/rsp_test.S @@ -1,6 +1,7 @@ #include #define ASSERT_GP_BACKWARD 0xF001 // Also defined in test_rspq.c +#define ASSERT_TOO_MANY_NOPS 0xF002 .set noreorder .set at @@ -18,6 +19,8 @@ RSPQ_DefineCommand command_reset_log, 4 # 0x07 RSPQ_DefineCommand command_big, 132 # 0x08 RSPQ_DefineCommand command_big_out, 8 # 0x09 + RSPQ_DefineCommand command_send_rdp, 8 # 0x0A + RSPQ_DefineCommand command_send_rdp_many, 4 # 0x0B RSPQ_EndOverlayHeader RSPQ_BeginSavedState @@ -28,10 +31,13 @@ TEST_PADDING2: .long 0 TEST_VARIABLE2: .long 0 RSPQ_EndSavedState +TEST_RDP_STAGING: .quad 0 + BIG_LOG_PTR: .long 0 +#define BIG_LOG_SIZE 2048 .align 10 -BIG_LOG: .ds.b 2048 +BIG_LOG: .ds.b BIG_LOG_SIZE .align 2 TEST_BIG: .ds.b 128 @@ -46,6 +52,7 @@ command_test: sw t0, %lo(TEST_VARIABLE) command_test_high: + # Compare the last entry in the big log with the current command (RDRAM+GP). # If RDRAM pointer is the same, but GP is less than before, it means that # GP has moved backward in the same buffer, and this is surely an error. 
@@ -110,6 +117,29 @@ command_reset_log: jr ra sw zero, %lo(BIG_LOG_PTR) +command_send_rdp: + li s4, %lo(TEST_RDP_STAGING) + li s3, %lo(TEST_RDP_STAGING) + 8 + sw zero, 0(s4) + j RDPQ_Send + sw a1, 4(s4) + +command_send_rdp_many: + and a0, 0x00FFFFFF + assert_lt a0, BIG_LOG_SIZE/8, ASSERT_TOO_MANY_NOPS + lui t1, 0xC000 + li s3, %lo(BIG_LOG) +1: + sw t1, 0(s3) + sw zero, 4(s3) + addi s3, 8 + addiu a0, -1 + bnez a0, 1b + nop + j RDPQ_Send + li s4, %lo(BIG_LOG) + + command_big: addi s1, rspq_dmem_buf_ptr, -128 move s2, zero @@ -129,3 +159,5 @@ command_big_out: li s4, %lo(TEST_BIG) j DMAOut li t0, DMA_SIZE(128, 1) + +#include diff --git a/tests/test_rdpq.c b/tests/test_rdpq.c new file mode 100644 index 0000000000..c0b2f53f25 --- /dev/null +++ b/tests/test_rdpq.c @@ -0,0 +1,2089 @@ +#include +#include +#include "../src/rspq/rspq_internal.h" +#include "../src/rdpq/rdpq_internal.h" +#include + +#define BITS(v, b, e) ((unsigned int)((v) << (63-(e)) >> (63-(e)+(b)))) + +static uint64_t rdp_stream[4096]; +static struct { + int idx; + int num_cmds; + int last_som; + int last_cc; +} rdp_stream_ctx; + +static void debug_rdp_stream(void *ctx, uint64_t *cmd, int sz) { + if (rdp_stream_ctx.idx+sz >= 4096) return; + + switch (BITS(cmd[0],56,61)) { + case 0x2F: + rdp_stream_ctx.last_som = rdp_stream_ctx.idx; + break; + case 0x3C: + rdp_stream_ctx.last_cc = rdp_stream_ctx.idx; + break; + } + memcpy(rdp_stream + rdp_stream_ctx.idx, cmd, sz*8); + rdp_stream_ctx.idx += sz; + rdp_stream_ctx.num_cmds++; +} + +static void debug_rdp_stream_reset(void) { + memset(&rdp_stream_ctx, 0, sizeof(rdp_stream_ctx)); + rdp_stream_ctx.last_som = -1; + rdp_stream_ctx.last_cc = -1; +} + +static void debug_rdp_stream_init(void) { + rspq_wait(); // avoid race conditions with pending commands + debug_rdp_stream_reset(); + rdpq_debug_install_hook(debug_rdp_stream, NULL); +} + +uint64_t debug_rdp_stream_last_som(void) { + if (rdp_stream_ctx.last_som < 0) return 0; + return rdp_stream[rdp_stream_ctx.last_som]; 
+} + +uint64_t debug_rdp_stream_last_cc(void) { + if (rdp_stream_ctx.last_cc < 0) return 0; + return rdp_stream[rdp_stream_ctx.last_cc]; +} + +uint32_t debug_rdp_stream_count_cmd(uint32_t cmd_id) { + uint32_t count = 0; + for (int i=0;i> 56) == cmd_id) { + ++count; + } + } + return count; +} + +#define RDPQ_INIT() \ + rspq_init(); DEFER(rspq_close()); \ + rdpq_init(); DEFER(rdpq_close()); \ + rdpq_debug_start(); DEFER(rdpq_debug_stop()); + + +static void surface_clear(surface_t *s, uint8_t c) { + memset(s->buffer, c, s->height * s->stride); +} + +__attribute__((unused)) +static void debug_surface(const char *name, uint16_t *buf, int w, int h) { + debugf("Surface %s:\n", name); + for (int j=0;jheight;y++) { + uint32_t *line = (uint32_t*)(surf->buffer + y*surf->stride); + for (int x=0;xwidth;x++) { + color_t exp = check(x, y); + uint32_t exp32 = color_to_packed32(exp); + uint32_t found32 = line[x]; + if (found32 != exp32) { + if (diff) { + bool match = true; + for (int i=0;i<4;i++) { + uint8_t found = (found32 >> (i*8)) & 0xFF; + uint8_t exp = (exp32 >> (i*8)) & 0xFF; + if (ABS(found - exp) > diff) { + match = false; + break; + } + } + if (match) + continue; + } + + debug_surface32("Found:", surf->buffer, surf->width, surf->height); + ASSERT_EQUAL_HEX(found32, exp32, "invalid pixel at (%d,%d)", x, y); + } + } + } +} + +#define ASSERT_SURFACE_THRESHOLD(surf, thresh, func_body) ({ \ + color_t __check_surface(int x, int y) func_body; \ + assert_surface(ctx, surf, __check_surface, thresh); \ + if (ctx->result == TEST_FAILED) return; \ +}) + +#define ASSERT_SURFACE(surf, func_body) ASSERT_SURFACE_THRESHOLD(surf, 0, func_body) + + +void test_rdpq_rspqwait(TestContext *ctx) +{ + // Verify that rspq_wait() correctly also wait for RDP to terminate + // all its scheduled operations. 
+ surface_t fb = surface_alloc(FMT_RGBA32, 128, 128); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + uint32_t *framebuffer = fb.buffer; + + RDPQ_INIT(); + + color_t color = RGBA32(0x11, 0x22, 0x33, 0xFF); + + rdpq_set_mode_fill(color); + rdpq_set_color_image(&fb); + rdpq_fill_rectangle(0, 0, 128, 128); + rspq_wait(); + + // Sample the end of the buffer immediately after rspq_wait. If rspq_wait + // doesn't wait for RDP to become idle, this pixel will not be filled at + // this point. + ASSERT_EQUAL_HEX(framebuffer[127*128+127], color_to_packed32(color), + "invalid color in framebuffer at (127,127)"); +} + +void test_rdpq_clear(TestContext *ctx) +{ + RDPQ_INIT(); + + color_t fill_color = RGBA32(0xFF, 0xFF, 0xFF, 0xFF); + + surface_t fb = surface_alloc(FMT_RGBA16, 32, 32); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + rdpq_set_mode_fill(fill_color); + rdpq_set_color_image(&fb); + rdpq_fill_rectangle(0, 0, 32, 32); + rspq_wait(); + + uint16_t *framebuffer = fb.buffer; + for (uint32_t i = 0; i < 32 * 32; i++) + { + ASSERT_EQUAL_HEX(framebuffer[i], color_to_packed16(fill_color), + "Framebuffer was not cleared properly! 
Index: %lu", i); + } +} + +void test_rdpq_dynamic(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 64; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0, sizeof(expected_fb)); + + rdpq_set_mode_fill(RGBA32(0,0,0,0)); + rdpq_set_color_image(&fb); + + for (uint32_t y = 0; y < WIDTH; y++) + { + for (uint32_t x = 0; x < WIDTH; x += 4) + { + color_t c = RGBA16(x, y, x+y, x^y); + expected_fb[y * WIDTH + x] = color_to_packed16(c); + expected_fb[y * WIDTH + x + 1] = color_to_packed16(c); + expected_fb[y * WIDTH + x + 2] = color_to_packed16(c); + expected_fb[y * WIDTH + x + 3] = color_to_packed16(c); + rdpq_set_fill_color(c); + rdpq_set_scissor(x, y, x + 4, y + 1); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + } + } + + rspq_wait(); + + //dump_mem(framebuffer, TEST_RDPQ_FBSIZE); + //dump_mem(expected_fb, TEST_RDPQ_FBSIZE); + + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, WIDTH*WIDTH*2, "Framebuffer contains wrong data!"); +} + +void test_rdpq_passthrough_big(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0xFF, sizeof(expected_fb)); + + rdpq_set_color_image(&fb); + rdpq_set_blend_color(RGBA32(255,255,255,255)); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,0), (0,0,0,0))); + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, 0, BLEND_RGB, 1))); + + rdp_draw_filled_triangle(0, 0, WIDTH, 0, WIDTH, WIDTH); + rdp_draw_filled_triangle(0, 0, 0, WIDTH, WIDTH, WIDTH); + + rspq_wait(); + + //dump_mem(framebuffer, TEST_RDPQ_FBSIZE); + //dump_mem(expected_fb, TEST_RDPQ_FBSIZE); + + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, WIDTH*WIDTH*2, "Framebuffer contains wrong data!"); +} + +void test_rdpq_block(TestContext *ctx) +{ + 
RDPQ_INIT(); + + const int WIDTH = 64; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0xAA); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0, sizeof(expected_fb)); + + rspq_block_begin(); + rdpq_set_mode_fill(RGBA32(0,0,0,0)); + + for (uint32_t y = 0; y < WIDTH; y++) + { + for (uint32_t x = 0; x < WIDTH; x += 4) + { + color_t c = RGBA16(x, y, x+y, x^y); + expected_fb[y * WIDTH + x] = color_to_packed16(c); + expected_fb[y * WIDTH + x + 1] = color_to_packed16(c); + expected_fb[y * WIDTH + x + 2] = color_to_packed16(c); + expected_fb[y * WIDTH + x + 3] = color_to_packed16(c); + rdpq_set_fill_color(c); + rdpq_set_scissor(x, y, x + 4, y + 1); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + } + } + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + + rdpq_set_color_image(&fb); + rspq_block_run(block); + rspq_wait(); + + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, WIDTH*WIDTH*2, "Framebuffer contains wrong data!"); +} + +void test_rdpq_block_coalescing(TestContext *ctx) +{ + RDPQ_INIT(); + + // The actual commands don't matter because they are never executed + rspq_block_begin(); + + // These 3 commands are supposed to go to the static RDP buffer, and + // the 3 RSPQ_CMD_RDP commands will be coalesced into one + rdpq_set_env_color(RGBA32(0,0,0,0)); + rdpq_set_blend_color(RGBA32(0, 0, 0, 0)); + rdpq_set_tile(TILE0, FMT_RGBA16, 0, 16, 0); + + // This command is a fixup + rdpq_set_fill_color(RGBA16(0, 0, 0, 0)); + + // These 3 should also have their RSPQ_CMD_RDP coalesced + rdpq_set_env_color(RGBA32(0,0,0,0)); + rdpq_set_blend_color(RGBA32(0, 0, 0, 0)); + rdpq_set_tile(TILE0, FMT_RGBA16, 0, 16, 0); + + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + + uint64_t *rdp_cmds = (uint64_t*)block->rdp_block->cmds; + + uint32_t expected_cmds[] = { + // auto sync + First 3 commands + auto sync + (RSPQ_CMD_RDP_SET_BUFFER << 24) | 
PhysicalAddr(rdp_cmds + 5), + PhysicalAddr(rdp_cmds), + PhysicalAddr(rdp_cmds + RDPQ_BLOCK_MIN_SIZE/2), + // Fixup command (leaves a hole in rdp block) + (RDPQ_CMD_SET_FILL_COLOR_32 + 0xC0) << 24, + 0, + // Last 3 commands + (RSPQ_CMD_RDP_APPEND_BUFFER << 24) | PhysicalAddr(rdp_cmds + 9), + }; + + ASSERT_EQUAL_MEM((uint8_t*)block->cmds, (uint8_t*)expected_cmds, sizeof(expected_cmds), "Block commands don't match!"); +} + +void test_rdpq_block_contiguous(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 64; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0xFF, sizeof(expected_fb)); + + rspq_block_begin(); + /* 1: implicit sync pipe */ + /* 2: */ rdpq_set_color_image(&fb); + /* 3: implicit set fill color */ + /* 4: implicit set scissor */ + /* 5: */ rdpq_set_mode_fill(RGBA32(0xFF, 0xFF, 0xFF, 0xFF)); + /* 6: implicit set scissor */ + /* 7: empty slot for potential SET_COMBINE_MODE (not used by rdpq_set_mode_fill) */ + /* 8: set fill color */ + /* 9: */ rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + /*10: */ rdpq_fence(); // Put the fence inside the block so RDP never executes anything outside the block + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + + rspq_block_run(block); + rspq_syncpoint_wait(rspq_syncpoint_new()); + + uint64_t *rdp_cmds = (uint64_t*)block->rdp_block->cmds; + + ASSERT_EQUAL_HEX(*DP_START, PhysicalAddr(rdp_cmds), "DP_START does not point to the beginning of the block!"); + ASSERT_EQUAL_HEX(*DP_END, PhysicalAddr(rdp_cmds + 10), "DP_END points to the wrong address!"); + + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, WIDTH*WIDTH*2, "Framebuffer contains wrong data!"); +} + +void test_rdpq_block_dynamic(TestContext *ctx) +{ + RDPQ_INIT(); + debug_rdp_stream_init(); + + test_ovl_init(); + DEFER(test_ovl_close()); + + const int WIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, 
WIDTH, WIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + + surface_clear(&fb, 0); + rdpq_set_mode_standard(); + + void test_with_nops(int nops_to_generate) { + debug_rdp_stream_reset(); + + rspq_block_begin(); + // First, issue a passthrough command + rdpq_set_fog_color(RGBA32(0x11,0x11,0x11,0x11)); + // Then, issue a command that creates large dynamic commands + // We use a test command that creates the requested number of RDP NOPs. + rspq_test_send_rdp_nops(nops_to_generate); + // Issue another passthrough + rdpq_set_blend_color(RGBA32(0x22,0x22,0x22,0x22)); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + + rspq_block_run(block); + rdpq_set_blend_color(RGBA32(0x33,0x33,0x33,0x33)); + rspq_wait(); + + int num_fc = debug_rdp_stream_count_cmd(0xF8); // SET_FOG_COLOR + int num_bc = debug_rdp_stream_count_cmd(0xF9); // SET_BLEND_COLOR + int num_nops = debug_rdp_stream_count_cmd(0xC0); // NOOP + ASSERT_EQUAL_SIGNED(num_fc, 1, "invalid number of SET_FOG_COLOR"); + ASSERT_EQUAL_SIGNED(num_bc, 2, "invalid number of SET_BLEND_COLOR"); + ASSERT_EQUAL_SIGNED(num_nops, nops_to_generate, "invalid number of NOP"); + + // Check that all the nops come after fog and before blend + bool found_fog = false; + bool found_blend = false; + for (int i=0;i> 56) == 0xF8) { found_fog = true; continue; } + if ((rdp_stream[i] >> 56) == 0xF9) { found_blend = true; continue; } + if ((rdp_stream[i] >> 56) == 0xC0) { + ASSERT(found_fog && !found_blend, "Invalid position of NOP within the stream"); + } + } + + // Also test that there is just one static RDP block in the block. This + // verifies that, in case we switched to the dynamic buffer for the blocks, + // we correctly reused the block later.
+ int num_rdp_blocks = 0; + rdpq_block_t *rdp_block = block->rdp_block; + while (rdp_block) { + ++num_rdp_blocks; + rdp_block = rdp_block->next; + } + ASSERT_EQUAL_SIGNED(num_rdp_blocks, 1, "invalid number of RDP static blocks"); + } + + // Test with a small number of nops: + rdpq_debug_log_msg("test 8"); + test_with_nops(8); + if (ctx->result == TEST_FAILED) return; + + rdpq_debug_log_msg("test 128"); + test_with_nops(128); + if (ctx->result == TEST_FAILED) return; +} + +void test_rdpq_change_other_modes(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + + // Set standard mode with a combiner that doesn't use a fixed color + surface_clear(&fb, 0); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_TEX); + + // Switch to fill mode via change other modes, and fill the framebuffer + rdpq_debug_log_msg("try SOM change (dynamic)"); + rdpq_change_other_modes_raw(SOM_CYCLE_MASK, SOM_CYCLE_FILL); + rdpq_set_fill_color(RGBA32(255,0,0,255)); + rdpq_fill_rectangle(0,0,WIDTH,WIDTH); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(255,0,0,255); }); + + // Do it again in a block + surface_clear(&fb, 0); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_TEX); + + rspq_block_begin(); + rdpq_debug_log_msg("try SOM change (block)"); + rdpq_change_other_modes_raw(SOM_CYCLE_MASK, SOM_CYCLE_FILL); + rdpq_set_fill_color(RGBA32(255,0,0,255)); + rdpq_fill_rectangle(0,0,WIDTH,WIDTH); + rspq_block_t *b = rspq_block_end(); + DEFER(rspq_block_free(b)); + + rspq_block_run(b); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(255,0,0,255); }); +} + + +void test_rdpq_fixup_setfillcolor(TestContext *ctx) +{ + RDPQ_INIT(); + + const color_t TEST_COLOR = RGBA32(0xAA,0xBB,0xCC,0xDD); + + const int WIDTH = 64; + surface_t fb = surface_alloc(FMT_RGBA32, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + + uint32_t expected_fb32[WIDTH*WIDTH]; + 
memset(expected_fb32, 0, sizeof(expected_fb32)); + for (int i=0;i> 3; + int g = TEST_COLOR.g >> 3; + int b = TEST_COLOR.b >> 3; + expected_fb16[i] = ((r & 0x1F) << 11) | ((g & 0x1F) << 6) | ((b & 0x1F) << 1) | (TEST_COLOR.a >> 7); + } + + rdpq_set_mode_fill(RGBA32(0,0,0,0)); + + surface_clear(&fb, 0); + rdpq_set_color_image(&fb); + rdpq_set_fill_color(TEST_COLOR); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb32, WIDTH*WIDTH*4, + "Wrong data in framebuffer (32-bit, dynamic mode)"); + + surface_clear(&fb, 0); + rdpq_set_color_image_raw(0, PhysicalAddr(fb.buffer), FMT_RGBA16, WIDTH, WIDTH, WIDTH*2); + rdpq_set_fill_color(TEST_COLOR); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb16, WIDTH*WIDTH*2, + "Wrong data in framebuffer (16-bit, dynamic mode)"); + + surface_clear(&fb, 0); + rdpq_set_fill_color(TEST_COLOR); + rdpq_set_color_image(&fb); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb32, WIDTH*WIDTH*4, + "Wrong data in framebuffer (32-bit, dynamic mode, update)"); + + surface_clear(&fb, 0); + rdpq_set_fill_color(TEST_COLOR); + rdpq_set_color_image_raw(0, PhysicalAddr(fb.buffer), FMT_RGBA16, WIDTH, WIDTH, WIDTH*2); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb16, WIDTH*WIDTH*2, + "Wrong data in framebuffer (16-bit, dynamic mode, update)"); +} + +void test_rdpq_fixup_setscissor(TestContext *ctx) +{ + RDPQ_INIT(); + + const color_t TEST_COLOR = RGBA32(0xFF,0xFF,0xFF,0xFF); + + const int WIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0, sizeof(expected_fb)); + for (int y=4;y= 4 && y >= 4 && x < FBWIDTH-4 && y < FBWIDTH-4) ? 
+ RGBA32(255,0,255,0) : RGBA32(0,0,0,0); + }); + + rdpq_debug_log_msg("rect mode standard"); + surface_clear(&fb, 0); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_FLAT); + rdpq_set_prim_color(RGBA32(255,128,255,0)); + rdpq_fill_rectangle(4, 4, FBWIDTH-4, FBWIDTH-4); + rspq_wait(); + ASSERT_SURFACE(&fb, { + return (x >= 4 && y >= 4 && x < FBWIDTH-4 && y < FBWIDTH-4) ? + RGBA32(255,128,255,FULL_CVG) : RGBA32(0,0,0,0); + }); + + { + rdpq_debug_log_msg("rect mode fill (block)"); + surface_clear(&fb, 0); + rspq_block_begin(); + rdpq_set_mode_fill(RGBA32(255,0,255,0)); + rdpq_fill_rectangle(4, 4, FBWIDTH-4, FBWIDTH-4); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + rspq_block_run(block); + rspq_wait(); + ASSERT_SURFACE(&fb, { + return (x >= 4 && y >= 4 && x < FBWIDTH-4 && y < FBWIDTH-4) ? + RGBA32(255,0,255,0) : RGBA32(0,0,0,0); + }); + } + + { + rdpq_debug_log_msg("rect mode standard (block)"); + surface_clear(&fb, 0); + rspq_block_begin(); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_FLAT); + rdpq_set_prim_color(RGBA32(255,128,255,0)); + rdpq_fill_rectangle(4, 4, FBWIDTH-4, FBWIDTH-4); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + rspq_block_run(block); + rspq_wait(); + ASSERT_SURFACE(&fb, { + return (x >= 4 && y >= 4 && x < FBWIDTH-4 && y < FBWIDTH-4) ? + RGBA32(255,128,255,FULL_CVG) : RGBA32(0,0,0,0); + }); + } + + { + rdpq_debug_log_msg("only rect in block, mode fill"); + surface_clear(&fb, 0); + rdpq_set_mode_fill(RGBA32(255,0,255,0)); + rspq_block_begin(); + rdpq_fill_rectangle(4, 4, FBWIDTH-4, FBWIDTH-4); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + rspq_block_run(block); + rspq_wait(); + ASSERT_SURFACE(&fb, { + return (x >= 4 && y >= 4 && x < FBWIDTH-4 && y < FBWIDTH-4) ? 
+ RGBA32(255,0,255,0) : RGBA32(0,0,0,0); + }); + } + + { + rdpq_debug_log_msg("only rect in block, mode standard"); + surface_clear(&fb, 0); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_FLAT); + rdpq_set_prim_color(RGBA32(255,128,255,0)); + rspq_block_begin(); + rdpq_fill_rectangle(4, 4, FBWIDTH-4, FBWIDTH-4); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + rspq_block_run(block); + rspq_wait(); + ASSERT_SURFACE(&fb, { + return (x >= 4 && y >= 4 && x < FBWIDTH-4 && y < FBWIDTH-4) ? + RGBA32(255,128,255,FULL_CVG) : RGBA32(0,0,0,0); + }); + } +} + +void test_rdpq_lookup_address(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + const color_t TEST_COLOR = RGBA32(0xFF,0xFF,0xFF,0xFF); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0xFF, sizeof(expected_fb)); + + rdpq_set_mode_fill(TEST_COLOR); + + surface_clear(&fb, 0); + rspq_block_begin(); + rdpq_set_color_image_raw(1, 0, FMT_RGBA16, WIDTH, WIDTH, WIDTH * 2); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + rdpq_set_lookup_address(1, fb.buffer); + rspq_block_run(block); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, WIDTH*WIDTH*2, + "Wrong data in framebuffer (static mode)"); + + surface_clear(&fb, 0); + rdpq_set_lookup_address(1, fb.buffer); + rdpq_set_color_image_raw(1, 0, FMT_RGBA16, WIDTH, WIDTH, WIDTH * 2); + rdpq_fill_rectangle(0, 0, WIDTH, WIDTH); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, WIDTH*WIDTH*2, + "Wrong data in framebuffer (dynamic mode)"); +} + +void test_rdpq_lookup_address_offset(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 15; + surface_t fb = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + #define TEST_RDPQ_RECT_OFF 
4 + #define TEST_RDPQ_RECT_WIDTH (WIDTH-(TEST_RDPQ_RECT_OFF*2)) + + const color_t TEST_COLOR = RGBA32(0xFF,0xFF,0xFF,0xFF); + + uint16_t expected_fb[WIDTH*WIDTH]; + memset(expected_fb, 0, sizeof(expected_fb)); + for (int y=TEST_RDPQ_RECT_OFF;yrdp_block, "rdpq block is empty?"); + rspq_block_run(block); + } + + // Execute the provided function (also after the block, if requested). + // This allows us also to get coverage of the post-block autosync state + func(); + rspq_wait(); + + // Go through the stream of RDP commands and count the syncs + uint8_t cnt[4] = {0}; + for (int i=0;i> 56; + if (cmd == RDPQ_CMD_SYNC_LOAD+0xC0) cnt[0]++; + if (cmd == RDPQ_CMD_SYNC_TILE+0xC0) cnt[1]++; + if (cmd == RDPQ_CMD_SYNC_PIPE+0xC0) cnt[2]++; + if (cmd == RDPQ_CMD_SYNC_FULL+0xC0) cnt[3]++; + } + ASSERT_EQUAL_MEM(cnt, exp, 4, "Unexpected sync commands"); +} + +static void __autosync_pipe1(void) { + rdpq_set_other_modes_raw(SOM_CYCLE_FILL); + rdpq_set_fill_color(RGBA32(0,0,0,0)); + rdpq_fill_rectangle(0, 0, 8, 8); + // PIPESYNC HERE + rdpq_set_other_modes_raw(SOM_CYCLE_FILL); + rdpq_fill_rectangle(0, 0, 8, 8); + // NO PIPESYNC HERE + rdpq_set_prim_color(RGBA32(1,1,1,1)); + // NO PIPESYNC HERE + rdpq_set_prim_depth_raw(0, 1); + // NO PIPESYNC HERE + rdpq_set_scissor(0,0,1,1); + rdpq_fill_rectangle(0, 0, 8, 8); +} +static uint8_t __autosync_pipe1_exp[4] = {0,0,1,1}; +static uint8_t __autosync_pipe1_blockexp[4] = {0,0,4,1}; + +static void __autosync_tile1(void) { + rdpq_set_tile(0, FMT_RGBA16, 0, 128, 0); + rdpq_set_tile_size(0, 0, 0, 16, 16); + rdpq_texture_rectangle(0, 0, 0, 4, 4, 0, 0); + // NO TILESYNC HERE + rdpq_set_tile(1, FMT_RGBA16, 0, 128, 0); + rdpq_set_tile_size(1, 0, 0, 16, 16); + rdpq_texture_rectangle(1, 0, 0, 4, 4, 0, 0); + rdpq_set_tile(2, FMT_RGBA16, 0, 128, 0); + rdpq_set_tile_size(2, 0, 0, 16, 16); + // NO TILESYNC HERE + rdpq_set_tile(2, FMT_RGBA16, 0, 256, 0); + // NO TILESYNC HERE + rdpq_texture_rectangle(1, 0, 0, 4, 4, 0, 0); + rdpq_texture_rectangle(0, 0, 0, 4, 
4, 0, 0); + // TILESYNC HERE + rdpq_set_tile(1, FMT_RGBA16, 0, 256, 0); + rdpq_set_tile_size(1, 0, 0, 16, 16); + rdpq_texture_rectangle(1, 0, 0, 4, 4, 0, 0); + // TILESYNC HERE + rdpq_set_tile_size(1, 0, 0, 32, 32); + +} +static uint8_t __autosync_tile1_exp[4] = {0,2,0,1}; +static uint8_t __autosync_tile1_blockexp[4] = {0,5,0,1}; + +static void __autosync_load1(void) { + surface_t tex = surface_alloc(FMT_I8, 8, 8); + DEFER(surface_free(&tex)); + + rdpq_set_texture_image(&tex); + rdpq_set_tile(0, FMT_RGBA16, 0, 128, 0); + // NO LOADSYNC HERE + rdpq_load_tile(0, 0, 0, 7, 7); + rdpq_set_tile(1, FMT_RGBA16, 0, 128, 0); + // NO LOADSYNC HERE + rdpq_load_tile(1, 0, 0, 7, 7); + // NO LOADSYNC HERE + rdpq_texture_rectangle(1, 0, 0, 4, 4, 0, 0); + // LOADSYNC HERE + rdpq_load_tile(0, 0, 0, 7, 7); +} +static uint8_t __autosync_load1_exp[4] = {1,1,0,1}; +static uint8_t __autosync_load1_blockexp[4] = {3,4,2,1}; + +void test_rdpq_autosync(TestContext *ctx) { + LOG("__autosync_pipe1\n"); + __test_rdpq_autosyncs(ctx, __autosync_pipe1, __autosync_pipe1_exp, false); + if (ctx->result == TEST_FAILED) return; + + LOG("__autosync_pipe1 (block)\n"); + __test_rdpq_autosyncs(ctx, __autosync_pipe1, __autosync_pipe1_blockexp, true); + if (ctx->result == TEST_FAILED) return; + + LOG("__autosync_tile1\n"); + __test_rdpq_autosyncs(ctx, __autosync_tile1, __autosync_tile1_exp, false); + if (ctx->result == TEST_FAILED) return; + + LOG("__autosync_tile1 (block)\n"); + __test_rdpq_autosyncs(ctx, __autosync_tile1, __autosync_tile1_blockexp, true); + if (ctx->result == TEST_FAILED) return; + + LOG("__autosync_load1\n"); + __test_rdpq_autosyncs(ctx, __autosync_load1, __autosync_load1_exp, false); + if (ctx->result == TEST_FAILED) return; + + LOG("__autosync_load1 (block)\n"); + __test_rdpq_autosyncs(ctx, __autosync_load1, __autosync_load1_blockexp, true); + if (ctx->result == TEST_FAILED) return; +} + + +void test_rdpq_automode(TestContext *ctx) { + RDPQ_INIT(); + + const int FBWIDTH = 16; + 
surface_t fb = surface_alloc(FMT_RGBA16, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + const int TEXWIDTH = FBWIDTH - 8; + surface_t tex = surface_alloc(FMT_RGBA16, TEXWIDTH, TEXWIDTH); + DEFER(surface_free(&tex)); + surface_clear(&tex, 0); + + uint16_t expected_fb[FBWIDTH*FBWIDTH]; + memset(expected_fb, 0xFF, sizeof(expected_fb)); + for (int y=0;y 1 cycle + rdpq_debug_log_msg("1pass combiner => 1 cycle"); + surface_clear(&fb, 0xFF); + rdpq_mode_combiner(RDPQ_COMBINER1((ZERO, ZERO, ZERO, TEX0), (ZERO, ZERO, ZERO, ZERO))); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_1, "invalid cycle type"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + "Wrong data in framebuffer (comb=1pass, blender=off)"); + + // Activate blending (1-pass blender) => 1 cycle + rdpq_debug_log_msg("1pass blender => 1 cycle"); + surface_clear(&fb, 0xFF); + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, FOG_ALPHA, BLEND_RGB, INV_MUX_ALPHA))); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_1, "invalid cycle type"); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + "Wrong data in framebuffer (comb=1pass, blender=1pass)"); + + // Activate fogging (2-pass blender) => 2 cycle + rdpq_debug_log_msg("2pass blender => 2 cycle"); + surface_clear(&fb, 0xFF); + rdpq_mode_fog(RDPQ_BLENDER((BLEND_RGB, ZERO, IN_RGB, INV_MUX_ALPHA))); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_2, "invalid cycle type"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + 
"Wrong data in framebuffer (comb=1pass, blender=2pass)"); + + // Set two-pass combiner => 2 cycle + rdpq_debug_log_msg("2pass combiner => 2 cycle"); + surface_clear(&fb, 0xFF); + rdpq_mode_combiner(RDPQ_COMBINER2( + (ZERO, ZERO, ZERO, ENV), (ENV, ZERO, TEX0, PRIM), + (TEX1, ZERO, COMBINED_ALPHA, ZERO), (ZERO, ZERO, ZERO, ZERO))); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_2, "invalid cycle type"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + "Wrong data in framebuffer (comb=2pass, blender=2pass)"); + + // Disable fogging (1 pass blender) => 2 cycle + rdpq_debug_log_msg("1pass blender => 2 cycle"); + surface_clear(&fb, 0xFF); + rdpq_mode_fog(0); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_2, "invalid cycle type"); + ASSERT_EQUAL_HEX(som & 0xCCCC0000, 0, "invalid blender formula in first cycle"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + "Wrong data in framebuffer (comb=2pass, blender=1pass)"); + + // Set simple combiner => 1 cycle + rdpq_debug_log_msg("1pass combiner => 1 cycle"); + surface_clear(&fb, 0xFF); + rdpq_mode_combiner(RDPQ_COMBINER1((ZERO, ZERO, ZERO, TEX0), (ZERO, ZERO, ZERO, ZERO))); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_1, "invalid cycle type"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + "Wrong data in framebuffer (comb=1pass, blender=1pass)"); + + // Push the current mode, then modify several states, then pop. 
+ rdpq_debug_log_msg("push/pop"); + rdpq_mode_push(); + rdpq_mode_combiner(RDPQ_COMBINER2( + (ZERO, ZERO, ZERO, TEX0), (ZERO, ZERO, ZERO, ZERO), + (COMBINED, ZERO, ZERO, TEX1), (ZERO, ZERO, ZERO, ZERO) + )); + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, ZERO, BLEND_RGB, ONE))); + rdpq_mode_dithering(DITHER_NOISE_NOISE); + rdpq_mode_pop(); + rdpq_texture_rectangle(0, 4, 4, FBWIDTH-4, FBWIDTH-4, 0, 0); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_1, "invalid cycle type"); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*2, + "Wrong data in framebuffer (comb=1pass, blender=1pass (after pop))"); +} + +void test_rdpq_blender(TestContext *ctx) { + RDPQ_INIT(); + + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA16, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + const int TEXWIDTH = FBWIDTH; + surface_t tex = surface_alloc(FMT_RGBA16, TEXWIDTH, TEXWIDTH); + DEFER(surface_free(&tex)); + surface_clear(&tex, 0xAA); + + const color_t BLEND_COLOR = RGBA32(0x30, 0x30, 0x30, 0xFF); + const color_t BLEND_COLOR2 = RGBA32(0x30*2-1, 0x30*2-1, 0x30*2-1, 0xFF); + + uint16_t expected_fb_blend[FBWIDTH*FBWIDTH], expected_fb_blend2[FBWIDTH*FBWIDTH], expected_fb_tex[FBWIDTH*FBWIDTH]; + memset(expected_fb_blend, 0, sizeof(expected_fb_blend)); + memset(expected_fb_blend2, 0, sizeof(expected_fb_blend2)); + memset(expected_fb_tex, 0, sizeof(expected_fb_tex)); + for (int y=4;y= 4 && i < 12 && j >= 4 && j <12) + expected_fb[j * FBWIDTH + i] = alt ? 0x989898e0 : 0x585858e0; + else + expected_fb[j * FBWIDTH + i] = alt ? 
0xB0B0B080 : 0x30303080; + } + } + + const int TEXWIDTH = 8; + surface_t tex = surface_alloc(FMT_RGBA32, TEXWIDTH, TEXWIDTH); + DEFER(surface_free(&tex)); + surface_clear(&tex, 0x80); + + rdpq_set_fog_color(RGBA32(0,0,0,0x80)); + rdpq_set_color_image(&fb); + rdpq_tex_upload(TILE0, &tex, NULL); + rdpq_set_mode_standard(); + rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY); + rdpq_triangle(&TRIFMT_TEX, + (float[]){ 4.0f, 4.0f, 0.0f, 0.0f, 1.0f }, + (float[]){ 12.0f, 4.0f, 8.0f, 0.0f, 1.0f }, + (float[]){ 12.0f, 12.0f, 8.0f, 8.0f, 1.0f } + ); + rdpq_triangle(&TRIFMT_TEX, + (float[]){ 4.0f, 4.0f, 0.0f, 0.0f, 1.0f }, + (float[]){ 4.0f, 12.0f, 0.0f, 8.0f, 1.0f }, + (float[]){ 12.0f, 12.0f, 8.0f, 8.0f, 1.0f } + ); + rspq_wait(); + ASSERT_EQUAL_MEM((uint8_t*)fb.buffer, (uint8_t*)expected_fb, FBWIDTH*FBWIDTH*4, "Wrong data in framebuffer"); + uint64_t som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & SOM_CYCLE_MASK, SOM_CYCLE_1, "invalid cycle type"); +} + +void test_rdpq_fog(TestContext *ctx) { + RDPQ_INIT(); + + const int FULL_CVG = 7 << 5; // full coverage + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + rdpq_set_fog_color(RGBA32(0,255,0,255)); + rdpq_set_blend_color(RGBA32(0,0,255,255)); + surface_clear(&fb, 0); + + // Draw with standard texturing + rdpq_debug_log_msg("Standard combiner SHADE - no fog"); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_SHADE); + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, 0, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 0.0f, 1.0f, 0.5f, } + ); + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ 0, FBWIDTH, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 0.0f, 1.0f, 0.5f, } + ); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(255,0,255,FULL_CVG); }); + + // 
Activate fog + rdpq_debug_log_msg("Standard combiner SHADE - fog"); + rdpq_mode_fog(RDPQ_FOG_STANDARD); + // Set also a blender that uses IN_ALPHA. + // This has two effects: it tests the whole pipeline after switching to + // 2cycle mode, and then also checks that IN_ALPHA is 1, which is what + // we expect for COMBINER_SHADE when fog is in effect. + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, IN_ALPHA, BLEND_RGB, INV_MUX_ALPHA))); + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, 0, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 0.0f, 1.0f, 0.5f, } + ); + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ 0, FBWIDTH, 1.0f, 0.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 0.0f, 1.0f, 0.5f, } + ); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(0x77,0x87,0x77,FULL_CVG); }); + + // Draw with a custom combiner + rdpq_debug_log_msg("Custom combiner - no fog"); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((1,0,PRIM,0), (1,0,PRIM,0))); + rdpq_set_prim_color(RGBA32(255,0,0,255)); + rdpq_fill_rectangle(0, 0, FBWIDTH, FBWIDTH); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(255,0,0,FULL_CVG); }); + + // Activate fog + rdpq_debug_log_msg("Custom combiner - fog"); + rdpq_mode_fog(RDPQ_FOG_STANDARD); + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 1.0f, 1.0f, 0.5f, } + ); + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ 0, FBWIDTH, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 1.0f, 1.0f, 0.5f, } + ); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(0x77,0x87,0,FULL_CVG); }); + + // Disable fog + rdpq_mode_fog(0); + rdpq_fill_rectangle(0, 0, FBWIDTH, FBWIDTH); + rspq_wait(); + ASSERT_SURFACE(&fb, 
{ return RGBA32(255,0,0,FULL_CVG); }); +} + +void test_rdpq_mode_antialias(TestContext *ctx) { + RDPQ_INIT(); + + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + surface_clear(&fb, 0); + + void draw_tri(void) { + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 1.0f, 1.0f, 0.5f, } + ); + } + + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_SHADE); + draw_tri(); + rspq_wait(); + uint64_t som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_CYCLE_1 | SOM_COVERAGE_DEST_ZAP, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("aa"); + rdpq_mode_antialias(AA_STANDARD); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_READ_ENABLE | SOM_CYCLE_1 | SOM_COVERAGE_DEST_CLAMP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("ra"); + rdpq_mode_antialias(AA_REDUCED); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_CYCLE_1 | SOM_COVERAGE_DEST_CLAMP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("blender+ra"); + rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | 
SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOM_CYCLE_1 | SOM_COVERAGE_DEST_WRAP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("blender+aa"); + rdpq_mode_antialias(AA_STANDARD); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOM_CYCLE_1 | SOM_COVERAGE_DEST_WRAP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("blender"); + rdpq_mode_antialias(AA_NONE); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_BLENDING | SOM_READ_ENABLE | SOM_CYCLE_1 | SOM_COVERAGE_DEST_ZAP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("blender+aa+fog"); + rdpq_mode_fog(RDPQ_FOG_STANDARD); + rdpq_mode_antialias(AA_STANDARD); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_2 | SOM_COVERAGE_DEST_WRAP, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("blender+ra+fog"); + rdpq_mode_fog(RDPQ_FOG_STANDARD); + rdpq_mode_antialias(AA_REDUCED); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_2 | SOM_COVERAGE_DEST_WRAP, + 
"invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("ra+fog"); + rdpq_mode_blender(false); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOMX_FOG | SOM_CYCLE_2 | SOM_COVERAGE_DEST_CLAMP, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("aa+fog"); + rdpq_mode_antialias(AA_STANDARD); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_2 | SOM_COVERAGE_DEST_CLAMP, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("fog"); + rdpq_mode_antialias(AA_NONE); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_BLENDING | SOMX_FOG | SOM_CYCLE_1 | SOM_COVERAGE_DEST_ZAP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("nothing"); + rdpq_mode_fog(0); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | SOM_COVERAGE_DEST_MASK), + SOM_CYCLE_1 | SOM_COVERAGE_DEST_ZAP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0xCCCC0000, 0, "invalid blender formula in first cycle"); + ASSERT_EQUAL_HEX(som & 0x33330000, 0, "invalid blender formula in second cycle"); + + rdpq_debug_log_msg("aa+lod"); + rdpq_mode_antialias(AA_STANDARD); + rdpq_mode_mipmap(MIPMAP_NEAREST, 1); + draw_tri(); + rspq_wait(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_AA_ENABLE | SOM_BLENDING | SOM_READ_ENABLE | SOMX_FOG | SOM_CYCLE_MASK | 
SOM_COVERAGE_DEST_MASK), + SOM_AA_ENABLE | SOM_READ_ENABLE | SOM_CYCLE_2 | SOM_COVERAGE_DEST_CLAMP, + "invalid SOM configuration: %08llx", som); + ASSERT_EQUAL_HEX(som & 0xCCCC0000, 0, "invalid blender formula in first cycle"); +} + +void test_rdpq_mode_alphacompare(TestContext *ctx) { + RDPQ_INIT(); + + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + surface_clear(&fb, 0); + + void draw_tri(void) { + rdpq_triangle(&TRIFMT_SHADE, + // X Y R G B A + (float[]){ 0, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, 0, 1.0f, 1.0f, 1.0f, 0.5f, }, + (float[]){ FBWIDTH, FBWIDTH, 1.0f, 1.0f, 1.0f, 0.5f, } + ); + } + + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER_SHADE); + rdpq_mode_antialias(AA_NONE); + + rdpq_debug_log_msg("threshold=0"); + rdpq_mode_alphacompare(0); + draw_tri(); + uint64_t som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_ALPHACOMPARE_MASK | SOM_BLALPHA_MASK), + SOM_ALPHACOMPARE_NONE | SOM_BLALPHA_CC, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("threshold>0"); + rdpq_mode_alphacompare(127); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_ALPHACOMPARE_MASK | SOM_BLALPHA_MASK), + SOM_ALPHACOMPARE_THRESHOLD | SOM_BLALPHA_CC, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("threshold<0"); + rdpq_mode_alphacompare(-1); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_ALPHACOMPARE_MASK | SOM_BLALPHA_MASK), + SOM_ALPHACOMPARE_NOISE | SOM_BLALPHA_CC, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("aa+threshold=0"); + rdpq_mode_antialias(AA_STANDARD); + rdpq_mode_alphacompare(0); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_ALPHACOMPARE_MASK | SOM_BLALPHA_MASK), + SOM_ALPHACOMPARE_NONE | SOM_BLALPHA_CVG, + "invalid SOM configuration: %08llx", som); + + 
rdpq_debug_log_msg("aa+threshold>0"); + rdpq_mode_alphacompare(127); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_ALPHACOMPARE_MASK | SOM_BLALPHA_MASK), + SOM_ALPHACOMPARE_NONE | SOM_BLALPHA_CVG_TIMES_CC, + "invalid SOM configuration: %08llx", som); + + rdpq_debug_log_msg("aa+threshold<0"); + rdpq_mode_alphacompare(-1); + draw_tri(); + som = rdpq_get_other_modes_raw(); + ASSERT_EQUAL_HEX(som & + (SOM_ALPHACOMPARE_MASK | SOM_BLALPHA_MASK), + SOM_ALPHACOMPARE_NONE | SOM_BLALPHA_CVG_TIMES_CC, + "invalid SOM configuration: %08llx", som); +} + +void test_rdpq_mode_freeze(TestContext *ctx) { + RDPQ_INIT(); + debug_rdp_stream_init(); + + const int FULL_CVG = 7 << 5; // full coverage + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + surface_clear(&fb, 0); + + rdpq_debug_log_msg("Mode freeze: standard"); + rdpq_set_mode_fill(RGBA32(255,255,255,255)); + rdpq_debug_log_msg("Freeze start"); + rdpq_mode_begin(); + rdpq_set_mode_standard(); + rdpq_set_blend_color(RGBA32(255,255,255,255)); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,0), (0,0,0,0))); + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, 0, BLEND_RGB, 1))); + rdpq_mode_filter(FILTER_POINT); + rdpq_mode_alphacompare(false); + rdpq_debug_log_msg("Freeze end"); + rdpq_mode_end(); + + rdp_draw_filled_triangle(0, 0, FBWIDTH, 0, FBWIDTH, FBWIDTH); + rdp_draw_filled_triangle(0, 0, 0, FBWIDTH, FBWIDTH, FBWIDTH); + rspq_wait(); + + ASSERT_SURFACE(&fb, { return RGBA32(255,255,255,FULL_CVG); }); + + uint32_t num_ccs = debug_rdp_stream_count_cmd(RDPQ_CMD_SET_COMBINE_MODE_RAW + 0xC0); + uint32_t num_soms = debug_rdp_stream_count_cmd(RDPQ_CMD_SET_OTHER_MODES + 0xC0); + + // Inspect the dynamic buffer. 
We want to verify that only the right number of SOM/CC + ASSERT_EQUAL_SIGNED(num_ccs, 1, "too many SET_COMBINE_MODE"); + ASSERT_EQUAL_SIGNED(num_soms, 2, "too many SET_OTHER_MODES"); // 1 SOM for fill, 1 SOM for standard + + // Try again within a block. + debug_rdp_stream_reset(); + surface_clear(&fb, 0); + rdpq_debug_log_msg("Mode freeze: in block"); + rspq_block_begin(); + rdpq_set_mode_fill(RGBA32(255,255,255,255)); + rdpq_debug_log_msg("Freeze start"); + rdpq_mode_begin(); + rdpq_set_mode_standard(); + rdpq_set_blend_color(RGBA32(255,255,255,255)); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,0), (0,0,0,0))); + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, 0, BLEND_RGB, 1))); + rdpq_mode_filter(FILTER_POINT); + rdpq_mode_alphacompare(false); + rdpq_mode_end(); + rdp_draw_filled_triangle(0, 0, FBWIDTH, 0, FBWIDTH, FBWIDTH); + rdp_draw_filled_triangle(0, 0, 0, FBWIDTH, FBWIDTH, FBWIDTH); + rspq_block_t *block = rspq_block_end(); + DEFER(rspq_block_free(block)); + + rspq_block_run(block); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(255,255,255,FULL_CVG); }); + + num_ccs = debug_rdp_stream_count_cmd(RDPQ_CMD_SET_COMBINE_MODE_RAW + 0xC0); + num_soms = debug_rdp_stream_count_cmd(RDPQ_CMD_SET_OTHER_MODES + 0xC0); + int num_nops = debug_rdp_stream_count_cmd(0xC0); + ASSERT_EQUAL_SIGNED(num_ccs, 1, "too many SET_COMBINE_MODE"); + ASSERT_EQUAL_SIGNED(num_soms, 2, "too many SET_OTHER_MODES"); // 1 SOM for fill, 1 SOM for standard + ASSERT_EQUAL_SIGNED(num_nops, 1, "too many NOPs"); // 1 NOP from rdpq_set_mode_fill (skips generating SET_CC) + + // Try again within a block, but doing the freeze outside of it + debug_rdp_stream_reset(); + surface_clear(&fb, 0); + rdpq_debug_log_msg("Mode freeze: calling a block in frozen mode"); + + rspq_block_begin(); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,0), (0,0,0,0))); + rdpq_mode_blender(RDPQ_BLENDER((IN_RGB, 0, BLEND_RGB, 1))); + 
rdpq_set_blend_color(RGBA32(255,255,255,255)); + rspq_block_t *block2 = rspq_block_end(); + DEFER(rspq_block_free(block2)); + + rdpq_set_mode_fill(RGBA32(255,255,255,255)); + rdpq_debug_log_msg("Freeze start"); + rdpq_mode_begin(); + rspq_block_run(block2); + rdpq_debug_log_msg("Freeze end"); + rdpq_mode_end(); + rdp_draw_filled_triangle(0, 0, FBWIDTH, 0, FBWIDTH, FBWIDTH); + rdp_draw_filled_triangle(0, 0, 0, FBWIDTH, FBWIDTH, FBWIDTH); + rspq_wait(); + ASSERT_SURFACE(&fb, { return RGBA32(255,255,255,FULL_CVG); }); + + num_ccs = debug_rdp_stream_count_cmd(RDPQ_CMD_SET_COMBINE_MODE_RAW + 0xC0); + num_soms = debug_rdp_stream_count_cmd(RDPQ_CMD_SET_OTHER_MODES + 0xC0); + num_nops = debug_rdp_stream_count_cmd(0xC0); + ASSERT_EQUAL_SIGNED(num_ccs, 1, "too many SET_COMBINE_MODE"); + ASSERT_EQUAL_SIGNED(num_soms, 2, "too many SET_OTHER_MODES"); // 1 SOM for fill, 1 SOM for standard + ASSERT_EQUAL_SIGNED(num_nops, 9, "wrong number of NOPs"); +} + +void test_rdpq_mode_freeze_stack(TestContext *ctx) { + RDPQ_INIT(); + + const int FULL_CVG = 7 << 5; // full coverage + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + rdpq_set_color_image(&fb); + surface_clear(&fb, 0); + + rdpq_debug_log_msg("begin / push / end"); + rdpq_set_mode_standard(); + rdpq_mode_begin(); + rdpq_mode_push(); + rdpq_set_mode_fill(RGBA32(255,255,255,0)); + rdpq_mode_end(); + + rdpq_fill_rectangle(2, 0, FBWIDTH-2, FBWIDTH); + rspq_wait(); + + ASSERT_SURFACE(&fb, { + return (x>=2 && x=2 && x> 56) == 0xCB) { + int levels = ((rdp_stream[i] >> 51) & 7) + 1; + ASSERT_EQUAL_SIGNED(levels, 4, "invalid number of mipmap levels"); + } + } +} + +void test_rdpq_autotmem(TestContext *ctx) { + RDPQ_INIT(); + debug_rdp_stream_init(); + + rdpq_set_tile_autotmem(0); + rdpq_set_tile(TILE0, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(128); + rdpq_set_tile(TILE1, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(64); + 
rdpq_set_tile(TILE2, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(-1); + + rdpq_set_tile_autotmem(0); + rdpq_set_tile(TILE3, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(128); + rdpq_set_tile(TILE4, FMT_RGBA16, 0, 32, NULL); + rdpq_set_tile_autotmem(-1); + + rdpq_set_tile_autotmem(0); + rdpq_set_tile(TILE5, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(128); + rdpq_set_tile_autotmem(0); + rdpq_set_tile(TILE6, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(64); + rdpq_set_tile(TILE7, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(-1); + rdpq_set_tile_autotmem(-1); + + rspq_wait(); + + int expected[] = { 0, 128, 128+64, 0, 0, 0, 128, 128+64 }; + + int tidx = 0; + for (int i=0;i> 56) == 0xF5) { // Find all SET_TILE + // Check tile number + int tile = (rdp_stream[i] >> 24) & 7; + ASSERT_EQUAL_SIGNED(tile, tidx, "invalid tile number"); + tidx++; + + int addr = ((rdp_stream[i] >> 32) & 0x1FF) * 8; + ASSERT_EQUAL_SIGNED(addr, expected[tile], "invalid tile %d address", tile); + } + } + + ASSERT_EQUAL_SIGNED(tidx, 8, "invalid number of tiles"); +} + +void test_rdpq_autotmem_reuse(TestContext *ctx) { + RDPQ_INIT(); + debug_rdp_stream_init(); + + rdpq_set_tile_autotmem(0); + rdpq_set_tile(TILE0, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(128); + rdpq_set_tile(TILE1, FMT_RGBA16, RDPQ_AUTOTMEM, 32, NULL); + rdpq_set_tile_autotmem(64); + rdpq_set_tile(TILE2, FMT_RGBA16, RDPQ_AUTOTMEM_REUSE(0), 32, NULL); + rdpq_set_tile(TILE3, FMT_RGBA16, RDPQ_AUTOTMEM_REUSE(64), 32, NULL); + rspq_wait(); + + int expected[] = { 0, 128, 128+0, 128+64 }; + + int tidx = 0; + for (int i=0;i> 56) == 0xF5) { // Find all SET_TILE + // Check tile number + int tile = (rdp_stream[i] >> 24) & 7; + ASSERT_EQUAL_SIGNED(tile, tidx, "invalid tile number"); + tidx++; + + int addr = ((rdp_stream[i] >> 32) & 0x1FF) * 8; + ASSERT_EQUAL_SIGNED(addr, expected[tile], "invalid tile %d address", tile); + } + } + + 
ASSERT_EQUAL_SIGNED(tidx, 4, "invalid number of tiles"); +} + +void test_rdpq_texrect_passthrough(TestContext *ctx) { + RDPQ_INIT(); + + rspq_block_t *block; + uint32_t texrect; + + uint32_t find_block_texrect(uint32_t *cmds) { + for (int i=0; i<16; i++) { + if (cmds[i] >> 24 == 0xE4) { + return cmds[i]; + } + } + return 0; + } + + // Block with no mode setting. Must be a fixup. + rspq_block_begin(); + rdpq_texture_rectangle(TILE0, 0, 0, 16, 16, 0, 0); + block = rspq_block_end(); + ASSERT_EQUAL_HEX(block->rdp_block->cmds[0] >> 24, 0xC0, "expected NOP in block"); + rspq_block_free(block); + + // Block with standard mode. Should contain a rectangle with exclusive bounds + rspq_block_begin(); + rdpq_set_mode_standard(); + rdpq_texture_rectangle(TILE0, 0, 0, 16, 16, 0, 0); + block = rspq_block_end(); + texrect = find_block_texrect(block->rdp_block->cmds); + ASSERT_EQUAL_HEX(texrect, 0xe4040040, "expected exclusive bounds"); + rspq_block_free(block); + + // Block with copy mode. Should contain a rectangle with exclusive bounds + rspq_block_begin(); + rdpq_set_mode_copy(true); + rdpq_texture_rectangle(TILE0, 0, 0, 16, 16, 0, 0); + block = rspq_block_end(); + texrect = find_block_texrect(block->rdp_block->cmds); + ASSERT_EQUAL_HEX(texrect, 0xe403c03c, "expected inclusive bounds"); + rspq_block_free(block); + + // Block with standard mode coming from a sub-block. 
+ // Register a block that sets the standard mode + rspq_block_begin(); + rdpq_set_mode_standard(); + rspq_block_t *block_mode = rspq_block_end(); + + rspq_block_begin(); + rspq_block_run(block_mode); + rdpq_texture_rectangle(TILE0, 0, 0, 16, 16, 0, 0); + block = rspq_block_end(); + texrect = find_block_texrect(block->rdp_block->cmds); + ASSERT_EQUAL_HEX(texrect, 0xe4040040, "expected exclusive bounds"); + rspq_block_free(block); + rspq_block_free(block_mode); + + // Block with standard mode, with a sub-block that doesn't touch mode + rspq_block_begin(); + rdpq_set_prim_color(RGBA32(0x00, 0x00, 0x00, 0x00)); + block_mode = rspq_block_end(); + + rspq_block_begin(); + rdpq_set_mode_standard(); + rspq_block_run(block_mode); + rdpq_texture_rectangle(TILE0, 0, 0, 16, 16, 0, 0); + block = rspq_block_end(); + texrect = find_block_texrect(block->rdp_block->cmds); + ASSERT_EQUAL_HEX(texrect, 0xe4040040, "expected exclusive bounds"); + rspq_block_free(block); + rspq_block_free(block_mode); +} diff --git a/tests/test_rdpq_attach.c b/tests/test_rdpq_attach.c new file mode 100644 index 0000000000..1eee02ed59 --- /dev/null +++ b/tests/test_rdpq_attach.c @@ -0,0 +1,58 @@ + +void test_rdpq_attach_clear(TestContext *ctx) +{ + RDPQ_INIT(); + + const int WIDTH = 64; + surface_t fb = surface_alloc(FMT_RGBA32, WIDTH, WIDTH); + DEFER(surface_free(&fb)); + surface_t fbz = surface_alloc(FMT_RGBA16, WIDTH, WIDTH); + DEFER(surface_free(&fbz)); + + surface_clear(&fb, 0xAA); + + rdpq_attach_clear(&fb, NULL); + rdpq_detach_wait(); + + ASSERT_SURFACE(&fb, { return RGBA32(0,0,0,0xFF); }); + + surface_clear(&fb, 0xAA); + surface_clear(&fbz, 0x22); + + rdpq_attach_clear(&fb, &fbz); + rdpq_detach_wait(); + + ASSERT_SURFACE(&fb, { return RGBA32(0,0,0,0xFF); }); + for (int i=0; i= s1surf.width) x -= s1surf.width; + if (y >= s1surf.height) y -= s1surf.height; + color_t c = color_from_packed32(((uint32_t*)s1surf.buffer)[y*s1surf.width + x]); + c.a = 0xE0; + return c; + }); +} + +void 
test_rdpq_sprite_lod(TestContext *ctx) +{ + RDPQ_INIT(); + + // Load a sprite that contains mipmaps. We want to check that they are + // loaded correctly and mipmap mode is configured. + sprite_t *s1 = sprite_load("rom:/grass2.rgba32.sprite"); + DEFER(sprite_free(s1)); + surface_t s1surf = sprite_get_pixels(s1); + surface_t s1lod1 = sprite_get_lod_pixels(s1, 1); + ASSERT_EQUAL_SIGNED(s1surf.width / 2, s1lod1.width, "invalid width of LOD 1"); + + surface_t fb = surface_alloc(FMT_RGBA32, s1surf.width, s1surf.height); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + float scale = 0.499999f; + float cs = 24 * scale; // this compute a scale that forces LOD_FRAC to be 1 everywhere + + rdpq_attach(&fb, NULL); + rdpq_set_mode_standard(); + rdpq_sprite_upload(TILE0, s1, NULL); + + // Draw a 12x12 rectangle with the 24x24 texture. This will blit the first + // LOD as-is. + rdpq_triangle(&TRIFMT_TEX, + (float[]){ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f }, + (float[]){ cs, 0.0f, 24.0f, 0.0f, 1.0f }, + (float[]){ cs, cs, 24.0f,24.0f, 1.0f } + ); + rdpq_triangle(&TRIFMT_TEX, + (float[]){ 0.0f, 0.0f, 0.0f, 0.0f, 1.0f }, + (float[]){ cs, cs, 24.0f,24.0f, 1.0f }, + (float[]){ 0.0f, cs, 0.0f,24.0f, 1.0f } + ); + + rdpq_detach_wait(); + + // Check with a threshold because LOD interpolation isn't bit perfect + // (as LOD_FRAC isn't 1.0f but rather 255.0/256.0) + ASSERT_SURFACE_THRESHOLD(&fb, 0x1, { + if (x <= (int)cs && y <= (int)cs) { + color_t c = color_from_packed32(((uint32_t*)s1lod1.buffer)[y*s1lod1.width + x]); + c.a = 0xE0; + return c; + } + return color_from_packed32(0); + }); +} diff --git a/tests/test_rdpq_tex.c b/tests/test_rdpq_tex.c new file mode 100644 index 0000000000..f4f2fae56e --- /dev/null +++ b/tests/test_rdpq_tex.c @@ -0,0 +1,406 @@ +#include + +static inline void surface_set_pixel(surface_t *surf, int x, int y, uint32_t value) +{ + void *ptr = surf->buffer + y * surf->stride; + + switch (surface_get_format(surf) & 3) { + case 0: // 4-bit + ptr += x/2; + if (x & 1) + 
*(uint8_t*)ptr = (*(uint8_t*)ptr & 0xF0) | (value & 0xF); + else + *(uint8_t*)ptr = (*(uint8_t*)ptr & 0x0F) | ((value & 0xF) << 4); + break; + case 1: // 8-bit + ptr += x; + *(uint8_t*)ptr = value; + break; + case 2: // 16-bit + ptr += x*2; + *(uint16_t*)ptr = value; + break; + case 3: // 32-bit + ptr += x*4; + *(uint32_t*)ptr = value; + break; + } +} + +static inline uint32_t surface_get_pixel(surface_t *surf, int x, int y) +{ + void *ptr = surf->buffer + y * surf->stride; + + switch (TEX_FORMAT_BITDEPTH(surface_get_format(surf))) { + case 4: + ptr += x/2; + if (x & 1) + return *(uint8_t*)ptr & 0xF; + else + return (*(uint8_t*)ptr >> 4) & 0xF; + case 8: + ptr += x; + return *(uint8_t*)ptr; + case 16: + ptr += x*2; + return *(uint16_t*)ptr; + case 32: + ptr += x*4; + return *(uint32_t*)ptr; + default: + assert(false); + } + return 0; +} + +static surface_t surface_create_random(int width, int height, tex_format_t fmt) +{ + surface_t surf = surface_alloc(fmt, width, height); + for (int j=0;j= surf->width) x = surf->width-1; + if (y >= surf->height) y = surf->height-1; + uint32_t px = surface_get_pixel(surf, x, y); + switch (surface_get_format(surf)) { + case FMT_I4: + px = (px << 4) | px; + return RGBA32(px, px, px, 0xE0); + case FMT_IA4: + px &= 0xE; + px = (px << 4) | (px << 1) | (px >> 2); + return RGBA32(px, px, px, 0xE0); + case FMT_I8: + return RGBA32(px, px, px, 0xE0); + case FMT_IA8: + px = (px & 0xF0) | (px >> 4); + return RGBA32(px, px, px, 0xE0); + case FMT_IA16: + px >>= 8; + return RGBA32(px, px, px, 0xE0); + case FMT_CI4: case FMT_CI8: { + color_t c = palette_debug_color(px); + c.r &= 0xF8; c.r |= c.r >> 5; + c.g &= 0xF8; c.g |= c.g >> 5; + c.b &= 0xF8; c.b |= c.b >> 5; + c.a = 0xE0; + return c; + } + case FMT_RGBA16: { + color_t c = color_from_packed16(px); + c.r &= 0xF8; c.r |= c.r >> 5; + c.g &= 0xF8; c.g |= c.g >> 5; + c.b &= 0xF8; c.b |= c.b >> 5; + c.a = 0xE0; + return c; + } + case FMT_RGBA32: { + color_t c = color_from_packed32(px); + c.a = 
0xE0; + return c; + } + default: + assertf(0, "Unhandled format %s", tex_format_name(surface_get_format(surf))); + } +} + +void test_rdpq_tex_upload(TestContext *ctx) { + RDPQ_INIT(); + + static const tex_format_t fmts[] = { + FMT_RGBA32, + FMT_RGBA16, FMT_IA16, + FMT_CI8, FMT_I8, FMT_IA8, + FMT_CI4, FMT_I4, FMT_IA4, + }; + + const int FBWIDTH = 32; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + uint16_t* tlut = malloc_uncached(256*2); + for (int i=0;i<256;i++) { + tlut[i] = color_to_packed16(palette_debug_color(i)); + } + + rdpq_attach(&fb, NULL); + DEFER(rdpq_detach()); + rdpq_set_mode_standard(); + + for (int i=0; i= 5 && x < 5+surf.width-off && y >= 5 && y < 5+surf.width-off) + return surface_debug_expected_color(&surf, x-5+off, y-5+off); + else + return color_from_packed32(0); + }); + } + } + } + } +} + +void test_rdpq_tex_upload_multi(TestContext *ctx) { + RDPQ_INIT(); + + surface_t tex1 = surface_alloc(FMT_RGBA32, 8, 8); + DEFER(surface_free(&tex1)); + surface_t tex2 = surface_alloc(FMT_RGBA32, 8, 8); + DEFER(surface_free(&tex2)); + surface_t empty = surface_alloc(FMT_RGBA32, 32, 32); + DEFER(surface_free(&empty)); + + const int FBWIDTH = 32; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + surface_clear(&empty, 0x0); + surface_clear(&tex1, 0x24); + surface_clear(&tex2, 0x10); + + void do_test(void) { + // Combine them via addition + rdpq_attach(&fb, NULL); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER2( + (1, 0, TEX0, TEX1), (0, 0, 0, 0), + (0,0,0,COMBINED), (0,0,0,COMBINED))); + rdpq_texture_rectangle(TILE1, 0, 0, 8, 8, 0, 0); + rdpq_detach(); + rspq_wait(); + + // Check result + ASSERT_SURFACE(&fb, { + if (x < 8 && y < 8) + return color_from_packed32(0x343434e0); + else + return color_from_packed32(0x0); + }); + } + + // Clear tmem + rdpq_tex_upload(TILE0, &empty, NULL); + + // Load the two textures 
to TMEM + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE1, &tex1, NULL); + rdpq_tex_upload(TILE2, &tex2, NULL); + rdpq_tex_multi_end(); + do_test(); + if (ctx->result == TEST_FAILED) + return; + + // Create loader blocks + rspq_block_begin(); + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE1, &tex1, NULL); + rdpq_tex_multi_end(); + rspq_block_t *tex1_loader = rspq_block_end(); + DEFER(rspq_block_free(tex1_loader)); + + rspq_block_begin(); + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE2, &tex2, NULL); + rdpq_tex_multi_end(); + rspq_block_t *tex2_loader = rspq_block_end(); + DEFER(rspq_block_free(tex2_loader)); + + // Load the two textures to TMEM via block loading + rdpq_tex_upload(TILE0, &empty, NULL); + rdpq_tex_multi_begin(); + rspq_block_run(tex1_loader); + rspq_block_run(tex2_loader); + rdpq_tex_multi_end(); + do_test(); + if (ctx->result == TEST_FAILED) + return; + + // Load one texture via block loading and the other normally + rdpq_tex_upload(TILE0, &empty, NULL); + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE1, &tex1, NULL); + rspq_block_run(tex2_loader); + rdpq_tex_multi_end(); + do_test(); + if (ctx->result == TEST_FAILED) + return; + + // Create a block that contains both tiles + rspq_block_begin(); + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE1, &tex1, NULL); + rdpq_tex_upload(TILE2, &tex2, NULL); + rdpq_tex_multi_end(); + rspq_block_t *tex1_tex2_loader = rspq_block_end(); + + // Load them both via block loading + rdpq_tex_upload(TILE0, &empty, NULL); + rspq_block_run(tex1_tex2_loader); + do_test(); + if (ctx->result == TEST_FAILED) + return; + + // Load them both via block loading, with explicit multi + rdpq_tex_upload(TILE0, &empty, NULL); + rdpq_tex_multi_begin(); + rspq_block_run(tex1_tex2_loader); + rdpq_tex_multi_end(); + do_test(); + if (ctx->result == TEST_FAILED) + return; + +} + +void test_rdpq_tex_multi_i4(TestContext *ctx) { + RDPQ_INIT(); + debug_rdp_stream_init(); + + const int FBWIDTH = 128; + surface_t fb = 
surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + surface_t surf = surface_alloc(FMT_I4, 124, 63); + DEFER(surface_free(&surf)); + surface_clear(&surf, 0xAA); + + // Make sure we can correctly load a large I4 surface. We had a bug where + // the autotmem engine was confuse by a CI8 internal tile used to perform + // the upload. + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE0, &surf, NULL); + rdpq_tex_multi_end(); + + rdpq_set_color_image(&fb); + rdpq_set_mode_standard(); + rdpq_texture_rectangle(TILE0, 0, 0, 124, 63, 0, 0); + rspq_wait(); + + ASSERT_SURFACE(&fb, { + if (x < 124 && y < 63) + return color_from_packed32(0xAAAAAAE0); + else + return color_from_packed32(0x00); + }); +} + +void test_rdpq_tex_blit_normal(TestContext *ctx) +{ + RDPQ_INIT(); + + static const tex_format_t fmts[] = { + FMT_RGBA32, + FMT_RGBA16, FMT_IA16, + FMT_CI8, FMT_I8, FMT_IA8, + FMT_CI4, FMT_I4, FMT_IA4, + }; + + const int FBWIDTH = 32; + surface_t fb = surface_alloc(FMT_RGBA32, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + uint16_t* tlut = malloc_uncached(256*2); + for (int i=0;i<256;i++) { + tlut[i] = color_to_packed16(palette_debug_color(i)); + } + + rdpq_attach(&fb, NULL); + DEFER(rdpq_detach()); + rdpq_set_mode_standard(); + + for (int i=0; itex_width-s0-3; width--) { + LOG(" s0/t0/w: %d %d %d\n", s0, t0, width); + rdpq_tex_blit(&surf_full, 0, 0, &(rdpq_blitparms_t){ + .s0 = s0, .width = width, .t0 = t0, .height = tex_width-t0, + }); + rspq_wait(); + + ASSERT_SURFACE(&fb, { + return surface_debug_expected_color(&surf_full, x+s0, y+t0); + }); + } + } + } +} diff --git a/tests/test_rdpq_tri.c b/tests/test_rdpq_tri.c new file mode 100644 index 0000000000..00db363710 --- /dev/null +++ b/tests/test_rdpq_tri.c @@ -0,0 +1,194 @@ + +void test_rdpq_triangle(TestContext *ctx) { + RDPQ_INIT(); + debug_rdp_stream_init(); + + const int FBWIDTH = 16; + surface_t fb = surface_alloc(FMT_RGBA16, FBWIDTH, FBWIDTH); + 
DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + rdpq_set_color_image(&fb); + rdpq_set_tile(TILE4, FMT_RGBA16, 0, 64, 0); + rdpq_set_tile_size(TILE4, 0, 0, 32, 32); + rdpq_set_mode_standard(); + rdpq_mode_mipmap(MIPMAP_NEAREST, 3); + rdpq_set_prim_color(RGBA32(255,255,255,0)); + rdpq_mode_combiner(RDPQ_COMBINER_TEX_SHADE); + rspq_wait(); + + // Generate floating point coordinates that maps perfectly to fixed point numbers of the expected + // precision. What we want to test here is the accuracy of the RSP implementation, which receives + // fixed point numbers as input. If an error is introduced in input data, it just accumulates + // through the algorithm but it doesn't give us actionable information. + #define RF(min,max) (((float)myrand() / (float)0xFFFFFFFF) * ((max)-(min)) + (min)) + #define RS16() ((int)(RANDN(65536) - 32768)) + #define RFCOORD() ((int)(RANDN(32768) - 16384) / 4.0f) + #define RFZ() (RANDN(0x8000) / 32767.f) + #define RFRGB() (RANDN(256) / 255.0f) + #define RFW() RF(0.0f, 1.0f) + #define RFTEX() (RS16() / 64.f) // Use s9.5 here because the RSP code has a bug for spanning too much in s10.5 space + #define SAT16(x) ((x) == 0x7FFF || (x) == 0x8000) + + #define TRI_CHECK(idx, start, end, msg) ({ \ + if (BITS(tcpu[idx], start, end) != BITS(trsp[idx], start, end)) { \ + debugf("CPU[%d]:\n", tri); rdpq_debug_disasm(tcpu, stderr); \ + debugf("RSP[%d]:\n", tri); rdpq_debug_disasm(trsp, stderr); \ + ASSERT_EQUAL_HEX(BITS(tcpu[idx], start, end), BITS(trsp[idx], start, end), msg); \ + } \ + }) + + #define TRI_CHECK_F1616(idxi, starti, idxf, startf, threshold, msg) ({ \ + float __fcpu = (int16_t)BITS(tcpu[idxi], starti, starti+15), __frsp = (int16_t)BITS(trsp[idxi], starti, starti+15); \ + __fcpu += (float)BITS(tcpu[idxf], startf, startf+15) / 65536.0f; __frsp += (float)BITS(trsp[idxf], startf, startf+15) / 65536.0f; \ + if (fabsf(__frsp - __fcpu) > threshold) { \ + debugf("CPU[%d]:\n", tri); rdpq_debug_disasm(tcpu, stderr); \ + debugf("RSP[%d]:\n", 
tri); rdpq_debug_disasm(trsp, stderr); \ + ASSERT_EQUAL_FLOAT(__fcpu, __frsp, msg " (error: %.2f)", fabsf(__frsp - __fcpu)); \ + } \ + }) + + const rdpq_trifmt_t trifmt = (rdpq_trifmt_t){ + .pos_offset = 0, .z_offset = 2, .tex_offset = 3, .shade_offset = 6, .tex_tile = TILE4 + }; + + for (int tri=0;tri<1024;tri++) { + if (tri == 849) continue; // this has a quasi-degenerate edge. The results are different but it doesn't matter + SRAND(tri+1); + float v1[] = { RFCOORD(), RFCOORD(), RFZ(), RFTEX(),RFTEX(),RFW(), RFRGB(), RFRGB(), RFRGB(), RFRGB() }; + float v2[] = { RFCOORD(), RFCOORD(), RFZ(), RFTEX(),RFTEX(),RFW(), RFRGB(), RFRGB(), RFRGB(), RFRGB() }; + float v3[] = { RFCOORD(), RFCOORD(), RFZ(), RFTEX(),RFTEX(),RFW(), RFRGB(), RFRGB(), RFRGB(), RFRGB() }; + + // skip degenerate triangles + if(v1[0] == v2[0] || v2[0] == v3[0] || v1[0] == v3[0]) continue; + if(v1[1] == v2[1] || v2[1] == v3[1] || v1[1] == v3[1]) continue; + + debug_rdp_stream_reset(); + rdpq_debug_log_msg("CPU"); + rdpq_triangle_cpu(&trifmt, v1, v2, v3); + rdpq_debug_log_msg("RSP"); + rdpq_triangle_rsp(&trifmt, v1, v2, v3); + rspq_wait(); + + const int RDP_TRI_SIZE = 22; + uint64_t *tcpu = &rdp_stream[1]; + uint64_t *trsp = &rdp_stream[RDP_TRI_SIZE+1+1]; + + ASSERT_EQUAL_HEX((tcpu[0] >> 56), 0xCF, "invalid RDP primitive value (by CPU)"); + ASSERT_EQUAL_HEX((trsp[0] >> 56), 0xCF, "invalid RDP primitive value (by RSP)"); + + uint8_t cmd = tcpu[0] >> 56; + TRI_CHECK(0, 48, 63, "invalid command header (top 16 bits)"); + TRI_CHECK(0, 32, 45, "invalid YL"); + TRI_CHECK(0, 16, 29, "invalid YM"); + TRI_CHECK(0, 0, 13, "invalid YH"); + TRI_CHECK_F1616(1,48, 1,32, 0.05f, "invalid XL"); + TRI_CHECK_F1616(2,48, 2,32, 0.05f, "invalid XH"); + TRI_CHECK_F1616(3,48, 3,32, 0.05f, "invalid XM"); + TRI_CHECK_F1616(1,16, 1, 0, 0.05f, "invalid ISL"); + TRI_CHECK_F1616(2,16, 2, 0, 0.05f, "invalid ISH"); + TRI_CHECK_F1616(3,16, 3, 0, 0.05f, "invalid ISM"); + + int off = 4; + if (cmd & 4) { + TRI_CHECK_F1616(off+0,48, 
off+2,48, 0.6f, "invalid Red"); + TRI_CHECK_F1616(off+0,32, off+2,32, 0.6f, "invalid Green"); + TRI_CHECK_F1616(off+0,16, off+2,16, 0.6f, "invalid Blue"); + TRI_CHECK_F1616(off+0,0, off+2,0, 0.6f, "invalid Alpha"); + + TRI_CHECK_F1616(off+1,48, off+3,48, 0.8f, "invalid DrDx"); + TRI_CHECK_F1616(off+1,32, off+3,32, 0.8f, "invalid DgDx"); + TRI_CHECK_F1616(off+1,16, off+3,16, 0.8f, "invalid DbDx"); + TRI_CHECK_F1616(off+1,0, off+3,0, 0.8f, "invalid DaDx"); + + TRI_CHECK_F1616(off+4,48, off+6,48, 0.8f, "invalid DrDe"); + TRI_CHECK_F1616(off+4,32, off+6,32, 0.8f, "invalid DgDe"); + TRI_CHECK_F1616(off+4,16, off+6,16, 0.8f, "invalid DbDe"); + TRI_CHECK_F1616(off+4,0, off+6,0, 0.8f, "invalid DaDe"); + + TRI_CHECK_F1616(off+5,48, off+7,48, 0.8f, "invalid DrDy"); + TRI_CHECK_F1616(off+5,32, off+7,32, 0.8f, "invalid DgDy"); + TRI_CHECK_F1616(off+5,16, off+7,16, 0.8f, "invalid DbDy"); + TRI_CHECK_F1616(off+5,0, off+7,0, 0.8f, "invalid DaDy"); + + off += 8; + } + + if (cmd & 2) { + // Skip checks for saturated W/INVW, the results would be too different + uint16_t invw_i = tcpu[off+0]>>16; + if (!SAT16(invw_i)) + { + TRI_CHECK_F1616(off+0,48, off+2,48, 5.0f, "invalid S"); + TRI_CHECK_F1616(off+0,32, off+2,32, 5.0f, "invalid T"); + TRI_CHECK_F1616(off+0,16, off+2,16, 8.0f, "invalid INVW"); + + TRI_CHECK_F1616(off+1,48, off+3,48, 3.0f, "invalid DsDx"); + TRI_CHECK_F1616(off+1,32, off+3,32, 3.0f, "invalid DtDx"); + TRI_CHECK_F1616(off+1,16, off+3,16, 0.8f, "invalid DwDx"); + + TRI_CHECK_F1616(off+5,48, off+7,48, 3.0f, "invalid DsDy"); + TRI_CHECK_F1616(off+5,32, off+7,32, 3.0f, "invalid DtDy"); + TRI_CHECK_F1616(off+5,16, off+7,16, 0.8f, "invalid DwDy"); + + // Skip checks for De components if Dx or Dy saturated. 
+ uint16_t dwdx_i = tcpu[off+1]>>16, dwdy_i = tcpu[off+5]>>16; + if (!SAT16(dwdx_i) && !SAT16(dwdy_i)) { + TRI_CHECK_F1616(off+4,48, off+6,48, 3.0f, "invalid DsDe"); + TRI_CHECK_F1616(off+4,32, off+6,32, 3.0f, "invalid DtDe"); + TRI_CHECK_F1616(off+4,16, off+6,16, 0.8f, "invalid DwDe"); + } + } + + off += 8; + } + + if (cmd & 1) { + TRI_CHECK_F1616(off+0,48, off+0,32, 1.2f, "invalid Z"); + TRI_CHECK_F1616(off+0,16, off+0,0, 0.8f, "invalid DzDx"); + TRI_CHECK_F1616(off+1,16, off+1,0, 0.8f, "invalid DzDy"); + + // If DzDx or DzDy are saturated, avoid checking DzDe as it won't match anyway + uint16_t dzdx_i = trsp[off+0]>>16, dzdy_i = trsp[off+1]>>16; + if (!SAT16(dzdx_i) && !SAT16(dzdy_i)) + TRI_CHECK_F1616(off+1,48, off+1,32, 0.6f, "invalid DzDe"); + off += 2; + } + } +} + +void test_rdpq_triangle_w1(TestContext *ctx) { + RDPQ_INIT(); + debug_rdp_stream_init(); + + const int FBWIDTH = 16; + const int TEXWIDTH = FBWIDTH - 8; + surface_t fb = surface_alloc(FMT_RGBA16, FBWIDTH, FBWIDTH); + DEFER(surface_free(&fb)); + surface_clear(&fb, 0); + + surface_t tex = surface_alloc(FMT_RGBA16, TEXWIDTH, TEXWIDTH); + DEFER(surface_free(&tex)); + surface_clear(&tex, 0); + + rdpq_set_color_image(&fb); + rdpq_tex_upload(TILE0, &tex, NULL); + rdpq_set_mode_standard(); + rspq_wait(); + + // Draw a triangle with W=1. This is a typical triangle calculated + // with an orthogonal projection. It triggers a special case in the + // RSP code because W = 1/W, so we want to make sure we have no bugs. + debug_rdp_stream_reset(); + rdpq_triangle(&TRIFMT_TEX, + (float[]){ 4.0f, 4.0f, 0.0f, 0.0f, 1.0f }, + (float[]){ 12.0f, 4.0f, 8.0f, 0.0f, 1.0f }, + (float[]){ 12.0f, 12.0f, 8.0f, 8.0f, 1.0f } + ); + rspq_wait(); + + // Check that we find a triangle command in the stream, and that the W + // coordinate is correct (saturated 0x7FFF value in the upper 16 bits). 
+ ASSERT_EQUAL_HEX(BITS(rdp_stream[0],56,61), RDPQ_CMD_TRI_TEX, "invalid command"); + ASSERT_EQUAL_HEX(BITS(rdp_stream[4],16,31), 0x7FFF, "invalid W coordinate"); +} diff --git a/tests/test_rspq.c b/tests/test_rspq.c index 25336c7324..4c9333e995 100644 --- a/tests/test_rspq.c +++ b/tests/test_rspq.c @@ -3,8 +3,11 @@ #include #include +#include +#include #define ASSERT_GP_BACKWARD 0xF001 // Also defined in rsp_test.S +#define ASSERT_TOO_MANY_NOPS 0xF002 static void test_assert_handler(rsp_snapshot_t *state, uint16_t assert_code) { @@ -12,6 +15,9 @@ static void test_assert_handler(rsp_snapshot_t *state, uint16_t assert_code) case ASSERT_GP_BACKWARD: printf("GP moved backward\n"); break; + case ASSERT_TOO_MANY_NOPS: + printf("Trying to send too many NOPs (%ld)\n", state->gpr[4]); + break; default: printf("Unknown assert\n"); break; @@ -86,6 +92,16 @@ void rspq_test_reset_log(void) rspq_write(test_ovl_id, 0x7); } +void rspq_test_send_rdp(uint32_t value) +{ + rdpq_write(1, test_ovl_id, 0xA, 0, value); +} + +void rspq_test_send_rdp_nops(int num_nops) +{ + rdpq_write(num_nops, test_ovl_id, 0xB, num_nops); +} + void rspq_test_big_out(void *dest) { rspq_write(test_ovl_id, 0x9, 0, PhysicalAddr(dest)); @@ -193,9 +209,9 @@ void test_rspq_signal(TestContext *ctx) { TEST_RSPQ_PROLOG(); - rspq_signal(SP_WSTATUS_SET_SIG0 | SP_WSTATUS_SET_SIG1); + rspq_signal(SP_WSTATUS_SET_SIG0); - TEST_RSPQ_EPILOG(SP_STATUS_SIG0 | SP_STATUS_SIG1, rspq_timeout); + TEST_RSPQ_EPILOG(SP_STATUS_SIG0, rspq_timeout); } void test_rspq_high_load(TestContext *ctx) @@ -744,3 +760,66 @@ void test_rspq_big_command(TestContext *ctx) ASSERT_EQUAL_MEM((uint8_t*)output, (uint8_t*)expected, 128, "Output does not match!"); } + +void test_rspq_rdp_dynamic(TestContext *ctx) +{ + TEST_RSPQ_PROLOG(); + test_ovl_init(); + + const uint32_t count = 0x80; + + for (uint32_t i = 0; i < count; i++) + { + rspq_test_send_rdp(i); + } + + TEST_RSPQ_EPILOG(0, rspq_timeout); + + extern void *rspq_rdp_dynamic_buffers[2]; + + 
ASSERT_EQUAL_HEX(*DP_START, PhysicalAddr(rspq_rdp_dynamic_buffers[0]), "DP_START does not match!"); + ASSERT_EQUAL_HEX(*DP_END, PhysicalAddr(rspq_rdp_dynamic_buffers[0]) + count * 8, "DP_END does not match!"); + + uint64_t *rdp_buf = (uint64_t*)rspq_rdp_dynamic_buffers[0]; + + for (uint64_t i = 0; i < count; i++) + { + ASSERT_EQUAL_HEX(rdp_buf[i], i, "Wrong command at idx: %llx", i); + } +} + +void test_rspq_rdp_dynamic_switch(TestContext *ctx) +{ + TEST_RSPQ_PROLOG(); + test_ovl_init(); + + const uint32_t full_count = RDPQ_DYNAMIC_BUFFER_SIZE / 8; + const uint32_t extra_count = 8; + const uint32_t count = full_count + extra_count; + + for (uint32_t i = 0; i < count; i++) + { + rspq_test_send_rdp(i); + } + + TEST_RSPQ_EPILOG(0, rspq_timeout); + + extern void *rspq_rdp_dynamic_buffers[2]; + + ASSERT_EQUAL_HEX(*DP_START, PhysicalAddr(rspq_rdp_dynamic_buffers[1]), "DP_START does not match!"); + ASSERT_EQUAL_HEX(*DP_END, PhysicalAddr(rspq_rdp_dynamic_buffers[1]) + extra_count * 8, "DP_END does not match!"); + + uint64_t *rdp_buf0 = (uint64_t*)rspq_rdp_dynamic_buffers[0]; + uint64_t *rdp_buf1 = (uint64_t*)rspq_rdp_dynamic_buffers[1]; + + for (uint64_t i = 0; i < full_count; i++) + { + ASSERT_EQUAL_HEX(rdp_buf0[i], i, "Wrong command at idx: %llx", i); + } + + for (uint64_t i = 0; i < extra_count; i++) + { + ASSERT_EQUAL_HEX(rdp_buf1[i], i + full_count, "Wrong command at idx: %llx", i); + } +} + diff --git a/tests/testrom.c b/tests/testrom.c index 941fdd0fcc..8dfe32c02f 100644 --- a/tests/testrom.c +++ b/tests/testrom.c @@ -206,6 +206,11 @@ int assert_equal_mem(TestContext *ctx, const char *file, int line, const uint8_t #include "test_constructors.c" #include "test_backtrace.c" #include "test_rspq.c" +#include "test_rdpq.c" +#include "test_rdpq_tri.c" +#include "test_rdpq_tex.c" +#include "test_rdpq_attach.c" +#include "test_rdpq_sprite.c" /********************************************************************** * MAIN @@ -273,6 +278,48 @@ static const struct Testsuite 
TEST_FUNC(test_rspq_highpri_multiple, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_highpri_overlay, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_big_command, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rspq_rdp_dynamic, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rspq_rdp_dynamic_switch, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_rspqwait, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_clear, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_dynamic, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_passthrough_big, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_block, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_block_coalescing, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_block_contiguous, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_block_dynamic, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_change_other_modes, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_fixup_setfillcolor, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_fixup_setscissor, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_fixup_texturerect, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_fixup_fillrect, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_lookup_address, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_lookup_address_offset, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_syncfull_cb, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_syncfull_resume, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_autosync, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_automode, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_blender, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_blender_memory, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_fog, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_mode_antialias, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_mode_alphacompare, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_mode_freeze, 0, TEST_FLAGS_NO_BENCHMARK), + 
TEST_FUNC(test_rdpq_mode_freeze_stack, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_mipmap, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_autotmem, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_autotmem_reuse, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_texrect_passthrough, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_triangle, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_triangle_w1, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_attach_clear, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_attach_stack, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_tex_upload, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_tex_upload_multi, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_tex_blit_normal, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_tex_multi_i4, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_sprite_upload, 0, TEST_FLAGS_NO_BENCHMARK), + TEST_FUNC(test_rdpq_sprite_lod, 0, TEST_FLAGS_NO_BENCHMARK), }; int main() { From 935d025540dc707886efa4f81e8421bc049651b6 Mon Sep 17 00:00:00 2001 From: Dennis Heinze Date: Thu, 25 Jan 2024 23:04:36 +0100 Subject: [PATCH 06/48] Add rdpqdemo Co-authored-by: Giovanni Bajo Co-authored-by: SpookyIluha <127010686+SpookyIluha@users.noreply.github.com> --- examples/Makefile | 1 + examples/rdpqdemo/.gitignore | 1 + examples/rdpqdemo/Makefile | 33 +++++ examples/rdpqdemo/assets/n64brew.png | Bin 0 -> 6193 bytes examples/rdpqdemo/assets/tiles.png | Bin 0 -> 921 bytes examples/rdpqdemo/rdpqdemo.c | 197 +++++++++++++++++++++++++++ 6 files changed, 232 insertions(+) create mode 100644 examples/rdpqdemo/.gitignore create mode 100644 examples/rdpqdemo/Makefile create mode 100644 examples/rdpqdemo/assets/n64brew.png create mode 100644 examples/rdpqdemo/assets/tiles.png create mode 100644 examples/rdpqdemo/rdpqdemo.c diff --git a/examples/Makefile b/examples/Makefile index 9a6ca06170..f6e7bc7e16 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -6,6 +6,7 @@ EXAMPLES += 
eepromfstest EXAMPLES += mixertest EXAMPLES += cpaktest EXAMPLES += cpak-utest +EXAMPLES += rdpqdemo EXAMPLES += rspqdemo EXAMPLES += spritemap EXAMPLES += test diff --git a/examples/rdpqdemo/.gitignore b/examples/rdpqdemo/.gitignore new file mode 100644 index 0000000000..87ef668156 --- /dev/null +++ b/examples/rdpqdemo/.gitignore @@ -0,0 +1 @@ +filesystem/ \ No newline at end of file diff --git a/examples/rdpqdemo/Makefile b/examples/rdpqdemo/Makefile new file mode 100644 index 0000000000..a901b7f0a5 --- /dev/null +++ b/examples/rdpqdemo/Makefile @@ -0,0 +1,33 @@ +BUILD_DIR=build +include $(N64_INST)/include/n64.mk + +src = rdpqdemo.c +assets_png = $(wildcard assets/*.png) + +assets_conv = $(addprefix filesystem/,$(notdir $(assets_png:%.png=%.sprite))) + +AUDIOCONV_FLAGS ?= +MKSPRITE_FLAGS ?= + +all: rdpqdemo.z64 + +filesystem/%.sprite: assets/%.png + @mkdir -p $(dir $@) + @echo " [SPRITE] $@" + @$(N64_MKSPRITE) $(MKSPRITE_FLAGS) -o filesystem "$<" + +filesystem/n64brew.sprite: MKSPRITE_FLAGS=--format RGBA16 --tiles 32,32 +filesystem/tiles.sprite: MKSPRITE_FLAGS=--format CI4 --tiles 32,32 + +$(BUILD_DIR)/rdpqdemo.dfs: $(assets_conv) +$(BUILD_DIR)/rdpqdemo.elf: $(src:%.c=$(BUILD_DIR)/%.o) + +rdpqdemo.z64: N64_ROM_TITLE="RSPQ Demo" +rdpqdemo.z64: $(BUILD_DIR)/rdpqdemo.dfs + +clean: + rm -rf $(BUILD_DIR) rdpqdemo.z64 + +-include $(wildcard $(BUILD_DIR)/*.d) + +.PHONY: all clean diff --git a/examples/rdpqdemo/assets/n64brew.png b/examples/rdpqdemo/assets/n64brew.png new file mode 100644 index 0000000000000000000000000000000000000000..106eb6b4a7ebf91de56e5ade6886b14c9cf9aab0 GIT binary patch literal 6193 zcmV-17|!R3P)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!TtYjr2By0(bHUX5alnR9c?p7^QtWpj|sz{5r^!v2p(po;ZV(WsUU->*1 zELFfFrxaNQ0V@^+QFbuwfj|h^GnvfH{ht3j?_}Ot0!avXP7lA+M`qrcdEa}V``l-_ z_oh}{9x_C35lI#)5%G%r$72)up2&M5<(g(ufF^=HB9DvYiu^~CDl$`KpNQ(18LJ)| zm!h`MOHzM-zMZ;ZaDsBi*r^D&$UjAH6Zyu_jDOpT7?HK{PTP>-*E%urRx57kUx&EZ z2EZeq@M%^JMW>v2XN87^>*}$)$g6~#<&zIZE;|>Yp&18A5!sPkkeOTeKnM5~ 
zZ>^z!tP7LU&th!KSrnIbz}(e0;N4B%!HM#Ah;h^dC#S(xbLJKK@xmUW>Jts2;<3`#r zxWJ0PuJU5VW*2Jf8ff4fMXpU5oRd^@Pnz^A^tKkC9Sv_j1fR=pT=C7BwObYl^-u6w5cf^h!B+l~|A4c}6M~q#pp3f3HJz5%v_trWve}xM>zpjH)SSFwR zKqU00E#!O(ctW#VpPu+?J|5{+j2@Qi$VYJkiC6dTkHv>e({ab>R9L;CW^JiUkkOwe zJ|WWFO$t!$dS_`9ei?^{x|N`tRvY;&N!eVQ1M9x(gXyoe$DW>PJEJLEr165{+`q&9=y-e9t@@>g;t{degG2+MR zjaFben+5k@CdpvRd#`*^?xV5O1`dlC&bmk7otgr7V$dJ^pJhR&(~WK`Z^7ox10)2< zZ)G~skm7yo+Drk>&I70^t8}13rsX5KNvMjEOgF5O`0f!Gh89ax<;$9kzIWBlaEkTZ zcFle)`OT}C^^?D$Fsl^G<9CKilwp}7N=gcChW}YUlzKIMTAT^5D2koG6ohplgb|v} zGSoIYQ}2+ODzSnT@$5qm*N6&DA4)&6Dt&mTAP$|&I$~-{E$05>72H2!DSD+I!QlKo zxNr1IjJjqg>Z+1(bl5snWt56xG%*e=AHKW>sxs8GI@DpftG4pxED>o$>uI(Hn4uNV z8W4+faSr23kdZ}o__@`El|we-sWEfWDXv)3B|}Lvh&GQG-|hMp=KtyiEa-X!IeYs^ zD=y9GHiuRzfRx#N_|uSNXklL8Dsy#psRIQ&yW_@#S7T9CdsMM7qz=S#zOKl^vrEUK zVCrkQX~yqxRsaWW_BBovC$atsMbn+859_KNxcg8JdhRa3&x;Ci zgByT5)f}iJh0}bi0hYwi&6l6)c+R7;BZQ$6@n3*b{!qeYt7hL#gEpp$6qR!C1=r1k1JW6P}0>n`{OY%b+NGn+0*@l00Y6J6? z`@2`%081s;S1vuRumA~g9HE*-Bb)uZq~;tMz>noE*$VV-r4l)+(T z38%vPq0r8y85b@MO%_8jF)_n4+P5ES-B*iK3wNV(YcZ<7C_(+d&%#r@9qsJJNK3dH zRh^DVjvpNJO+jN?U3JcOxUdEPNa*Ysi#~mvMDz=H)YwfW$4|zxBse%g9i^{%9JTs|LF!e zAL@w)ng31qT!XDV6RtTwlQ|t+-DYS1U|o#qSOt&Gfi+qj?x`6i(X zpS39sG%*Fl#B*c5hC@4yiD#-XyR;0o)m!1N-G}P4pF-7afYnz7iz8h!#h@=RO@Tj( zbQF;s8gxjU4pLz?pMV8$Ir_D5pFc@xE($Q!ZKpfRC!&|i(SAVMjx8Hs{1`Qg7`1p7nK71G-fBZ4ltXYF& z$4($2?K;>qCZX2lgwy>2%1`{2Kj+>=XAu@7wIbX@_@#)ho#w!%a`$C-mo8n>-hcmn zTzl=c`Ufpx)~s20^s(O{c4h&bHYcn{>)|-X=rFrcieG@ey;}~ zd{FJ#v&Z-(NJT}38a{kD=O7aY?qW5YX(CEY&A3&|*|~G4ij9rcJ1(Q8zW(}a<#xMO zZEdY8DJfCQmoHa$-F25rN=j52xuaEf&z~u~JxA*=+4B7Abxb6Vb0B_)Lgl>ZLRq^ina z22;sRArS@t_{Tr6e*Jn#xYeH{BaYp>ck4+Kv0AOV65F+FhlGR#h(nf?S)sDB65<;m z5sB&NXCe=Ze8JxYh^Bx9k&}|ULQ_>G7|?j{RNx$)Z%D~8~e)>akjDn zXUl6PRdpgWGYjHO^)xx7eHOAiWTAa#ro2xjL{M2Do*2sgY z7+riQtM;M1)+-HCh>NkHuEB?oc6#v2>^T@ee!TzYQTXVik8tOmcj6c0`r-F)tTnE- zj?X{;9Aedwh&5l{^3;n?wCxxkg0gTVBd9+o6H9dFHKl;)MUcB({$upSB0m)cgy5;y%hgciLfAFFopNJ32KS1` 
z>;GJdJ8${`wgkT)k&q?|IF4@VUc53X1LH?XbA004JR-KBv*hQf#=oqZ8lm5syx3b{;+Q zcb@p%Qssv(oYGoYHVdmoxE?TEWQ@oiB7G%y+5h5;FY1mk02L;%&pCWSqA5T>=$D?J zuD@x-#}l7aw$Ui7!x%yXB@g0vE{t)5X5s&?C?D?7>XzY7vPr&VL&NXMk@|v>f}t=# zw{G2xSHW5#ngTKyW*AmN;2iX_&M=6u03M$+t@bpH=|>mD_%0VmnePN)8x1Rs;;nTK z)z{ZE^cin~jvYHf5NCg$Ery)Yy8p2j1j77(_tQ4hYC;!4m2ix zIt-BlouEF#;R;{~5d&x!m@ADaDxh(A0#m>=;+~c%fVbLwum(dPIiamNGaBXDPvR@j zL`4({X|%~c zt)hU&{>ya_&n=rizW{Fe?SxrJaP9YhjicUs@x{?R6nnmp#2!y##TRvW`GeE?3Driz zq5v^vtI;fg3CRg2NURG2Fyj<>W8D$Y}+J+>oOZWD_O>{CI#r`9EGD?RzG(xs61Aa26|Eut^nG;7PS!4>wo~#R7=>VWea^k zcCsBm9hr=p;|rvpHh7hofOvcNnKiKc&S3f&83glL16o*=Y|vPoVFeqF{Zjqu3nu~S zIE@IWhr(zIFl#o5RDz8o%tDA_LWPaagO06SwuEH>EWG8>`od$_D-#_)0cO?@Hf_Shi4!AA`||R}a9Ydp3&68BdnZkrnF4w`Y>4gzFY)N>h~o)Mhoiv)h*jX0En7fq9y@j{^7Hd~uF-ew*s)FW75-m) z=*P^=t>*ug#nOlk5y7|aPN0hUwBPe-4lx$1I9#U0k`$?$lm1Y6sSyXL4fq4JjyD(= zq($PCA9&yaeP@Wm)lJ|3lK!Xw@M!9 z3Qt1#(@Pxt!;xf6EBp<#ZlMD-V#ElnTepsZo_|_T569Gx1pplrqXHKcqwd<6r+}VF zM5MubArx)9>oRv*SA87)OYQXJ{@jP4Z=u7@ z7zAzLOELxi+xR?)QQUBWBFua*j9CF`rUix|ws>fLe=G6#LukX^f!1p(wA8-(^~N$- zFO*m-T+FnP-v;g{uQiGFbavW+S^BdrgWrIbJyLhN;h56l@RghkVjjkSck9+|0N2sE zCU8NR{ned>sxAU5#otsFLDh-xsV|i{BxcIoKvzycF#+Ysy%|b5stl(#NkG}zI2pyH z9wObLi6Bvo561$+m!p zBiZqBS^A2l%o`t4@f9Pj-e8*3vvCGfbqbUErcauU@_MJeq$#!|f$rW5fvG zLu?DUc)`sK`lDUDc46bjjrtiuzL&wE$7QE^Y;2JTYkt#;ZOb^Hw`0$^dBD@;R7yV) z?xry7urOLM{BVJh%HRP?#$(nJOs1$DuG+E`Wqv`Cfz*$zFlFT-Cki`SQc|K zLPV?zL>4L`3}$H@L6&)}y110hG?st)u}g$S2vga{{Jkw{sREjU_hF~Gn8=e$0pZFC zuhYj+0?vxUl9WTt)Q)K^mB4Ee8oI(2u5g7bT;U2=xWW~#aD^+hDZu{&?u6|QWS^Eq P00000NkvXXu0mjfDs$>n literal 0 HcmV?d00001 diff --git a/examples/rdpqdemo/assets/tiles.png b/examples/rdpqdemo/assets/tiles.png new file mode 100644 index 0000000000000000000000000000000000000000..ff9aca6cd0c62a65075b57625a89bfb1fe6e9865 GIT binary patch literal 921 zcmeAS@N?(olHy`uVBq!ia0vp^4j|0I3?%1nZ+ru!SkfJR9T^xl_H+M9WCijSl0AZa z85pY67#JE_7#My5g&JNkFq9fFFuY1&V6d9Oz#v{QXIG#NP$DzHC&V=(BBHmqx2>(M zxVShiEiEP{CO<#Fw6wIny*)cSyRxz}Dk>^9HMOXyC?g}IwY4=eGP0tgqPx4hzP>&& 
zF|nqmrl+STB_*YM1MG8=my~NYkmHinflc>C=-BtcI3=E7PJzX3_A`V}@_OhtSK!o){ zi)tcg(-GF~*S`Ig-xSoM^ZiIx;W4xN_fKXYKFgN8nSH-y-Masp3SE<@Y&7XyY@m=L zaH8n)bQ^AlCPPVvu1oK7A{`vQm~t|{)bC?yDDdJ7xbRe$L&4?_)22yGsm~ajju<WMZ2B z^mD__`2rb>cJ1yIWm~m-df_*>N8Bd$h5y5zl{!B+|H|Cnsjs=A=847m{Q-KCbJk}} zlVRAtZi~148b7@(zQ*Y3S^H;AeYL(c^zXZ<3~Vpv99Mq( zCpE<40Lz5g`#&-CGRHK^R#Z)zyL+Mtj{(DP=B!VN?-``nBpQ_Osk@hJYl|c>_(@po zY;a>_TX5LnxvUy16OTjmqMxa+8KqbrnAb7i-@!hEL+ya5hCqOT!)1o>2YVS0^f4r} zXdl=ie9%qmd>3zqP(yosK4*hslWYQu5!V%t3v~@I4RZhf)b4ArYZ6XiI`RICB=aq9 z1IDfVH3<#c<_$Xo?C$<#mt)w%$B-^=(aKQ&r+azsvmZ>+qQaii+pZ=8(}QY>YeY#( zVo9o1a#1RfVlXl=GSW4$)HN^)F*39=GO;o?&^9ozGBB8NUt|x8hTQy=%(O~m4TeB< zCRT>#5Dn93U&sM!(16=el9`)YT#}eufUE~-iLs%Ti6KOfY18a)Ks^keu6{1-oD!M< DApTO! literal 0 HcmV?d00001 diff --git a/examples/rdpqdemo/rdpqdemo.c b/examples/rdpqdemo/rdpqdemo.c new file mode 100644 index 0000000000..8d83382f45 --- /dev/null +++ b/examples/rdpqdemo/rdpqdemo.c @@ -0,0 +1,197 @@ +#include "libdragon.h" +#include +#include + +static sprite_t *brew_sprite; +static sprite_t *tiles_sprite; + +static rspq_block_t *tiles_block; + +typedef struct { + int32_t x; + int32_t y; + int32_t dx; + int32_t dy; + float scale_factor; +} object_t; + +#define NUM_OBJECTS 64 + +static object_t objects[NUM_OBJECTS]; + +// Fair and fast random generation (using xorshift32, with explicit seed) +static uint32_t rand_state = 1; +static uint32_t myrand(void) { + uint32_t x = rand_state; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 5; + return rand_state = x; +} + +// RANDN(n): generate a random number from 0 to n-1 +#define RANDN(n) ({ \ + __builtin_constant_p((n)) ? 
\ + (myrand()%(n)) : \ + (uint32_t)(((uint64_t)myrand() * (n)) >> 32); \ +}) + +static int32_t obj_max_x; +static int32_t obj_max_y; +static int32_t cur_tick = 0; +static uint32_t num_objs = 1; + +void update(int ovfl) +{ + for (uint32_t i = 0; i < NUM_OBJECTS; i++) + { + object_t *obj = &objects[i]; + + int32_t x = obj->x + obj->dx; + int32_t y = obj->y + obj->dy; + + if (x >= obj_max_x) x -= obj_max_x; + if (x < 0) x += obj_max_x; + if (y >= obj_max_y) y -= obj_max_y; + if (y < 0) y += obj_max_y; + + obj->x = x; + obj->y = y; + obj->scale_factor = sinf(cur_tick * 0.1f + i) * 0.5f + 1.5f; + } + cur_tick++; +} + +void render(int cur_frame) +{ + // Attach and clear the screen + surface_t *disp = display_get(); + rdpq_attach_clear(disp, NULL); + + // Draw the tile background, by playing back the compiled block. + // This is using copy mode by default, but notice how it can switch + // to standard mode (aka "1 cycle" in RDP terminology) in a completely + // transparent way. Even if the block is compiled, the RSP commands within it + // will adapt to the current render mode. Try uncommenting + // the line below to see. + rdpq_debug_log_msg("tiles"); + rdpq_set_mode_copy(false); + // rdpq_set_mode_standard(); + rspq_block_run(tiles_block); + + // Draw the brew sprites. Use standard mode because copy mode cannot handle + // scaled sprites. 
+ rdpq_debug_log_msg("sprites"); + rdpq_set_mode_standard(); + rdpq_mode_filter(FILTER_BILINEAR); + rdpq_mode_alphacompare(1); // colorkey (draw pixel with alpha >= 1) + + for (uint32_t i = 0; i < num_objs; i++) + { + rdpq_sprite_blit(brew_sprite, objects[i].x, objects[i].y, &(rdpq_blitparms_t){ + .scale_x = objects[i].scale_factor, .scale_y = objects[i].scale_factor, + }); + } + + rdpq_detach_show(); +} + +int main() +{ + debug_init_isviewer(); + debug_init_usblog(); + + display_init(RESOLUTION_320x240, DEPTH_16_BPP, 3, GAMMA_NONE, FILTERS_RESAMPLE); + + controller_init(); + timer_init(); + + uint32_t display_width = display_get_width(); + uint32_t display_height = display_get_height(); + + dfs_init(DFS_DEFAULT_LOCATION); + + rdpq_init(); + rdpq_debug_start(); + + brew_sprite = sprite_load("rom:/n64brew.sprite"); + + obj_max_x = display_width - brew_sprite->width; + obj_max_y = display_height - brew_sprite->height; + + for (uint32_t i = 0; i < NUM_OBJECTS; i++) + { + object_t *obj = &objects[i]; + + obj->x = RANDN(obj_max_x); + obj->y = RANDN(obj_max_y); + + obj->dx = -3 + RANDN(7); + obj->dy = -3 + RANDN(7); + } + + tiles_sprite = sprite_load("rom:/tiles.sprite"); + + surface_t tiles_surf = sprite_get_pixels(tiles_sprite); + + // Create a block for the background, so that we can replay it later. + rspq_block_begin(); + + // Check if the sprite was compiled with a paletted format. Normally + // we should know this beforehand, but for this demo we pretend we don't + // know. This also shows how rdpq can transparently work in both modes. + bool tlut = false; + tex_format_t tiles_format = sprite_get_format(tiles_sprite); + if (tiles_format == FMT_CI4 || tiles_format == FMT_CI8) { + // If the sprite is paletted, turn on palette mode and load the + // palette in TMEM. We use the mode stack for demonstration, + // so that we show how a block can temporarily change the current + // render mode, and then restore it at the end. 
+ rdpq_mode_push(); + rdpq_mode_tlut(TLUT_RGBA16); + rdpq_tex_upload_tlut(sprite_get_palette(tiles_sprite), 0, 16); + tlut = true; + } + uint32_t tile_width = tiles_sprite->width / tiles_sprite->hslices; + uint32_t tile_height = tiles_sprite->height / tiles_sprite->vslices; + + for (uint32_t ty = 0; ty < display_height; ty += tile_height) + { + for (uint32_t tx = 0; tx < display_width; tx += tile_width) + { + // Load a random tile among the 4 available in the texture, + // and draw it as a rectangle. + // Notice that this code is agnostic to both the texture format + // and the render mode (standard vs copy), it will work either way. + int s = RANDN(2)*32, t = RANDN(2)*32; + rdpq_tex_upload_sub(TILE0, &tiles_surf, NULL, s, t, s+32, t+32); + rdpq_texture_rectangle(TILE0, tx, ty, tx+32, ty+32, s, t); + } + } + + // Pop the mode stack if we pushed it before + if (tlut) rdpq_mode_pop(); + tiles_block = rspq_block_end(); + + update(0); + new_timer(TIMER_TICKS(1000000 / 60), TF_CONTINUOUS, update); + + int cur_frame = 0; + while (1) + { + render(cur_frame); + + controller_scan(); + struct controller_data ckeys = get_keys_down(); + + if (ckeys.c[0].C_up && num_objs < NUM_OBJECTS) { + ++num_objs; + } + + if (ckeys.c[0].C_down && num_objs > 1) { + --num_objs; + } + + cur_frame++; + } +} From d479334e31c547f0b4f6409608971c528d607cb6 Mon Sep 17 00:00:00 2001 From: Dennis Heinze Date: Fri, 26 Jan 2024 22:02:55 +0100 Subject: [PATCH 07/48] Remove deprecated rdp functions from examples --- examples/customfont/customfont.c | 1 - examples/spritemap/spritemap.c | 35 ++++++++++---------------------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/examples/customfont/customfont.c b/examples/customfont/customfont.c index 9f49734048..62c65a6d16 100644 --- a/examples/customfont/customfont.c +++ b/examples/customfont/customfont.c @@ -9,7 +9,6 @@ int main(void) /* Initialize peripherals */ display_init( RESOLUTION_320x240, DEPTH_16_BPP, 2, GAMMA_NONE, FILTERS_RESAMPLE ); 
dfs_init( DFS_DEFAULT_LOCATION ); - rdp_init(); controller_init(); timer_init(); diff --git a/examples/spritemap/spritemap.c b/examples/spritemap/spritemap.c index 44232d503b..f7ab477fa9 100644 --- a/examples/spritemap/spritemap.c +++ b/examples/spritemap/spritemap.c @@ -18,7 +18,7 @@ int main(void) /* Initialize peripherals */ display_init( RESOLUTION_320x240, DEPTH_16_BPP, 2, GAMMA_NONE, FILTERS_RESAMPLE ); dfs_init( DFS_DEFAULT_LOCATION ); - rdp_init(); + rdpq_init(); controller_init(); timer_init(); @@ -74,22 +74,18 @@ int main(void) case 1: { /* Hardware spritemap test */ - graphics_draw_text( disp, 20, 20, "Hardware spritemap test" ); - - /* Assure RDP is ready for new commands */ - rdp_sync( SYNC_PIPE ); + /* This example demonstrates drawing sprites using the RDP module which involves + lower-level functions controlling the RDP (including manually making sure that + textures you draw can fit into TMEM). - /* Remove any clipping windows */ - rdp_set_default_clipping(); + For drawing using the higher-level RDPQ module, take a look at the rdpqdemo example. 
*/ + graphics_draw_text( disp, 20, 20, "Hardware spritemap test" ); - /* Enable sprite display instead of solid color fill */ - rdp_enable_texture_copy(); + /* Enable transparent sprite display instead of solid color fill */ + rdpq_set_mode_copy(true); - /* Attach RDP to display */ - rdp_attach( disp ); - - /* Ensure the RDP is ready to receive sprites */ - rdp_sync( SYNC_PIPE ); + /* Attach RDP to display; pass NULL as Z-buffer since we don't need it */ + rdpq_attach( disp, NULL ); /* Load the sprite into texture slot 0, at the beginning of memory, without mirroring */ rdp_load_texture( 0, 0, MIRROR_DISABLED, plane ); @@ -101,9 +97,6 @@ int main(void) all four pieces of this sprite individually in order to use the RDP at all */ for( int i = 0; i < 4; i++ ) { - /* Ensure the RDP is ready to receive sprites */ - rdp_sync( SYNC_PIPE ); - /* Load the sprite into texture slot 0, at the beginning of memory, without mirroring */ rdp_load_texture_stride( 0, 0, MIRROR_DISABLED, mudkip, i ); @@ -111,18 +104,12 @@ int main(void) rdp_draw_sprite( 0, 50 + (20 * (i % 2)), 50 + (20 * (i / 2)), MIRROR_DISABLED ); } - /* Ensure the RDP is ready to receive sprites */ - rdp_sync( SYNC_PIPE ); - /* Load the sprite into texture slot 0, at the beginning of memory, without mirroring */ rdp_load_texture_stride( 0, 0, MIRROR_DISABLED, earthbound, ((animcounter / 15) & 1) ? 
1: 0 ); /* Display walking NESS animation */ rdp_draw_sprite( 0, 20, 100, MIRROR_DISABLED ); - /* Ensure the RDP is ready to receive sprites */ - rdp_sync( SYNC_PIPE ); - /* Load the sprite into texture slot 0, at the beginning of memory, without mirroring */ rdp_load_texture_stride( 0, 0, MIRROR_DISABLED, earthbound, ((animcounter / 8) & 0x7) * 2 ); @@ -130,7 +117,7 @@ int main(void) rdp_draw_sprite( 0, 50, 100, MIRROR_DISABLED ); /* Inform the RDP we are finished drawing and that any pending operations should be flushed */ - rdp_detach(); + rdpq_detach_wait(); break; } From 8d00f3c7a0588c1bbea7ea89dda9c49432658534 Mon Sep 17 00:00:00 2001 From: Dennis Heinze Date: Sat, 27 Jan 2024 15:09:42 +0100 Subject: [PATCH 08/48] rspq: deprecate rspq_signal and remove test With rdpq, all signals are now in use by libdragon itself. That means rspq_signal may not work as expected anymore, depending on whether rdpq is being used or not. Syncpoints offer a more convenient way of synchronising with RSP anyway, so rspq_signal has been deprecated. --- include/rspq.h | 27 +++++---------------------- src/rspq/rspq.c | 17 +++++++++-------- tests/test_rspq.c | 9 --------- tests/testrom.c | 1 - 4 files changed, 14 insertions(+), 40 deletions(-) diff --git a/include/rspq.h b/include/rspq.h index 18702765be..f6885b8a84 100644 --- a/include/rspq.h +++ b/include/rspq.h @@ -800,28 +800,6 @@ void rspq_highpri_sync(void); */ void rspq_noop(void); -/** - * @brief Enqueue a command that sets a signal in SP status - * - * The SP status register has 8 bits called "signals" that can be - * atomically set or cleared by both the CPU and the RSP. They can be used - * to provide asynchronous communication. - * - * This function allows to add a command to the queue that will set and/or - * clear a combination of the above bits. - * - * Notice that signal bits 2-7 are used by the RSP queue engine itself, so this - * function must only be used for bits 0 and 1. 
- * - * @param[in] signal A signal set/clear mask created by composing SP_WSTATUS_* - * defines. - * - * @note This is an advanced function that should be used rarely. Most - * synchronization requirements should be fulfilled via #rspq_syncpoint_new which is - * easier to use. - */ -void rspq_signal(uint32_t signal); - /** * @brief Enqueue a command to do a DMA transfer from DMEM to RDRAM * @@ -856,6 +834,11 @@ void rspq_dma_to_rdram(void *rdram_addr, uint32_t dmem_addr, uint32_t len, bool */ void rspq_dma_to_dmem(uint32_t dmem_addr, void *rdram_addr, uint32_t len, bool is_async); +/** @cond */ +__attribute__((deprecated("may not work anymore. use rspq_syncpoint_new/rspq_syncpoint_check instead"))) +void rspq_signal(uint32_t signal); +/** @endcond */ + #ifdef __cplusplus } #endif diff --git a/src/rspq/rspq.c b/src/rspq/rspq.c index 6eb3dcd0ca..d284a848df 100644 --- a/src/rspq/rspq.c +++ b/src/rspq/rspq.c @@ -1285,14 +1285,6 @@ void rspq_wait(void) if (rdpq_trace) rdpq_trace(); } -void rspq_signal(uint32_t signal) -{ - const uint32_t allowed_mask = SP_WSTATUS_CLEAR_SIG0|SP_WSTATUS_SET_SIG0|SP_WSTATUS_CLEAR_SIG1|SP_WSTATUS_SET_SIG1; - assertf((signal & allowed_mask) == signal, "rspq_signal called with a mask that contains bits outside SIG0-1: %lx", signal); - - rspq_int_write(RSPQ_CMD_WRITE_STATUS, signal); -} - static void rspq_dma(void *rdram_addr, uint32_t dmem_addr, uint32_t len, uint32_t flags) { rspq_int_write(RSPQ_CMD_DMA, PhysicalAddr(rdram_addr), dmem_addr, len, flags); @@ -1308,6 +1300,15 @@ void rspq_dma_to_dmem(uint32_t dmem_addr, void *rdram_addr, uint32_t len, bool i rspq_dma(rdram_addr, dmem_addr, len - 1, is_async ? 
0 : SP_STATUS_DMA_BUSY | SP_STATUS_DMA_FULL); } +/// @cond +void rspq_signal(uint32_t signal) +{ + const uint32_t allowed_mask = SP_WSTATUS_CLEAR_SIG0|SP_WSTATUS_SET_SIG0; + assertf((signal & allowed_mask) == signal, "rspq_signal called with a mask that contains bits outside SIG0: %lx", signal); + + rspq_int_write(RSPQ_CMD_WRITE_STATUS, signal); +} +/// @endcond /* Extern inline instantiations. */ extern inline rspq_write_t rspq_write_begin(uint32_t ovl_id, uint32_t cmd_id, int size); diff --git a/tests/test_rspq.c b/tests/test_rspq.c index 4c9333e995..691f70edaf 100644 --- a/tests/test_rspq.c +++ b/tests/test_rspq.c @@ -205,15 +205,6 @@ void test_rspq_wrap(TestContext *ctx) TEST_RSPQ_EPILOG(0, rspq_timeout); } -void test_rspq_signal(TestContext *ctx) -{ - TEST_RSPQ_PROLOG(); - - rspq_signal(SP_WSTATUS_SET_SIG0); - - TEST_RSPQ_EPILOG(SP_STATUS_SIG0, rspq_timeout); -} - void test_rspq_high_load(TestContext *ctx) { TEST_RSPQ_PROLOG(); diff --git a/tests/testrom.c b/tests/testrom.c index 8dfe32c02f..8b3e06149e 100644 --- a/tests/testrom.c +++ b/tests/testrom.c @@ -263,7 +263,6 @@ static const struct Testsuite TEST_FUNC(test_rspq_queue_multiple, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_queue_rapid, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_wrap, 0, TEST_FLAGS_NO_BENCHMARK), - TEST_FUNC(test_rspq_signal, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_high_load, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_load_overlay, 0, TEST_FLAGS_NO_BENCHMARK), TEST_FUNC(test_rspq_switch_overlay, 0, TEST_FLAGS_NO_BENCHMARK), From c54b9f18a34f5c856d2cad6d8d106af19921d2bb Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Wed, 1 May 2024 14:17:30 +0200 Subject: [PATCH 09/48] Update README (cherry picked from commit fca26a4b768e6ba4af6fcf61cec828e8f2d6f887) --- README.md | 123 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 5f3ce4df05..7f790ab179 100644 --- a/README.md 
+++ b/README.md @@ -9,6 +9,9 @@ ## Welcome to libdragon +> [!TIP] +> Coming back here after a while? Check the [ChangeLog](https://github.com/DragonMinded/libdragon/wiki/Stable-branch--Changelog) of our stable branch, or the [Preview branch](https://github.com/DragonMinded/libdragon/wiki/Preview-branch) + Libdragon is an open-source SDK for Nintendo 64. It aims for a complete N64 programming experience while providing programmers with modern approach to programming and debugging. These are the main features: @@ -18,42 +21,86 @@ programming and debugging. These are the main features: * The GCC toolchain is 64 bit capable to be able to use the full R4300 capabilities (commercial games and libultra are based on a 32-bit ABI and is not possible to use 64-bit registers and opcodes with it) -* Can be developed with newer-generation emulators (ares, cen64, Dillonb's n64, - simple64) or development cartridges (64drive, EverDrive64, SummerCart64). -* Support both vanilla N64 and iQue Player (Chinese variant). The support is - experimental and done fully at runtime, so it is possible to run ROMs built - with libdragon on iQue without modifying the source code. -* In-ROM filesystem implementation for assets. Assets can be loaded with - `fopen("rom://asset.dat")` without having to do complex things to link them in. -* Efficient interrupt-based timer library (also features a monotone 64-bit - timer to avoid dealing with 32-bit overflows) -* Graphics: easy-to-use API for 2D games, accelerated with RDP +* Can be developed with newer-generation emulators (Ares) and development cartridges + (64drive, EverDrive64, SummerCart64). +* Support both vanilla N64 and iQue Player (Chinese variant). It is possible + to run ROMs built with libdragon on iQue without modifying the source code. +* 2D accelerated graphics: + * Comprehensive RDP library called [rdpq](https://github.com/DragonMinded/libdragon/wiki/Rdpq) + that offers both low-level access and very high-level blitting functions. 
+ * Support for drawing sprites of arbitrary sizes and arbitrary pixel formats. + Rdpq takes care of handling TMEM limits transparently and efficiently. + * Support for sprite zooming and rotation. Rotated sprites are transparently + drawn via triangles instead of rectangles. + * Support for all RDP pixel formats, including palettized ones. + * Very simple render mode configuration, that allows for full RDP graphic effects + including custom color combiner and blender. + * Comprehensive [mksprite](https://github.com/DragonMinded/libdragon/wiki/Mksprite) + tool, that converts from PNG format, includes optional state-of-the-art color + quantizer and dithering. + * Transparent compression of graphics for minimal ROM size +* Audio: + * Advanced RSP-accelerated mixer library, supporting up to 32 channels and + streaming samples from ROM during playback for very low memory usage. + * Supports WAV files for sound effects + * Supports streaming of uncompressed or VADPCM-compressed WAV files for music. + * Supports playing of XM modules (FastTracker, MilkyTracker, OpenMPT). Can + playback a 10-channel XM with < 3% CPU and < 10% RSP. + * Supports playing of YM modules (Arkos Tracker 2) +* Filesystems: + * In-ROM filesystem implementation for assets. Assets can be loaded with + `fopen("rom://asset.dat")` without having to do complex things to link them in. + * SD card access (`fopen("sd://asset.dat")`) on all available flashcarts. +* [Compression](https://github.com/DragonMinded/libdragon/wiki/Compression): + * Asset library for fast, transparent compression support for data files, + including your custom ones. + * Automatically integrated in conversion tools for graphics. + * Three different compression algorithms with increasing compression ratio + (and decreasing decompression speed). Currently based on LZ4, Aplib, Shrinkler. + Compression ratios competitive with gzip and xz, at higher decompression speeds. 
+ * Optimized decompression routines in MIPS assembly that run in parallel + with DMA for maximum speed. + * Support for streaming decompression based on the `fopen()` interface. +* Debugging: + * Clear error screens with symbolized stack traces in case of crashes + * Codebase is filled with assertions, so that you get a nice error screen + instead of a console lockup. + * Printf-debugging via `debugf()` which are redirected to your PC console + in emulators and to USB via compatible tools (UNFLoader, g64drive). * Support for standard N64 controllers and memory paks. * Support for saving to flashes and EEPROMs (including a mini EEPROM filesystem to simplify serialization of structures). -* Audio: advanced RSP-accelerated library, supporting up to 32 channels and - streaming samples from ROM during playback for very low memory usage. - Supports WAV files for sound effects and the XM (FastTracker, MilkyTracker, - OpenMPT), and YM (Arkos Tracker 2) module formats for background music. - Can playback a 10-channel XM with < 3% CPU and < 10% RSP. -* Debugging aids: console (printf goes to screen) exception screen, many - asserts (so that you get a nice error screen instead of a console lockup), - `fprintf(stderr)` calls are redirected to your PC console in emulators - and to USB via compatible tools (UNFLoader, g64drive). -* Support to read/write to SD cards in development kits (64drive, EverDrive64, SummerCart64), - simply with `fopen("sd://sdata.dat")` -* Simple and powerful Makefile-based build system for your ROMs and assets - (n64.mk) The [preview branch](https://github.com/DragonMinded/libdragon/wiki/Preview-branch) features many more features: - * a new comprehensive RDP engine - * a full OpenGL 1.1 port for 3D graphics programming, with a custom, efficient RSP ucode - with full T&L support. - * a MPEG1 RSP-accelerated movie player - * support for showing source-level stack traces in case of crashes or assertions, including - source file name and line number. 
+ * 3D graphics + * Allow for easily plugging in 3D graphics pipelines that can + potentially even coexist in the same scene. + * Included in libdragon: full [OpenGL 1.1 port](https://github.com/DragonMinded/libdragon/wiki/OpenGL-on-N64), together with custom + N64 extensions for using RDP-specific features. + * Third-party: [Tiny3D](https://github.com/HailToDodongo/tiny3d), a high-performance native + 3D pipeline. + * Both OpenGL and Tiny3D import model files from Blender via the GLTF format, + and also feature an animation system with skinning support. + * an [MPEG1 RSP-accelerated movie player](https://github.com/DragonMinded/libdragon/wiki/MPEG1-Player), for high-quality FMVs. + * Expected performance for FMV: 320x240 movie at 800 Kbit/s at 20 fps + * Also very simple to use for render-to-texture scenarios, where + a movie is played back as part of a 3D scene or as background in + a 2D game. + * Improved boot using open-source IPL3 bootcode, which boots ROMs up to 5x + faster and allows for compressed game code (using libdragon compression library). + * Dynamic library support (DSO, sometimes called "overlays") for dynamically + loading and unloading part of game code and data. This is implemented using + the standard `dlopen()` / `dlsym()`. + * Support for [Opus audio compression](https://github.com/DragonMinded/libdragon/wiki/Opus-decompression) for ultra-compressed music streaming. + * Same state-of-the-art audio algorithm that is currently mainstream on PCs. + * Compression ratios around 15:1 for audio files. Around 3-5 minutes of mono + audio per Megabyte of ROM, depending on quality. + * RSP-accelerated for realtime playback. 18-20% of CPU usage for mono streams, + which makes it feasible for menus or games that are not resource-intensive. + * Also well suited for speech at very high compression ratios, which would allow for + a fully voiced ("talkie") game. and much more. These features will eventually land to trunk, but you can start playing with them even today. 
Go the [preview branch doc](https://github.com/DragonMinded/libdragon/wiki/Preview-branch) for more information. @@ -68,14 +115,18 @@ Make sure to read the [full installation instructions](https://github.com/Dragon ### Using emulators -libdragon requires a modern N64 emulator (the first generation of emulators -are basically HLE-only and can only play the old commercial games). Suggested -emulators for homebrew developemnt are: [ares](https://ares-emu.net), -[cen64](https://github.com/n64dev/cen64), [dgb-n64](https://github.com/Dillonb/n64), -[simple64](https://simple64.github.io). +libdragon targets real N64 hardware and uses many advanced corners +of the hardware not used by old commercial games, and thus requires +a modern N64 emulator which focuses on full hardware emulation. + +At the moment, the only emulator that accurately emulates the hardware +(and does not just focus on playing old classics) is [Ares](https://github.com/ares-emulator/ares). Ares requires a modern PC with a discrete +GPU with Vulkan support. -On all the above emulators, you are also able to see in console anything printed -via `fprintf(stderr)`, see the debug library for more information. +You can develop 99% of your game using libdragon and the Ares emulator, +and be confident that the game will correctly run on hardware as well. +Make sure to turn on the "Homebrew mode" in Ares to enable developer +specific checks during emulation that will simplify the debugging experience. ### Using a development cartridge on a real N64 From 0d210b21014b2b495bd2c0c2e7d4267136ff9b76 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 5 May 2024 01:32:19 +0200 Subject: [PATCH 10/48] joybus: fix race condition in SI interrupt that causes a crash at start entrypoint.S writes 0x8 to PIF-RAM to complete the PIF boot process (and avoid the PIF freezing the console after ~5 seconds). Like all writes to PIF-RAM, it takes a while to get through and when it's done, an interrupt is generated. 
If the interrupt triggers after the __joybus_init() constructor is run, the interrupt handler would get confused and assert it. This commit fixes it by waiting for any pending SI transfers before enabling SI interrupts. Notice that the same problem was fixed in preview by the new IPL3, which terminates the PIF boot correctly, including not leaving any pending interrupt when the application is booted. --- src/joybus.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/joybus.c b/src/joybus.c index 6790903ea0..4317de7707 100644 --- a/src/joybus.c +++ b/src/joybus.c @@ -131,6 +131,11 @@ void __joybus_init(void) msgs_ridx = 0; joybus_state = JOYBUS_STATE_IDLE; + // Wait for any pending SI write. This can happen mainly because of the + // write made by entrypoint.S to complete the PIF boot. If the write is still + // pending, it would trigger a SI interrupt later and cause a crash. + while (SI_regs->status & (SI_STATUS_DMA_BUSY | SI_STATUS_IO_BUSY)) {} + // Acknowledge any pending SI interrupt SI_regs->status = 0; From 8bc64485361d868c1df2fffc4467e0a699a29411 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Fri, 26 Apr 2024 15:00:48 +0200 Subject: [PATCH 11/48] sprite: assert if a compressed sprite is loaded via old-style file reading (cherry picked from commit c1a13d9b05b71465d82ad41e07edfb5c688dd9df) --- src/sprite.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/sprite.c b/src/sprite.c index 9fe8d77578..4ed99a89fc 100644 --- a/src/sprite.c +++ b/src/sprite.c @@ -3,6 +3,7 @@ #include "debug.h" #include "surface.h" #include "sprite_internal.h" +#include "asset_internal.h" #include "asset.h" #include "utils.h" #include "rdpq_tex.h" @@ -32,6 +33,12 @@ sprite_ext_t *__sprite_ext(sprite_t *sprite) bool __sprite_upgrade(sprite_t *sprite) { + // Check if the sprite header begins with ASSET_MAGIC, which indicates a + // compressed sprite loaded with old-style file reading. In this case, we + // can emit an assertion. 
+ assertf(memcmp(sprite, ASSET_MAGIC, 3) != 0, + "Sprite is compressed: use sprite_load() instead of reading the file manually"); + // Previously, the "format" field of the sprite structure (now renamed "flags") // was unused and always contained 0. Sprites could only be RGBA16 and RGBA32 anyway, // so only a bitdepth field could be used to understand the format. From 915c7b2a762e177a36b7d46b3d6dc4cca60d1330 Mon Sep 17 00:00:00 2001 From: Simon Eriksson Date: Sat, 11 May 2024 00:33:29 +0200 Subject: [PATCH 12/48] backtrace: Fix alignas order syntax errors with GCC 14 --- src/asset.c | 2 +- src/backtrace.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/asset.c b/src/asset.c index 6a77ac2c5f..19a93eb154 100644 --- a/src/asset.c +++ b/src/asset.c @@ -248,7 +248,7 @@ typedef struct { bool seeked; void (*reset)(void *state); ssize_t (*read)(void *state, void *buf, size_t len); - uint8_t state[] alignas(8); + uint8_t alignas(8) state[]; } cookie_cmp_t; static int readfn_cmp(void *c, char *buf, int sz) diff --git a/src/backtrace.c b/src/backtrace.c index 8532c40788..7787025110 100644 --- a/src/backtrace.c +++ b/src/backtrace.c @@ -103,7 +103,7 @@ * * The SYMT file is generated by the n64sym tool during the build process. 
*/ -typedef struct alignas(8) { +typedef struct { char head[4]; ///< Magic ID "SYMT" uint32_t version; ///< Version of the symbol table uint32_t addrtab_off; ///< Offset of the address table in the file @@ -180,7 +180,7 @@ static symtable_header_t symt_open(void) { return (symtable_header_t){0}; } - symtable_header_t symt_header alignas(8); + symtable_header_t alignas(8) symt_header; data_cache_hit_writeback_invalidate(&symt_header, sizeof(symt_header)); dma_read(&symt_header, SYMT_ROM, sizeof(symtable_header_t)); @@ -312,7 +312,7 @@ char* __symbolize(void *vaddr, char *buf, int size) a = symt_addrtab_entry(&symt, --idx); // Read the symbol name - symtable_entry_t entry alignas(8); + symtable_entry_t alignas(8) entry; symt_entry_fetch(&symt, &entry, idx); char *func = symt_entry_func(&symt, &entry, addr, buf, size-12); char lbuf[12]; @@ -607,11 +607,11 @@ int backtrace(void **buffer, int size) static void format_entry(void (*cb)(void *, backtrace_frame_t *), void *cb_arg, symtable_header_t *symt, int idx, uint32_t addr, uint32_t offset, bool is_func, bool is_inline) { - symtable_entry_t entry alignas(8); + symtable_entry_t alignas(8) entry; symt_entry_fetch(symt, &entry, idx); - char file_buf[entry.file_len + 2] alignas(8); - char func_buf[MAX(entry.func_len + 2, 32)] alignas(8); + char alignas(8) file_buf[entry.file_len + 2]; + char alignas(8) func_buf[MAX(entry.func_len + 2, 32)]; cb(cb_arg, &(backtrace_frame_t){ .addr = addr, From 436ade24e7c4daef86946dc3a1b48978d2c7154b Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Wed, 1 May 2024 11:33:59 +0200 Subject: [PATCH 13/48] subprocess: fix some warnings (cherry picked from commit 4f5eaa749384dbc1bd81889277c81d5165def658) --- tools/common/subprocess.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/common/subprocess.h b/tools/common/subprocess.h index 5794dd8cda..8fd51b643f 100644 --- a/tools/common/subprocess.h +++ b/tools/common/subprocess.h @@ -259,7 +259,9 @@ typedef struct 
_OVERLAPPED *LPOVERLAPPED; #pragma clang diagnostic pop #endif +#if defined(_MSC_VER) #pragma warning(push, 1) +#endif struct subprocess_subprocess_information_s { void *hProcess; void *hThread; @@ -308,7 +310,9 @@ struct subprocess_overlapped_s { void *hEvent; }; +#if defined(_MSC_VER) #pragma warning(pop) +#endif __declspec(dllimport) unsigned long __stdcall GetLastError(void); __declspec(dllimport) int __stdcall SetHandleInformation(void *, unsigned long, @@ -416,7 +420,7 @@ int subprocess_create_named_pipe_helper(void **rd, void **wr) { static subprocess_tls long index = 0; const long unique = index++; -#if _MSC_VER < 1900 +#if defined(_MSC_VER) && _MSC_VER < 1900 #pragma warning(push, 1) #pragma warning(disable : 4996) _snprintf(name, sizeof(name) - 1, @@ -431,14 +435,14 @@ int subprocess_create_named_pipe_helper(void **rd, void **wr) { *rd = CreateNamedPipeA(name, pipeAccessInbound | fileFlagOverlapped, - pipeTypeByte | pipeWait, 1, 4096, 4096, SUBPROCESS_NULL, + pipeTypeByte | pipeWait, 1, 4096, 4096, 0, SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr)); if (invalidHandleValue == *rd) { return -1; } - *wr = CreateFileA(name, genericWrite, SUBPROCESS_NULL, + *wr = CreateFileA(name, genericWrite, 0, SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), openExisting, fileAttributeNormal, SUBPROCESS_NULL); From 1ba2663d96a55422b87b6c1cd4dc2b9c9b38f171 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Wed, 1 May 2024 11:47:44 +0200 Subject: [PATCH 14/48] graphics.h: fix C++ compilation (cherry picked from commit 448adefd498d789e539de7b831092991c0ebbb22) --- include/graphics.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/graphics.h b/include/graphics.h index 764e0e4880..50856f628d 100644 --- a/include/graphics.h +++ b/include/graphics.h @@ -35,7 +35,9 @@ typedef struct __attribute__((packed)) uint8_t a; } color_t; +#ifndef __cplusplus _Static_assert(sizeof(color_t) == 4, "invalid sizeof for color_t"); +#endif /** @brief Create a #color_t from the 
R,G,B,A components in the RGBA16 range (that is: RGB in 0-31, A in 0-1) */ #define RGBA16(rx,gx,bx,ax) ({ \ From 7a264caf50851b295dbcb0c8c286ff3826b27907 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Wed, 1 May 2024 11:49:35 +0200 Subject: [PATCH 15/48] tools: fix some compilation warnings, upgrade C++ version, fix clean (cherry picked from commit 1d7f39af49b61ab23dc7ba320f96afcc11188fbe) --- tools/Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/Makefile b/tools/Makefile index 7a891d138a..3c9ab9e2b8 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,8 +1,7 @@ hN64_GCCPREFIX ?= $(N64_INST) INSTALLDIR ?= $(N64_INST) -CFLAGS += -std=gnu11 -O2 -Wall -Werror -Wno-unused-result -Wno-error=unknown-pragmas -Wno-error=sign-compare -I../include -MMD -CXXFLAGS += -std=c++11 -O2 -Wall -Werror -Wno-unused-result -Wno-error=unknown-pragmas -Wno-error=sign-compare -MMD -LDFLAGS = +CFLAGS += -std=gnu11 -O2 -Wall -Werror -Wno-unused-result -Wno-error=unknown-pragmas -Wno-sign-compare -I../include -MMD +CXXFLAGS += -std=gnu++17 -O2 -Wall -Werror -Wno-unused-result -Wno-error=unknown-pragmas -Wno-sign-compare -Wno-c++11-narrowing -Wno-narrowing -Wno-error=conversion-null -MMD all: @@ -17,6 +16,8 @@ all: rm -f $@ $(AR) rcs $@ $^ +# Avoid many warnings for vendored code that we don't intend to ever modify +common/shrinkler_compress.o: CFLAGS += -Wno-all -Wno-error common-clean: rm -f common/*.o common/*.a common/*.d common/assetcomp.a: common/assetcomp.o common/lz4_compress.o \ @@ -54,7 +55,7 @@ $(1)-install: $(1) mkdir -p $(INSTALLDIR)/bin install -m 0755 $$($(1)_BIN) $(INSTALLDIR)/bin $(1)-clean: - rm -f $$($(1)_BIN) $$($(1)_DIR)/*.o $$($(1)_DIR)/*.d + rm -f $$($(1)_BIN) $$($(1)_DIR)/**/*.o $$($(1)_DIR)/**/*.d -include $$(wildcard $$($(1)_DIR)/*.d) endef @@ -62,6 +63,7 @@ $(foreach tool,$(TOOLS),$(eval $(call TOOL_template,$(tool)))) all: $(TOOLS) install: $(foreach tool,$(TOOLS),$(tool)-install) clean: $(foreach 
tool,$(TOOLS),$(tool)-clean) common-clean + rm -f ${n64tool_OBJS} ${n64sym_OBJS} ${ed64romconfig_OBJS} .PHONY: all install clean ifneq ($(V),1) From 6cdcd7973b2994d30d8533f8dc968600cde5c165 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Wed, 1 May 2024 11:49:51 +0200 Subject: [PATCH 16/48] tools: build static executables on Windows (cherry picked from commit 17108c7d9ed0a3af2112ae6e6285d07a25c54180) --- tools/Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/Makefile b/tools/Makefile index 3c9ab9e2b8..9c6ff18bba 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -3,6 +3,12 @@ INSTALLDIR ?= $(N64_INST) CFLAGS += -std=gnu11 -O2 -Wall -Werror -Wno-unused-result -Wno-error=unknown-pragmas -Wno-sign-compare -I../include -MMD CXXFLAGS += -std=gnu++17 -O2 -Wall -Werror -Wno-unused-result -Wno-error=unknown-pragmas -Wno-sign-compare -Wno-c++11-narrowing -Wno-narrowing -Wno-error=conversion-null -MMD +ifeq ($(OS),Windows_NT) + CFLAGS += -static + CXXFLAGS += -static + LDFLAGS += -static +endif + all: %.o: %.c From ffc92b703e0642ed9dea323bd007e1ac25194f77 Mon Sep 17 00:00:00 2001 From: Giacomo Garbin Date: Mon, 13 May 2024 00:12:44 +0200 Subject: [PATCH 17/48] Fix typo. --- src/console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/console.c b/src/console.c index 2597815b43..2f5430aa57 100644 --- a/src/console.c +++ b/src/console.c @@ -172,7 +172,7 @@ static int __console_write( char *buf, unsigned int len ) * @brief Initialize the console * * Initialize the console system. This will initialize the video properly, so - * a call to the display_init() fuction is not necessary. + * a call to the display_init() function is not necessary. 
*/ void console_init() { From 2e275da03f1a96472273e707443e64b1785f0f77 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sat, 18 May 2024 17:17:57 +0200 Subject: [PATCH 18/48] display: fix display_get_fps on iQue The intermediate result was overflowing int precision (cherry picked from commit d1153c2ab94292801fb3a0f66143254ab8790d12) --- src/display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/display.c b/src/display.c index acf2ae87e6..5bb983e538 100644 --- a/src/display.c +++ b/src/display.c @@ -418,5 +418,5 @@ uint32_t display_get_num_buffers(void) float display_get_fps(void) { if (!frame_times_duration) return 0; - return (float)(FPS_WINDOW * TICKS_PER_SECOND) / frame_times_duration; + return (float)FPS_WINDOW * TICKS_PER_SECOND / frame_times_duration; } From e5d29f42638b10540881cdc35b3c27384eac018b Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 19 May 2024 14:31:07 +0200 Subject: [PATCH 19/48] wav64: remove leftover debug print (cherry picked from commit e5b93a685c240f127f9aa85c0cf50c51c2139aa0) --- src/audio/wav64.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/audio/wav64.c b/src/audio/wav64.c index 6143eb8d4b..a07b85cf89 100644 --- a/src/audio/wav64.c +++ b/src/audio/wav64.c @@ -312,8 +312,6 @@ void wav64_open(wav64_t *wav, const char *fn) { } dfs_close(fh); - debugf("wav64 %s: %d-bit %.1fHz %dch %d samples (loop: %d)\n", - fn, wav->wave.bits, wav->wave.frequency, wav->wave.channels, wav->wave.len, wav->wave.loop_len); } void wav64_play(wav64_t *wav, int ch) From b80894d7a454e5843a9c0155e21aafc3f534a655 Mon Sep 17 00:00:00 2001 From: Simon Eriksson Date: Sat, 11 May 2024 01:02:56 +0200 Subject: [PATCH 20/48] toolchain: Update GCC to 14.1 Add -fpermissive to newlib CFLAGS since current upstream does not otherwise work with the stricter GCC 14 defaults. 
--- README.md | 2 +- tools/build-toolchain.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7f790ab179..68248ded1e 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Libdragon is an open-source SDK for Nintendo 64. It aims for a complete N64 programming experience while providing programmers with modern approach to programming and debugging. These are the main features: -* Based on modern GCC (13) and Newlib, for a full C11 programming experience. +* Based on modern GCC (version 14) and Newlib, for a full C11 programming experience. A Docker container is available to quickly set up the programming environment. * The GCC toolchain is 64 bit capable to be able to use the full R4300 capabilities (commercial games and libultra are based on a 32-bit ABI and is not possible diff --git a/tools/build-toolchain.sh b/tools/build-toolchain.sh index 021c8fb274..d81ff52216 100755 --- a/tools/build-toolchain.sh +++ b/tools/build-toolchain.sh @@ -36,7 +36,7 @@ GCC_CONFIGURE_ARGS=() # Dependency source libs (Versions) BINUTILS_V=2.42 -GCC_V=13.2.0 +GCC_V=14.1.0 NEWLIB_V=4.4.0.20231231 GMP_V=6.3.0 MPC_V=1.3.1 @@ -224,7 +224,7 @@ popd # Compile newlib for target. mkdir -p newlib_compile_target pushd newlib_compile_target -CFLAGS_FOR_TARGET="-DHAVE_ASSERT_FUNC -O2" ../"newlib-$NEWLIB_V"/configure \ +CFLAGS_FOR_TARGET="-DHAVE_ASSERT_FUNC -O2 -fpermissive" ../"newlib-$NEWLIB_V"/configure \ --prefix="$CROSS_PREFIX" \ --target="$N64_TARGET" \ --with-cpu=mips64vr4300 \ @@ -290,7 +290,7 @@ else # Compile newlib for target. 
mkdir -p newlib_compile pushd newlib_compile - CFLAGS_FOR_TARGET="-DHAVE_ASSERT_FUNC -O2" ../"newlib-$NEWLIB_V"/configure \ + CFLAGS_FOR_TARGET="-DHAVE_ASSERT_FUNC -O2 -fpermissive" ../"newlib-$NEWLIB_V"/configure \ --prefix="$INSTALL_PATH" \ --target="$N64_TARGET" \ --with-cpu=mips64vr4300 \ From cfc6613c701d709542c3d145c8c78c9f3bafcfe2 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Mon, 20 May 2024 01:42:59 +0200 Subject: [PATCH 21/48] Add RSP DMA control regs to rsp.h register names from https://n64brew.dev/wiki/Reality_Signal_Processor/Interface?oldid=5376 --- include/rsp.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/rsp.h b/include/rsp.h index 80fa19cfb3..54563e1110 100644 --- a/include/rsp.h +++ b/include/rsp.h @@ -172,6 +172,15 @@ extern "C" { /** @brief Current SP program counter */ #define SP_PC ((volatile uint32_t*)0xA4080000) +/** @brief SP DMA IMEM/DMEM address register */ +#define SP_DMA_SPADDR ((volatile uint32_t*)0xA4040000) +/** @brief SP DMA RDRAM address register */ +#define SP_DMA_RAMADDR ((volatile uint32_t*)0xA4040004) +/** @brief SP DMA from RDRAM to IMEM/DMEM register */ +#define SP_DMA_RDLEN ((volatile uint32_t*)0xA4040008) +/** @brief SP DMA from IMEM/DMEM to RDRAM register */ +#define SP_DMA_WRLEN ((volatile uint32_t*)0xA404000C) + /** @brief SP status register */ #define SP_STATUS ((volatile uint32_t*)0xA4040010) From b1a6bbc78e26d4c985860ef5acc64332ed98a741 Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Thu, 4 Apr 2024 22:33:03 +0200 Subject: [PATCH 22/48] Address libdragon n64 code warnings --- include/graphics.h | 4 ++-- include/rdpq_mode.h | 2 +- include/surface.h | 16 ++++++++-------- src/surface.c | 11 ++++++----- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/include/graphics.h b/include/graphics.h index 50856f628d..9c8257aebd 100644 --- a/include/graphics.h +++ b/include/graphics.h @@ -61,12 +61,12 @@ inline uint32_t color_to_packed32(color_t c) { } /** @brief Create a #color_t from the 
16-bit packed format used by a #FMT_RGBA16 surface (RGBA 5551) */ inline color_t color_from_packed16(uint16_t c) { - return (color_t){ .r=((c>>11)&0x1F)<<3, .g=((c>>6)&0x1F)<<3, .b=((c>>1)&0x1F)<<3, .a=(c&0x1) ? 0xFF : 0 }; + return (color_t){ .r=(uint8_t)(((c>>11)&0x1F)<<3), .g=(uint8_t)(((c>>6)&0x1F)<<3), .b=(uint8_t)(((c>>1)&0x1F)<<3), .a=(uint8_t)((c&0x1) ? 0xFF : 0) }; } /** @brief Create a #color_t from the 32-bit packed format used by a #FMT_RGBA32 surface (RGBA 8888) */ inline color_t color_from_packed32(uint32_t c) { - return (color_t){ .r=(c>>24)&0xFF, .g=(c>>16)&0xFF, .b=(c>>8)&0xFF, .a=c&0xFF }; + return (color_t){ .r=(uint8_t)(c>>24), .g=(uint8_t)(c>>16), .b=(uint8_t)(c>>8), .a=(uint8_t)c }; } uint32_t graphics_make_color( int r, int g, int b, int a ); diff --git a/include/rdpq_mode.h b/include/rdpq_mode.h index bf88e53b26..39c5afad25 100644 --- a/include/rdpq_mode.h +++ b/include/rdpq_mode.h @@ -692,7 +692,7 @@ inline void rdpq_mode_alphacompare(int threshold) { __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, 0); } else if (threshold > 0) { __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, SOM_ALPHACOMPARE_THRESHOLD); - rdpq_set_blend_color(RGBA32(0,0,0,threshold)); + rdpq_set_blend_color(RGBA32(0,0,0,(uint8_t)threshold)); } else { __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, SOM_ALPHACOMPARE_NOISE); } diff --git a/include/surface.h b/include/surface.h index d5a127ff1e..0cbf9b8332 100644 --- a/include/surface.h +++ b/include/surface.h @@ -165,7 +165,7 @@ typedef struct surface_s * * @see #surface_make_linear */ -inline surface_t surface_make(void *buffer, tex_format_t format, uint32_t width, uint32_t height, uint32_t stride) { +inline surface_t surface_make(void *buffer, tex_format_t format, uint16_t width, uint16_t height, uint16_t stride) { return (surface_t){ .flags = format, .width = width, @@ -192,7 +192,7 @@ inline surface_t surface_make(void *buffer, tex_format_t format, uint32_t width, * * @see #surface_make */ -inline surface_t 
surface_make_linear(void *buffer, tex_format_t format, uint32_t width, uint32_t height) { +inline surface_t surface_make_linear(void *buffer, tex_format_t format, uint16_t width, uint16_t height) { return surface_make(buffer, format, width, height, TEX_FORMAT_PIX2BYTES(format, width)); } @@ -213,7 +213,7 @@ inline surface_t surface_make_linear(void *buffer, tex_format_t format, uint32_t * @param[in] height Height in pixels * @return The initialized surface */ -surface_t surface_alloc(tex_format_t format, uint32_t width, uint32_t height); +surface_t surface_alloc(tex_format_t format, uint16_t width, uint16_t height); /** * @brief Initialize a surface_t structure, pointing to a rectangular portion of another @@ -230,7 +230,7 @@ surface_t surface_alloc(tex_format_t format, uint32_t width, uint32_t height); * @return The initialized surface */ surface_t surface_make_sub(surface_t *parent, - uint32_t x0, uint32_t y0, uint32_t width, uint32_t height); + uint16_t x0, uint16_t y0, uint16_t width, uint16_t height); /** * @brief Free the buffer allocated in a surface. 
@@ -290,9 +290,9 @@ inline bool surface_has_owned_buffer(const surface_t *surface) * @see #surface_make_placeholder_linear * @see #rdpq_set_lookup_address */ -inline surface_t surface_make_placeholder(int index, tex_format_t format, uint32_t width, uint32_t height, uint32_t stride) { +inline surface_t surface_make_placeholder(int index, tex_format_t format, uint16_t width, uint16_t height, uint16_t stride) { return (surface_t){ - .flags = format | (index << 8), + .flags = (uint16_t)(format | ((index & 0xF) << 8)), .width = width, .height = height, .stride = stride, @@ -315,14 +315,14 @@ inline surface_t surface_make_placeholder(int index, tex_format_t format, uint32 * * @see #surface_make_placeholder */ -inline surface_t surface_make_placeholder_linear(int index, tex_format_t format, uint32_t width, uint32_t height) { +inline surface_t surface_make_placeholder_linear(int index, tex_format_t format, uint16_t width, uint16_t height) { return surface_make_placeholder(index, format, width, height, TEX_FORMAT_PIX2BYTES(format, width)); } /** * @brief Returns the lookup index of a placeholder surface * - * If ths surface is a placeholder, this function returns the associated lookup + * If the surface is a placeholder, this function returns the associated lookup * index that will be used to retrieve the actual surface at playback time. * Otherwise, if it is a normal surface, this function will return 0. * diff --git a/src/surface.c b/src/surface.c index b4e2a58d63..c4a5d149d8 100644 --- a/src/surface.c +++ b/src/surface.c @@ -9,6 +9,7 @@ #include "debug.h" #include #include +#include const char* tex_format_name(tex_format_t fmt) { @@ -28,7 +29,7 @@ const char* tex_format_name(tex_format_t fmt) } } -surface_t surface_alloc(tex_format_t format, uint32_t width, uint32_t height) +surface_t surface_alloc(tex_format_t format, uint16_t width, uint16_t height) { // A common mistake is to call surface_alloc with the wrong argument order. 
// Try to catch it by checking that the format is not valid. @@ -53,14 +54,14 @@ void surface_free(surface_t *surface) memset(surface, 0, sizeof(surface_t)); } -surface_t surface_make_sub(surface_t *parent, uint32_t x0, uint32_t y0, uint32_t width, uint32_t height) +surface_t surface_make_sub(surface_t *parent, uint16_t x0, uint16_t y0, uint16_t width, uint16_t height) { assert(x0 + width <= parent->width); assert(y0 + height <= parent->height); tex_format_t fmt = surface_get_format(parent); assertf(TEX_FORMAT_BITDEPTH(fmt) != 4 || (x0 & 1) == 0, - "cannot create a subsurface with an odd X offset (%ld) in a 4bpp surface", x0); + "cannot create a subsurface with an odd X offset (%" PRId16 ") in a 4bpp surface", x0); surface_t sub; sub.buffer = parent->buffer + y0 * parent->stride + TEX_FORMAT_PIX2BYTES(fmt, x0); @@ -71,7 +72,7 @@ surface_t surface_make_sub(surface_t *parent, uint32_t x0, uint32_t y0, uint32_t return sub; } -extern inline surface_t surface_make(void *buffer, tex_format_t format, uint32_t width, uint32_t height, uint32_t stride); +extern inline surface_t surface_make(void *buffer, tex_format_t format, uint16_t width, uint16_t height, uint16_t stride); extern inline tex_format_t surface_get_format(const surface_t *surface); -extern inline surface_t surface_make_linear(void *buffer, tex_format_t format, uint32_t width, uint32_t height); +extern inline surface_t surface_make_linear(void *buffer, tex_format_t format, uint16_t width, uint16_t height); extern inline bool surface_has_owned_buffer(const surface_t *surface); From 7df78015d4ecc547845be1298fe5390a401ae82a Mon Sep 17 00:00:00 2001 From: Dragorn421 Date: Sun, 19 May 2024 22:53:06 +0200 Subject: [PATCH 23/48] check getline return --- tools/n64sym.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/n64sym.c b/tools/n64sym.c index 597b4cb250..fbb07d9341 100644 --- a/tools/n64sym.c +++ b/tools/n64sym.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include #include #include #include 
@@ -171,6 +172,7 @@ void symbol_add(const char *elf, uint32_t addr, bool is_func) // First line is the function name. If instead it's the dummy 0x0 address, // it means that we're done. int n = getline(&line_buf, &line_buf_size, addr2line_r); + assert(n != -1); if (strncmp(line_buf, "0x00000000", 10) == 0) break; // If the function of name is longer than 64 bytes, truncate it. This also @@ -180,7 +182,8 @@ void symbol_add(const char *elf, uint32_t addr, bool is_func) if (n-1 > flag_max_sym_len) strcpy(&func[flag_max_sym_len-3], "..."); // Second line is the file name and line number - getline(&line_buf, &line_buf_size, addr2line_r); + int ret = getline(&line_buf, &line_buf_size, addr2line_r); + assert(ret != -1); char *colon = strrchr(line_buf, ':'); char *file = strndup(line_buf, colon - line_buf); int line = atoi(colon + 1); From f6f7ec5c6c9d1faf8313cd9a5bcb98de9dc27756 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sat, 25 May 2024 01:02:11 +0200 Subject: [PATCH 24/48] asset: add missing function declaration (breaks builds with GCC 14) (cherry picked from commit bb21b6a45a247049fbfaa01f8352fe4b14320443) --- include/asset.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/asset.h b/include/asset.h index 7968af0166..1d2b2b216e 100644 --- a/include/asset.h +++ b/include/asset.h @@ -75,8 +75,10 @@ extern "C" { #endif -/// @private +/// @cond extern void __asset_init_compression_lvl2(void); +extern void __asset_init_compression_lvl3(void); +/// @endcond /** * @brief Enable a non-default compression level From f40cd9e418c5cd198cb0d239a7ab266c0e62cbbf Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 26 May 2024 10:25:29 +0200 Subject: [PATCH 25/48] mixer: fix mixer_ch_get_pos overflow after 23 seconds We forgot changing this when we switched to 64-bit positions in bb2eff0f. 
(cherry picked from commit 7cd574210d246816aef06dee22ad2234b2d5dbb5) --- src/audio/mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/audio/mixer.c b/src/audio/mixer.c index 30a575529f..2c2836da35 100644 --- a/src/audio/mixer.c +++ b/src/audio/mixer.c @@ -428,7 +428,7 @@ void mixer_ch_set_pos(int ch, float pos) { float mixer_ch_get_pos(int ch) { mixer_channel_t *c = &Mixer.channels[ch]; assertf(!(c->flags & CH_FLAGS_STEREO_SUB), "mixer_ch_get_pos: cannot call on secondary stereo channel %d", ch); - uint32_t pos = c->pos >> (c->flags & CH_FLAGS_BPS_SHIFT); + uint64_t pos = c->pos >> (c->flags & CH_FLAGS_BPS_SHIFT); return (float)pos / (float)(1< Date: Sun, 26 May 2024 19:08:25 +0200 Subject: [PATCH 26/48] rspq: Fix infinite loop with max command size Previously, the maximum command size was 63 words, which is one less word than the size of the command buffer in DMEM. Depending on alignment within the RDRAM buffer, commands with this maximum size could be DMA'd into the DMEM buffer at an offset of one word. In that case, the command ended exactly at the end of the DMEM buffer. Due to an optimization in rspq (which saves some IMEM), this led to rspq refetching the commands in an infinite loop. This is now fixed by decreasing the maximum command size to 62 instead. Additionally, this makes rspq more robust by moving some magic numbers to macros and adding some (static) asserts.
--- include/rsp_queue.inc | 12 ++++++------ include/rspq.h | 4 +++- include/rspq_constants.h | 2 ++ src/rspq/rspq.c | 10 ++++++++++ tests/rsp_test.S | 9 +++++---- tests/test_rspq.c | 23 ++++++++++++----------- tests/test_rspq_constants.h | 8 ++++++++ 7 files changed, 46 insertions(+), 22 deletions(-) create mode 100644 tests/test_rspq_constants.h diff --git a/include/rsp_queue.inc b/include/rsp_queue.inc index 768a03adb9..36d5e23663 100644 --- a/include/rsp_queue.inc +++ b/include/rsp_queue.inc @@ -159,7 +159,7 @@ _RSPQ_SAVED_STATE_END: # function: Address of the function that will be jumped to # when this command is executed. # size: The size of the command in bytes. Must be a -# multiple of 4 and in the range [0, 252]. +# multiple of 4 and in the range [0, RSPQ_DESCRIPTOR_MAX_SIZE]. ######################################################## .macro RSPQ_DefineCommand function size .ifne ((\size) % 4) @@ -168,18 +168,18 @@ _RSPQ_SAVED_STATE_END: .endif .iflt (\size) - .error "Invalid size - valid range: [0, 252]" + .error "Invalid size - valid range: [0, RSPQ_DESCRIPTOR_MAX_SIZE]" .exitm .endif - .ifgt ((\size) - 252) - .error "Invalid size - valid range: [0, 252]" + .ifgt ((\size) - RSPQ_DESCRIPTOR_MAX_SIZE) + .error "Invalid size - valid range: [0, RSPQ_DESCRIPTOR_MAX_SIZE]" .exitm .endif # Put the command size (as number of 4 byte words) into the high 6 bits, # and the jump address shifted right by 2 bits into the lower 10. - .short (\function - _start) >> 2 | ((\size) & 0xFC) << 8 + .short (\function - _start) >> 2 | ((\size) & RSPQ_DESCRIPTOR_SIZE_MASK) << 8 .endm ######################################################## @@ -497,7 +497,7 @@ rspq_execute_command: # Command size srl rspq_cmd_size, cmd_desc, 8 - andi rspq_cmd_size, 0xFC + andi rspq_cmd_size, RSPQ_DESCRIPTOR_SIZE_MASK # Check if the command is truncated because of buffer overflow (that is, # it finishes beyond the buffer end). 
If so, we must refetch the buffer diff --git a/include/rspq.h b/include/rspq.h index f6885b8a84..2b61375621 100644 --- a/include/rspq.h +++ b/include/rspq.h @@ -177,7 +177,7 @@ extern "C" { #endif /** @brief Maximum size of a command (in 32-bit words). */ -#define RSPQ_MAX_COMMAND_SIZE 63 +#define RSPQ_MAX_COMMAND_SIZE 62 /** @brief Maximum size of a command that it is writable with #rspq_write * (in 32-bit words). @@ -476,6 +476,8 @@ inline rspq_write_t rspq_write_begin(uint32_t ovl_id, uint32_t cmd_id, int size) extern volatile uint32_t *rspq_cur_pointer, *rspq_cur_sentinel; extern void rspq_next_buffer(void); + assertf(size <= RSPQ_MAX_COMMAND_SIZE, "The maximum command size is %d!", RSPQ_MAX_COMMAND_SIZE); + if (__builtin_expect(rspq_cur_pointer > rspq_cur_sentinel - size, 0)) rspq_next_buffer(); diff --git a/include/rspq_constants.h b/include/rspq_constants.h index d613f8700c..1b46303433 100644 --- a/include/rspq_constants.h +++ b/include/rspq_constants.h @@ -14,6 +14,8 @@ #define RSPQ_MAX_OVERLAY_COUNT 8 #define RSPQ_OVERLAY_ID_COUNT 16 #define RSPQ_MAX_OVERLAY_COMMAND_COUNT ((RSPQ_MAX_OVERLAY_COUNT - 1) * 16) +#define RSPQ_DESCRIPTOR_SIZE_MASK 0xFC +#define RSPQ_DESCRIPTOR_MAX_SIZE RSPQ_DESCRIPTOR_SIZE_MASK /** Minimum / maximum size of a block's chunk (contiguous memory buffer) */ #define RSPQ_BLOCK_MIN_SIZE 64 diff --git a/src/rspq/rspq.c b/src/rspq/rspq.c index d284a848df..259697b203 100644 --- a/src/rspq/rspq.c +++ b/src/rspq/rspq.c @@ -210,6 +210,16 @@ // rsp_queue.S (see cmd_write_status there for an explanation). _Static_assert((RSPQ_CMD_WRITE_STATUS & 1) == 0); _Static_assert((RSPQ_CMD_TEST_WRITE_STATUS & 1) == 0); + +// Check that the DMEM buffer is sized at least for the largest command +// that we can handle, plus some extra space that's required because +// the RSP code won't run a command that ends exactly at the end of +// the buffer (see rsp_queue.inc). 
+_Static_assert(RSPQ_DMEM_BUFFER_SIZE >= (RSPQ_MAX_COMMAND_SIZE + 2) * 4); + +// Check that the maximum command size is actually supported by the +// internal command descriptor format. +_Static_assert(RSPQ_MAX_COMMAND_SIZE * 4 <= RSPQ_DESCRIPTOR_MAX_SIZE); /// @endcond /** @brief Smaller version of rspq_write that writes to an arbitrary pointer */ diff --git a/tests/rsp_test.S b/tests/rsp_test.S index 3b1c40b5cb..ab0fa329e1 100644 --- a/tests/rsp_test.S +++ b/tests/rsp_test.S @@ -1,4 +1,5 @@ #include +#include "test_rspq_constants.h" #define ASSERT_GP_BACKWARD 0xF001 // Also defined in test_rspq.c #define ASSERT_TOO_MANY_NOPS 0xF002 @@ -17,7 +18,7 @@ RSPQ_DefineCommand command_reset, 4 # 0x05 RSPQ_DefineCommand command_test_high, 4 # 0x06 RSPQ_DefineCommand command_reset_log, 4 # 0x07 - RSPQ_DefineCommand command_big, 132 # 0x08 + RSPQ_DefineCommand command_big, TEST_RSPQ_BIG_COMMAND_SIZE*4 # 0x08 RSPQ_DefineCommand command_big_out, 8 # 0x09 RSPQ_DefineCommand command_send_rdp, 8 # 0x0A RSPQ_DefineCommand command_send_rdp_many, 4 # 0x0B @@ -40,7 +41,7 @@ BIG_LOG_PTR: .long 0 BIG_LOG: .ds.b BIG_LOG_SIZE .align 2 -TEST_BIG: .ds.b 128 +TEST_BIG: .ds.b TEST_RSPQ_BIG_PAYLOAD_SIZE .text @@ -141,7 +142,7 @@ command_send_rdp_many: command_big: - addi s1, rspq_dmem_buf_ptr, -128 + addi s1, rspq_dmem_buf_ptr, -TEST_RSPQ_BIG_PAYLOAD_SIZE move s2, zero command_big_loop: lw t0, %lo(RSPQ_DMEM_BUFFER)(s1) @@ -158,6 +159,6 @@ command_big_out: move s0, a1 li s4, %lo(TEST_BIG) j DMAOut - li t0, DMA_SIZE(128, 1) + li t0, DMA_SIZE(TEST_RSPQ_BIG_PAYLOAD_SIZE, 1) #include diff --git a/tests/test_rspq.c b/tests/test_rspq.c index 691f70edaf..4fdc6cffa3 100644 --- a/tests/test_rspq.c +++ b/tests/test_rspq.c @@ -5,6 +5,7 @@ #include #include #include +#include "test_rspq_constants.h" #define ASSERT_GP_BACKWARD 0xF001 // Also defined in rsp_test.S #define ASSERT_TOO_MANY_NOPS 0xF002 @@ -712,27 +713,27 @@ void test_rspq_big_command(TestContext *ctx) test_ovl_init(); DEFER(test_ovl_close()); 
- uint32_t values[32]; - for (uint32_t i = 0; i < 32; i++) + uint32_t values[TEST_RSPQ_BIG_PAYLOAD_WORDS]; + for (uint32_t i = 0; i < TEST_RSPQ_BIG_PAYLOAD_WORDS; i++) { values[i] = RANDN(0xFFFFFFFF); } - uint32_t output[32] __attribute__((aligned(16))); - data_cache_hit_writeback_invalidate(output, 128); + uint32_t output[TEST_RSPQ_BIG_PAYLOAD_WORDS] __attribute__((aligned(16))); + data_cache_hit_writeback_invalidate(output, TEST_RSPQ_BIG_PAYLOAD_SIZE); - rspq_write_t wptr = rspq_write_begin(test_ovl_id, 0x8, 33); + rspq_write_t wptr = rspq_write_begin(test_ovl_id, 0x8, TEST_RSPQ_BIG_COMMAND_SIZE); rspq_write_arg(&wptr, 0); - for (uint32_t i = 0; i < 32; i++) + for (uint32_t i = 0; i < TEST_RSPQ_BIG_PAYLOAD_WORDS; i++) { rspq_write_arg(&wptr, i | i << 8 | i << 16 | i << 24); } rspq_write_end(&wptr); - wptr = rspq_write_begin(test_ovl_id, 0x8, 33); + wptr = rspq_write_begin(test_ovl_id, 0x8, TEST_RSPQ_BIG_COMMAND_SIZE); rspq_write_arg(&wptr, 0); - for (uint32_t i = 0; i < 32; i++) + for (uint32_t i = 0; i < TEST_RSPQ_BIG_PAYLOAD_WORDS; i++) { rspq_write_arg(&wptr, values[i]); } @@ -742,14 +743,14 @@ void test_rspq_big_command(TestContext *ctx) TEST_RSPQ_EPILOG(0, rspq_timeout); - uint32_t expected[32]; - for (uint32_t i = 0; i < 32; i++) + uint32_t expected[TEST_RSPQ_BIG_PAYLOAD_WORDS]; + for (uint32_t i = 0; i < TEST_RSPQ_BIG_PAYLOAD_WORDS; i++) { uint32_t x = i | i << 8 | i << 16 | i << 24; expected[i] = x ^ values[i]; } - ASSERT_EQUAL_MEM((uint8_t*)output, (uint8_t*)expected, 128, "Output does not match!"); + ASSERT_EQUAL_MEM((uint8_t*)output, (uint8_t*)expected, TEST_RSPQ_BIG_PAYLOAD_SIZE, "Output does not match!"); } void test_rspq_rdp_dynamic(TestContext *ctx) diff --git a/tests/test_rspq_constants.h b/tests/test_rspq_constants.h new file mode 100644 index 0000000000..8312998ed4 --- /dev/null +++ b/tests/test_rspq_constants.h @@ -0,0 +1,8 @@ +#ifndef TEST_RSPQ_CONSTANTS +#define TEST_RSPQ_CONSTANTS + +#define TEST_RSPQ_BIG_COMMAND_SIZE 62 +#define 
TEST_RSPQ_BIG_PAYLOAD_WORDS (TEST_RSPQ_BIG_COMMAND_SIZE-1) +#define TEST_RSPQ_BIG_PAYLOAD_SIZE (TEST_RSPQ_BIG_PAYLOAD_WORDS*4) + +#endif From 3357b2ff67094066b632582be9f24523539c8269 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sat, 8 Jun 2024 23:21:13 +0200 Subject: [PATCH 27/48] rdpq_debug: improve alpha compare bug detection --- src/rdpq/rdpq_debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rdpq/rdpq_debug.c b/src/rdpq/rdpq_debug.c index 3d284f24af..98208ebe4c 100644 --- a/src/rdpq/rdpq_debug.c +++ b/src/rdpq/rdpq_debug.c @@ -1118,10 +1118,12 @@ static void lazy_validate_rendermode(void) { VALIDATE_ERR_CC(ccs[1].rgb.mul != 9, "in 2cycle mode, the color combiner cannot access the TEX1_ALPHA slot in the second cycle (but TEX0_ALPHA contains the second texture)"); if (rdp.som.alphacmp.enable && !rdp.som.alphacmp.noise) { - bool cc1_passthrough = (ccs[1].alpha.mul == 7 && ccs[1].alpha.add == 0); // (any-any)*0+combined + bool cc1_passthrough = (ccs[1].alpha.mul == 7 && ccs[1].alpha.add == 0); // (any-any)*0+combined + cc1_passthrough |= (ccs[1].alpha.suba == ccs[1].alpha.subb && ccs[1].alpha.add == 0); // (same-same)*any+combine + cc1_passthrough |= memcmp(&ccs[1], &ccs[0], sizeof(ccs[0])) == 0; // same as cycle0 VALIDATE_ERR_CC(cc1_passthrough, "in 2cycle mode, alpha compare is broken if the second alpha combiner cycle is not a passthrough because of a hardware bug"); - VALIDATE_WARN_CC(!cc1_passthrough, + VALIDATE_ERR_SOM(!cc1_passthrough, "in 2cycle mode, alpha compare is often shifted by one pixel because of a hardware bug"); } } From 845f53f048b117fd19219fdde2edb45bdf493fd6 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sat, 8 Jun 2024 23:30:55 +0200 Subject: [PATCH 28/48] rdpq_font: add outline support for mono (non-aliased) fonts --- src/rdpq/rdpq_font.c | 84 ++++++++++++++----- src/rdpq/rdpq_font_internal.h | 10 +++ tools/mkfont/mkfont.cpp | 17 +++- tools/mkfont/mkfont_out.cpp | 154 
++++++++++++++++++++++++++-------- tools/mkfont/mkfont_ttf.cpp | 152 +++++++++++++++------------------ 5 files changed, 272 insertions(+), 145 deletions(-) diff --git a/src/rdpq/rdpq_font.c b/src/rdpq/rdpq_font.c index 8835a8ff72..43a0db7760 100644 --- a/src/rdpq/rdpq_font.c +++ b/src/rdpq/rdpq_font.c @@ -29,32 +29,59 @@ _Static_assert(sizeof(kerning_t) == 3, "kerning_t size is wrong"); #define PTR_DECODE(font, ptr) ((void*)(((uint8_t*)(font)) + (uint32_t)(ptr))) #define PTR_ENCODE(font, ptr) ((void*)(((uint8_t*)(ptr)) - (uint32_t)(font))) -static void recalc_style(style_t *s, tex_format_t fmt) +static void recalc_style(int font_type, style_t *s) { if (s->block) rdpq_call_deferred((void (*)(void*))rspq_block_free, s->block); rspq_block_begin(); - rdpq_mode_begin(); - switch (fmt) { - case FMT_I4: case FMT_I8: - rdpq_set_mode_standard(); - rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,PRIM), (TEX0,0,PRIM,0))); - rdpq_mode_alphacompare(1); - rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY); + switch (font_type) { + case FONT_TYPE_ALIASED: + rdpq_mode_begin(); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,PRIM), (TEX0,0,PRIM,0))); + rdpq_mode_alphacompare(1); + rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY); + rdpq_mode_end(); rdpq_set_prim_color(s->color); break; - case FMT_CI4: case FMT_CI8: - rdpq_set_mode_standard(); - rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,PRIM), (TEX0,0,PRIM,0))); - rdpq_mode_alphacompare(1); - rdpq_mode_tlut(TLUT_RGBA16); + case FONT_TYPE_MONO: + rdpq_mode_begin(); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,PRIM), (TEX0,0,PRIM,0))); + rdpq_mode_alphacompare(1); + rdpq_mode_tlut(TLUT_RGBA16); + rdpq_mode_end(); rdpq_set_prim_color(s->color); break; + case FONT_TYPE_MONO_OUTLINE: + // Mono-outline fonts are CI4 with IA16 palettes. 
Each texel is + // an IA16 color as follows: 0x0000=transparent, 0xFFFF=fill, 0x00FF=outline + // So TEX will become either 0x00 or 0xFF, and TEX_ALPHA will be 0x20 (or 0 for transparent) + // We set a combiner that does PRIM*TEX + ENV*(1-TEX), so that we can + // select between the fill and the outline color, in PRIM and ENV respectively. + // Unfortunately, we can't use alpha compare with a two-stage combiner because of + // a RDP bug; so we simulate it using SOM_BLALPHA_CVG_TIMES_CC which multiplies + // the alpha by the coverage (which should be full on all pixels) before hitting + // the blender, and this causes a similar transparent effect. + // We also need to turn on AA for this to work (for unknown reasons). + rdpq_mode_begin(); + rdpq_set_mode_standard(); + rdpq_mode_combiner(RDPQ_COMBINER2( + (ONE,TEX1,ENV,0), (0,0,0,TEX1), + (TEX1,0,PRIM,COMBINED), (0,0,0,COMBINED) + )); + rdpq_mode_antialias(AA_REDUCED); + rdpq_mode_tlut(TLUT_IA16); + rdpq_mode_end(); + rdpq_change_other_modes_raw(SOM_BLALPHA_MASK, SOM_BLALPHA_CVG_TIMES_CC); + rdpq_set_prim_color(s->color); + rdpq_set_env_color(s->outline_color); + break; default: assert(0); } - rdpq_mode_end(); + s->block = rspq_block_end(); } @@ -65,7 +92,7 @@ rdpq_font_t* rdpq_font_load_buf(void *buf, int sz) assertf(sz >= sizeof(rdpq_font_t), "Font buffer too small (sz=%d)", sz); assertf(memcmp(fnt->magic, FONT_MAGIC_LOADED, 3), "Trying to load already loaded font data (buf=%p, sz=%08x)", buf, sz); assertf(!memcmp(fnt->magic, FONT_MAGIC, 3), "invalid font data (magic: %c%c%c)", fnt->magic[0], fnt->magic[1], fnt->magic[2]); - assertf(fnt->version == 4, "unsupported font version: %d\nPlease regenerate fonts with an updated mkfont tool", fnt->version); + assertf(fnt->version == 5, "unsupported font version: %d\nPlease regenerate fonts with an updated mkfont tool", fnt->version); fnt->ranges = PTR_DECODE(fnt, fnt->ranges); fnt->glyphs = PTR_DECODE(fnt, fnt->glyphs); fnt->atlases = PTR_DECODE(fnt, fnt->atlases); @@ -75,8 +102,9 @@
rdpq_font_t* rdpq_font_load_buf(void *buf, int sz) void *buf = PTR_DECODE(fnt, fnt->atlases[i].sprite); fnt->atlases[i].sprite = sprite_load_buf(buf, fnt->atlases[i].size); rspq_block_begin(); - switch (sprite_get_format(fnt->atlases[i].sprite)) { - case FMT_CI4: + int font_type = fnt->flags & FONT_FLAG_TYPE_MASK; + switch (font_type) { + case FONT_TYPE_MONO: { surface_t surf = sprite_get_pixels(fnt->atlases[i].sprite); rdpq_tex_multi_begin(); rdpq_tex_upload(TILE0, &surf, NULL); @@ -84,9 +112,21 @@ rdpq_font_t* rdpq_font_load_buf(void *buf, int sz) rdpq_tex_reuse(TILE2, &(rdpq_texparms_t){ .palette = 2 }); rdpq_tex_reuse(TILE3, &(rdpq_texparms_t){ .palette = 3 }); rdpq_tex_multi_end(); - rdpq_tex_upload_tlut(sprite_get_palette(fnt->atlases[i].sprite), 0, 64); + rdpq_tex_upload_tlut(sprite_get_palette(fnt->atlases[i].sprite), 0, font_type == FONT_TYPE_MONO ? 64 : 32); break; - + } + case FONT_TYPE_MONO_OUTLINE: { + surface_t surf = sprite_get_pixels(fnt->atlases[i].sprite); + rdpq_tex_multi_begin(); + // Outline font uses only TILE1 and TILE2 because the combiner only uses + // TEX1 and never TEX0 (see recalc_style). + rdpq_tex_upload(TILE1, &surf, NULL); + rdpq_tex_reuse(TILE2, &(rdpq_texparms_t){ .palette = 1 }); + rdpq_tex_multi_end(); + rdpq_tex_upload_tlut(sprite_get_palette(fnt->atlases[i].sprite), 0, font_type == FONT_TYPE_MONO ? 
64 : 32); + break; + } + case FONT_TYPE_ALIASED: default: rdpq_sprite_upload(TILE0, fnt->atlases[i].sprite, NULL); break; @@ -95,9 +135,8 @@ rdpq_font_t* rdpq_font_load_buf(void *buf, int sz) fnt->atlases[i].up = rspq_block_end(); } - tex_format_t fmt = sprite_get_format(fnt->atlases[0].sprite); for (int i = 0; i < fnt->num_styles; i++) - recalc_style(&fnt->styles[i], fmt); + recalc_style(fnt->flags & FONT_FLAG_TYPE_MASK, &fnt->styles[i]); memcpy(fnt->magic, FONT_MAGIC_LOADED, 3); data_cache_hit_writeback(fnt, sz); return fnt; @@ -188,8 +227,7 @@ void rdpq_font_style(rdpq_font_t *fnt, uint8_t style_id, const rdpq_fontstyle_t // mkfont time. The font always contain room for 256 styles (all zeroed). style_t *s = &fnt->styles[style_id]; s->color = style->color; - tex_format_t fmt = sprite_get_format(fnt->atlases[0].sprite); - recalc_style(s, fmt); + recalc_style(fnt->flags & FONT_FLAG_TYPE_MASK, s); } int rdpq_font_render_paragraph(const rdpq_font_t *fnt, const rdpq_paragraph_char_t *chars, float x0, float y0) diff --git a/src/rdpq/rdpq_font_internal.h b/src/rdpq/rdpq_font_internal.h index 2c9548d8f7..62b16ea5c3 100644 --- a/src/rdpq/rdpq_font_internal.h +++ b/src/rdpq/rdpq_font_internal.h @@ -16,6 +16,14 @@ typedef struct rspq_block_s rspq_block_t; /** @brief font64 owned font buffer magic */ #define FONT_MAGIC_OWNED "FNO" +#define FONT_FLAG_TYPE_MASK 0x0000000F ///< Mask for the font type + +enum { + FONT_TYPE_ALIASED = 0, ///< Aliased font (I4) + FONT_TYPE_MONO = 1, ///< Mono font (CI4, which are 4 1bpp layers) + FONT_TYPE_MONO_OUTLINE = 2, ///< Mono font with outline (CI4, which are 2 2bpp layers) +}; + /** @brief A range of codepoint (part of #rdpq_font_t) */ typedef struct { uint32_t first_codepoint; ///< First codepoint in the range @@ -55,6 +63,7 @@ typedef struct kerning_s { /** @brief Data related to font styling */ typedef struct style_s { color_t color; ///< Color of the text + color_t outline_color; ///< Color of the outline (if any) rspq_block_t *block; 
///< RSPQ block that configures the style } style_t; @@ -62,6 +71,7 @@ typedef struct style_s { typedef struct rdpq_font_s { char magic[3]; ///< Magic header (FONT_MAGIC) uint8_t version; ///< Version number (1) + uint32_t flags; ///< Flags uint32_t point_size; ///< Point size of the font int32_t ascent; ///< Ascent (number of pixels above baseline) int32_t descent; ///< Descent (number of pixels below baseline) diff --git a/tools/mkfont/mkfont.cpp b/tools/mkfont/mkfont.cpp index e6e6370a55..e97ff3f0df 100644 --- a/tools/mkfont/mkfont.cpp +++ b/tools/mkfont/mkfont.cpp @@ -37,7 +37,7 @@ std::vector flag_ranges; const char *n64_inst = NULL; int flag_ellipsis_cp = 0x002E; int flag_ellipsis_repeats = 3; -bool flag_ttf_outline = false; +float flag_ttf_outline = 0; bool flag_ttf_monochrome = false; void print_args( char * name ) @@ -56,6 +56,9 @@ void print_args( char * name ) fprintf(stderr, "TTF/OTF specific flags:\n"); fprintf(stderr, " -s/--size Point size of the font (default: whatever the font defaults to)\n"); fprintf(stderr, " -r/--range Range of unicode codepoints to convert, as hex values (default: 20-7F)\n"); + fprintf(stderr, " (can be specified multiple times)\n"); + fprintf(stderr, " --monochrome Force monochrome output, with no aliasing (default: off)\n"); + fprintf(stderr, " --outline Add outline to font, specifying its width in (fractional) pixels\n"); fprintf(stderr, "\n"); fprintf(stderr, "BMFont specific flags:\n"); fprintf(stderr, "\n"); @@ -114,6 +117,18 @@ int main(int argc, char *argv[]) flag_ranges.push_back(r1); } else if (!strcmp(argv[i], "--monochrome")) { flag_ttf_monochrome = true; + } else if (!strcmp(argv[i], "--outline")) { + if (++i == argc) { + fprintf(stderr, "missing argument for %s\n", argv[i-1]); + return 1; + } + float outline; + char extra; + if (sscanf(argv[i], "%f%c", &outline, &extra) != 1) { + fprintf(stderr, "invalid argument for %s: %s\n", argv[i-1], argv[i]); + return 1; + } + flag_ttf_outline = outline; } else if 
(!strcmp(argv[i], "--ellipsis")) { if (++i == argc) { fprintf(stderr, "missing argument for %s\n", argv[i-1]); diff --git a/tools/mkfont/mkfont_out.cpp b/tools/mkfont/mkfont_out.cpp index 6d6998833a..c757a09ad4 100644 --- a/tools/mkfont/mkfont_out.cpp +++ b/tools/mkfont/mkfont_out.cpp @@ -111,8 +111,19 @@ struct Image { return (i << 24) | (i << 16) | (i << 8) | i; } case FMT_CI8: { - uint32_t i = palette[*data]; - return (i << 24) | (i << 16) | (i << 8) | i; + if (!palette) { + uint32_t i = *data; + return (i << 24) | (i << 16) | (i << 8) | i; + } + uint16_t val = palette[*data]; + uint32_t r = (val >> 11) & 0x1F; + uint32_t g = (val >> 6) & 0x1F; + uint32_t b = (val >> 1) & 0x1F; + uint32_t a = (val & 1) * 0xFF; + r = (r << 3) | (r >> 2); + g = (g << 3) | (g >> 2); + b = (b << 3) | (b >> 2); + return (r << 24) | (g << 16) | (b << 8) | a; } default: assert(!"unsupported format"); @@ -143,8 +154,11 @@ struct Image { } case FMT_I8: { data[0] = a; - break; - } + } break; + case FMT_CI8: { + assert(px < 256); + data[0] = px; + } break; default: assert(!"unsupported format"); break; @@ -268,8 +282,8 @@ struct Image { // A Glyph to be added to the font struct Glyph { int gidx; // index in the glyph array in font64 - uint32_t codepoint; // unicode codepoint - Image img; + uint32_t codepoint; // unicode codepoint + Image img; // Pixel image (see add_glyph for valid formats) int xoff, yoff; int xadv; @@ -286,18 +300,21 @@ struct Font { int num_atlases = 0; std::string outfn; bool is_mono = true; + bool has_outline = false; - Font(std::string fn, int point_size, int ascent, int descent, int line_gap, int space_width) + Font(std::string fn, int point_size, int ascent, int descent, int line_gap, int space_width, bool outline) { outfn = fn; fnt = (rdpq_font_t*)calloc(1, sizeof(rdpq_font_t)); memcpy(fnt->magic, FONT_MAGIC, 3); - fnt->version = 4; + fnt->version = 5; + fnt->flags = FONT_TYPE_ALIASED; fnt->point_size = point_size; fnt->ascent = ascent; fnt->descent = descent; 
fnt->line_gap = line_gap; fnt->space_width = space_width; + has_outline = outline; } ~Font() @@ -339,6 +356,7 @@ void Font::write() w8(out, fnt->magic[1]); w8(out, fnt->magic[2]); w8(out, fnt->version); + w32(out, fnt->flags); w32(out, fnt->point_size); w32(out, fnt->ascent); w32(out, fnt->descent); @@ -423,10 +441,12 @@ void Font::write() walign(out, 16); uint32_t offset_styles = ftell(out); w32(out, 0xFFFFFFFF); // color + w32(out, 0x40404040); // outline w32(out, 0); // runtime pointer for (int i=0; i<255; i++) { w32(out, 0); // color + w32(out, 0); // outline w32(out, 0); // runtime pointer } @@ -486,11 +506,23 @@ int Font::add_glyph(uint32_t cp, Image img, int xoff, int yoff, int xadv) return -1; } - // Check if the font is still mono - bool was_mono = is_mono; - is_mono &= img.is_mono(); - if (was_mono != is_mono && num_atlases > 0) - assert(!"cannot mix mono and non-mono glyphs in the same font in different ranges"); + if (has_outline) { + // Outline fonts are only monochromatic, must be CI8, and values should be: + // 0 = transparent, 1 = outline, 2 = fill + if (img.fmt != FMT_CI8) assert(!"glyph image must be CI8 for outlined fonts"); + for (int i=0;i 0) + assert(!"cannot mix mono and non-mono glyphs in the same font in different ranges"); + } // Crop the image to the actual glyph size int x0=0, y0=0; @@ -502,8 +534,21 @@ int Font::add_glyph(uint32_t cp, Image img, int xoff, int yoff, int xadv) void Font::make_atlases(void) { - if (is_mono && num_atlases == 0 && flag_verbose) - fprintf(stderr, "monochrome glyphs detected, auto-switching to FMT_I1 (1bpp)\n"); + if (is_mono && num_atlases == 0) { + if (has_outline) { + if (flag_verbose) fprintf(stderr, "monochrome+outlined glyphs detected, auto-switching to 2bpp atlases\n"); + fnt->flags = (fnt->flags & ~FONT_FLAG_TYPE_MASK) | FONT_TYPE_MONO_OUTLINE; + } else { + if (flag_verbose) fprintf(stderr, "monochrome glyphs detected, auto-switching to 1bpp atlases\n"); + fnt->flags = (fnt->flags & 
~FONT_FLAG_TYPE_MASK) | FONT_TYPE_MONO; + } + } + + // Determine how many different layers the final atlases will be: + // Aliased font: single layer (we need 4 bits for the 16 intensity levels) + // Mono, no outline: we can use 1bpp, so we can merge 4 layers + // Mono, outline: we can use 2bpp, so we can merge 2 layers + int merge_layers = !is_mono ? 1 : (has_outline ? 2 : 4); // Pack the glyphs into a texture rect_pack::Settings settings; @@ -512,15 +557,13 @@ void Font::make_atlases(void) settings.method = rect_pack::Method::Best; settings.max_width = 128; settings.max_height = 64; - settings.method = rect_pack::Method::Best; - settings.max_width = 128; - settings.max_height = 64; - settings.border_padding = 1; + settings.border_padding = 0; settings.allow_rotate = false; std::vector sizes; std::vector sheets; for (int i=0; i new_sheets = rect_pack::pack(settings, sizes2); - if (new_sheets.size() <= 4) { + if (new_sheets.size() <= merge_layers) { if (flag_verbose >= 2) printf(" found better packing: %d x %d (%d)\n", w, h, w*h); best_sheets = new_sheets; @@ -597,7 +642,7 @@ void Font::make_atlases(void) for (int i=0; iglyphs[glyph.gidx]; gout->natlas = i; if (is_mono) { - gout->ntile = i & 3; - gout->natlas /= 4; + gout->ntile = i & (merge_layers-1); + gout->natlas /= merge_layers; } gout->s = rect.x; gout->t = rect.y; gout->xoff = glyph.xoff; @@ -641,6 +686,12 @@ void Font::make_atlases(void) if (flag_debug) { char *imgfn = NULL; asprintf(&imgfn, "%s_%d.png", outfn.c_str(), num_atlases); + if (img.fmt == FMT_CI8) { + img.palette.resize(3); + img.palette[0] = 0; + img.palette[1] = (31<<11) | (31<<6) | (31<<1) | 1; + img.palette[2] = (10<<11) | (10<<6) | (10<<1) | 1; + } img.write_png(imgfn); if (flag_verbose) fprintf(stderr, "wrote debug image: %s\n", imgfn); @@ -652,12 +703,13 @@ void Font::make_atlases(void) } if (is_mono) { + assert(merge_layers == 2 || merge_layers == 4); std::vector atlases2; for (int i=0; i>2 : j&3; + switch (px) { + // IA16 palette with 
either I=FF or A=FF to identify fill vs outline + case 1: img.palette[i*16+j] = 0xFFFF; break; + case 2: img.palette[i*16+j] = 0x00FF; break; + } + } + } } if (flag_verbose) { @@ -702,6 +773,15 @@ void Font::make_atlases(void) for (int i=0; iglyphs[g.gidx]; + gout->xadvance = g.xadv; + } + } + // Clear the glyph array, as we have added these to the atlases already glyphs.clear(); } diff --git a/tools/mkfont/mkfont_ttf.cpp b/tools/mkfont/mkfont_ttf.cpp index 68a5660718..aa812783fb 100644 --- a/tools/mkfont/mkfont_ttf.cpp +++ b/tools/mkfont/mkfont_ttf.cpp @@ -48,14 +48,14 @@ int convert_ttf(const char *infn, const char *outfn, std::vector& ranges) int space_width = face->size->metrics.max_advance >> 6; if (flag_verbose) printf("asc: %d dec: %d scalable:%d fixed:%d\n", ascent, descent, FT_IS_SCALABLE(face), FT_HAS_FIXED_SIZES(face)); - Font font(outfn, point_size, ascent, descent, line_gap, space_width); + Font font(outfn, point_size, ascent, descent, line_gap, space_width, flag_ttf_outline > 0); // Create a map from font64 glyph indices to truetype indices std::unordered_map gidx_to_ttfidx; FT_Stroker stroker; FT_Stroker_New(ftlib, &stroker); - FT_Stroker_Set(stroker, 1*64, FT_STROKER_LINECAP_ROUND, FT_STROKER_LINEJOIN_ROUND, 0); + FT_Stroker_Set(stroker, flag_ttf_outline * 64, FT_STROKER_LINECAP_ROUND, FT_STROKER_LINEJOIN_ROUND, 0); // Go through all the ranges for (int r=0; r& ranges) exit(1); } - FT_GlyphSlot slot = face->glyph; - FT_Bitmap bmp = slot->bitmap; + if (flag_ttf_outline == 0) { + FT_GlyphSlot slot = face->glyph; + FT_Bitmap bmp = slot->bitmap; - Image img(FMT_I8, bmp.width, bmp.rows); + Image img = Image(FMT_I8, bmp.width, bmp.rows); - switch (bmp.pixel_mode) { - case FT_PIXEL_MODE_MONO: - for (int y=0; ybitmap_left, -slot->bitmap_top, slot->advance.x); + gidx_to_ttfidx[gidx] = ttf_idx; + + } else { + FT_Render_Mode rm = flag_ttf_monochrome ? 
FT_RENDER_MODE_MONO : FT_RENDER_MODE_NORMAL; + + FT_Glyph ftglyph1, ftglyph2; + FT_Load_Glyph(face, ttf_idx, FT_LOAD_DEFAULT); + FT_Get_Glyph(face->glyph, &ftglyph1); + FT_Glyph_Copy(ftglyph1, &ftglyph2); + + FT_Glyph_To_Bitmap(&ftglyph1, rm, nullptr, true); + FT_BitmapGlyph bitmapGlyph1 = reinterpret_cast(ftglyph1); + + FT_Glyph_StrokeBorder(&ftglyph2, stroker, false, true); + FT_Glyph_To_Bitmap(&ftglyph2, rm, nullptr, true); + FT_BitmapGlyph bitmapGlyph2 = reinterpret_cast(ftglyph2); + + int img_top = std::max(bitmapGlyph1->top, bitmapGlyph2->top); + int img_left = std::min(bitmapGlyph1->left, bitmapGlyph2->left); + + int img_width = std::max(bitmapGlyph1->left + bitmapGlyph1->bitmap.width, bitmapGlyph2->left + bitmapGlyph2->bitmap.width) - img_left; + int img_height = std::max(bitmapGlyph1->top + bitmapGlyph1->bitmap.rows, bitmapGlyph2->top + bitmapGlyph2->bitmap.rows) - img_top; + + Image img = Image(FMT_CI8, img_width, img_height); + + // Copy the outline bitmap to the image + for (int y = 0; y < bitmapGlyph2->bitmap.rows; y++) { + for (int x = 0; x < bitmapGlyph2->bitmap.width; x++) { + uint8_t v = bitmapGlyph2->bitmap.buffer[y * bitmapGlyph2->bitmap.width + x]; + if (v != 0) + img[y + img_top - bitmapGlyph2->top][x - img_left + bitmapGlyph2->left] = 2; } } - break; - default: - fprintf(stderr, "internal error: unsupported freetype pixel mode: %d\n", bmp.pixel_mode); - return 1; - } - // } else { - - // FT_Glyph ftglyph1, ftglyph2; - // FT_Load_Glyph(face, ttf_idx, FT_LOAD_DEFAULT); - // FT_Get_Glyph(face->glyph, &ftglyph1); - // FT_Glyph_Copy(ftglyph1, &ftglyph2); - - // FT_Glyph_To_Bitmap(&ftglyph1, FT_RENDER_MODE_NORMAL, nullptr, true); - // FT_BitmapGlyph bitmapGlyph1 = reinterpret_cast(ftglyph1); - - // FT_Glyph_StrokeBorder(&ftglyph2, stroker, false, true); - // FT_Glyph_To_Bitmap(&ftglyph2, FT_RENDER_MODE_NORMAL, nullptr, true); - // FT_BitmapGlyph bitmapGlyph2 = reinterpret_cast(ftglyph2); - - // bmp_width = 128; - // bmp_height = 128; - - // 
bitmap.resize(128 * 128); - // uint8_t pixel32[128*128*4]; // rgba buffer - // memset(pixel32, 0, 128*128*4); - - // // printf("bitmap1: %d x %d -- %d,%d\n", bitmapGlyph1->bitmap.width, bitmapGlyph1->bitmap.rows, - // // bitmapGlyph1->left, bitmapGlyph1->top); - // // printf("bitmap2: %d x %d -- %d,%d\n", bitmapGlyph2->bitmap.width, bitmapGlyph2->bitmap.rows, - // // bitmapGlyph2->left, bitmapGlyph2->top); - - // const int outR = 0x0, outG = 0x0, outB = 0x0; - // const int fillR = 0xFF, fillG = 0xFF, fillB = 0xFF; - - // // Copy the second bitmap to the rgba buffer with yellow color - // for (int y = 0; y < bitmapGlyph2->bitmap.rows; y++) { - // for (int x = 0; x < bitmapGlyph2->bitmap.width; x++) { - // uint8_t v = bitmapGlyph2->bitmap.buffer[y * bitmapGlyph2->bitmap.width + x]; - // int i = (y + 70 - bitmapGlyph2->top) * 128 + x + 10 + bitmapGlyph2->left; - // pixel32[i * 4 + 0] = outR; - // pixel32[i * 4 + 1] = outG; - // pixel32[i * 4 + 2] = outB; - // pixel32[i * 4 + 3] = v; - // } - // } - // (void)bitmapGlyph2; - - // // Copy the first bitmap to the rgba buffer with red color, blending - // // it over the yellow color - // for (int y = 0; y < bitmapGlyph1->bitmap.rows; y++) { - // for (int x = 0; x < bitmapGlyph1->bitmap.width; x++) { - // uint8_t v = bitmapGlyph1->bitmap.buffer[y * bitmapGlyph1->bitmap.width + x]; - // int i = (y + 70 - bitmapGlyph1->top) * 128 + x + 10 + bitmapGlyph1->left; - // uint8_t *dst = &pixel32[i * 4]; - // dst[0] = dst[0] + ((fillR - dst[0]) * v) / 255; - // dst[1] = dst[1] + ((fillG - dst[1]) * v) / 255; - // dst[2] = dst[2] + ((fillB - dst[2]) * v) / 255; - // dst[3] = dst[3] + v >= 128 ? 
255 : 0; - // } - // } - - // bmp_bpp = 4; - // bmp_left = 0; - // bmp_top = 0; - // bmp_adv = 0; - // } - - int gidx = font.add_glyph(g, img, slot->bitmap_left, -slot->bitmap_top, slot->advance.x); - gidx_to_ttfidx[gidx] = ttf_idx; + // Copy the first bitmap to the image + for (int y = 0; y < bitmapGlyph1->bitmap.rows; y++) { + for (int x = 0; x < bitmapGlyph1->bitmap.width; x++) { + uint8_t v = bitmapGlyph1->bitmap.buffer[y * bitmapGlyph1->bitmap.width + x]; + if (v != 0) + img[y + img_top - bitmapGlyph1->top][x - img_left + bitmapGlyph1->left] = 1; + } + } + + int gidx = font.add_glyph(g, std::move(img), img_left, -img_top, face->glyph->advance.x); + gidx_to_ttfidx[gidx] = ttf_idx; + } } font.make_atlases(); From 36f220ac9c065a207a9e4b15b771332e8aa9c173 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 9 Jun 2024 00:06:30 +0200 Subject: [PATCH 29/48] mkfont: add --range all to extract all glyphs from a font --- tools/mkfont/mkfont.cpp | 34 ++++++++++++++++++++++++++++------ tools/mkfont/mkfont_bmfont.cpp | 17 ----------------- tools/mkfont/mkfont_ttf.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/tools/mkfont/mkfont.cpp b/tools/mkfont/mkfont.cpp index e97ff3f0df..91fabb4680 100644 --- a/tools/mkfont/mkfont.cpp +++ b/tools/mkfont/mkfont.cpp @@ -40,9 +40,26 @@ int flag_ellipsis_repeats = 3; float flag_ttf_outline = 0; bool flag_ttf_monochrome = false; +std::vector unicode_ranges{ + 0x0000, 0x0020, 0x0080, 0x0100, 0x180, 0x250, 0x2b0, 0x300, 0x370, 0x400, + 0x500, 0x530, 0x590, 0x600, 0x700, 0x780, 0x900, 0x980, 0xa00, 0xa80, + 0xa00, 0xa80, 0xb00, 0xb80, 0xc00, 0xc80, 0xd00, 0xd80, 0xe00, 0xe80, + 0xf00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x13A0, 0x1400, 0x1680, 0x16A0, + 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, 0x1800, 0x1900, 0x1950, 0x19E0, + 0x1D00, 0x1E00, 0x1F00, 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, + 0x2190, 0x2200, 0x2300, 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, + 0x2600, 0x2700, 
0x27C0, 0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, + 0x2E80, 0x2F00, 0x2FF0, 0x3000, 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, + 0x31A0, 0x31F0, 0x3200, 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, + 0xAC00, 0xD800, 0xDB80, 0xDC00, 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, + 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0, 0x10000, 0x10080, 0x10100, + 0x10300, 0x10330, 0x10380, 0x10400, 0x10450, 0x10480, 0x10800, 0x1D000, 0x1D100, + 0x1D300, 0x1D400, 0x20000, 0x2F800, 0x2FA20 +}; + void print_args( char * name ) { - fprintf(stderr, "mkfont -- Convert TTF/OTF/BMFont fonts into the font64 format for libdragon\n\n"); + fprintf(stderr, "mkfont -- Convert TTF/OTF fonts into the font64 format for libdragon\n\n"); fprintf(stderr, "Usage: %s [flags] \n", name); fprintf(stderr, "\n"); fprintf(stderr, "Command-line flags:\n"); @@ -56,12 +73,11 @@ void print_args( char * name ) fprintf(stderr, "TTF/OTF specific flags:\n"); fprintf(stderr, " -s/--size Point size of the font (default: whatever the font defaults to)\n"); fprintf(stderr, " -r/--range Range of unicode codepoints to convert, as hex values (default: 20-7F)\n"); - fprintf(stderr, " (can be specified multiple times)\n"); + fprintf(stderr, " Can be specified multiple times. 
Use \"--range all\" to extract all\n"); + fprintf(stderr, " glyphs in the font.\n"); fprintf(stderr, " --monochrome Force monochrome output, with no aliasing (default: off)\n"); fprintf(stderr, " --outline Add outline to font, specifying its width in (fractional) pixels\n"); fprintf(stderr, "\n"); - fprintf(stderr, "BMFont specific flags:\n"); - fprintf(stderr, "\n"); fprintf(stderr, "It is possible to convert multiple ranges of codepoints, by specifying\n"); fprintf(stderr, "--range more than one time.\n"); } @@ -75,6 +91,7 @@ int main(int argc, char *argv[]) char *infn = NULL, *outfn = NULL; const char *outdir = "."; bool error = false; int compression = DEFAULT_COMPRESSION; + bool range_all = false; if (argc < 2) { print_args(argv[0]); @@ -107,6 +124,10 @@ int main(int argc, char *argv[]) fprintf(stderr, "missing argument for %s\n", argv[i-1]); return 1; } + if (strcmp(argv[i], "all") == 0) { + range_all = true; + continue; + } int r0, r1; char extra; if (sscanf(argv[i], "%x-%x%c", &r0, &r1, &extra) != 2) { @@ -175,8 +196,9 @@ int main(int argc, char *argv[]) char* ext = strrchr(basename_noext, '.'); if (ext) *ext = '\0'; - if (flag_ranges.empty()) { - // Default range (ASCII) + if (range_all) { + flag_ranges.clear(); + } else if (flag_ranges.empty()) { flag_ranges.push_back(0x20); flag_ranges.push_back(0x7F); } diff --git a/tools/mkfont/mkfont_bmfont.cpp b/tools/mkfont/mkfont_bmfont.cpp index 0df6bc888b..84e48e6253 100644 --- a/tools/mkfont/mkfont_bmfont.cpp +++ b/tools/mkfont/mkfont_bmfont.cpp @@ -9,23 +9,6 @@ // Bring in tex_format_t definition #include "surface.h" -std::vector unicode_ranges{ - 0x0000, 0x0020, 0x0080, 0x0100, 0x180, 0x250, 0x2b0, 0x300, 0x370, 0x400, - 0x500, 0x530, 0x590, 0x600, 0x700, 0x780, 0x900, 0x980, 0xa00, 0xa80, - 0xa00, 0xa80, 0xb00, 0xb80, 0xc00, 0xc80, 0xd00, 0xd80, 0xe00, 0xe80, - 0xf00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x13A0, 0x1400, 0x1680, 0x16A0, - 0x1700, 0x1720, 0x1740, 0x1760, 0x1780, 0x1800, 0x1900, 0x1950, 0x19E0, - 
0x1D00, 0x1E00, 0x1F00, 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, - 0x2190, 0x2200, 0x2300, 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, - 0x2600, 0x2700, 0x27C0, 0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, - 0x2E80, 0x2F00, 0x2FF0, 0x3000, 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, - 0x31A0, 0x31F0, 0x3200, 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, - 0xAC00, 0xD800, 0xDB80, 0xDC00, 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, - 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0, 0x10000, 0x10080, 0x10100, - 0x10300, 0x10330, 0x10380, 0x10400, 0x10450, 0x10480, 0x10800, 0x1D000, 0x1D100, - 0x1D300, 0x1D400, 0x20000, 0x2F800, 0x2FA20 -}; - const char* tex_format_name(tex_format_t fmt) { switch ((int)fmt) { case FMT_NONE: return "AUTO"; diff --git a/tools/mkfont/mkfont_ttf.cpp b/tools/mkfont/mkfont_ttf.cpp index aa812783fb..d8c73653d6 100644 --- a/tools/mkfont/mkfont_ttf.cpp +++ b/tools/mkfont/mkfont_ttf.cpp @@ -1,6 +1,7 @@ #include #include #include +#include // Freetype #include "freetype/FreeTypeAmalgam.h" @@ -57,6 +58,29 @@ int convert_ttf(const char *infn, const char *outfn, std::vector& ranges) FT_Stroker_New(ftlib, &stroker); FT_Stroker_Set(stroker, flag_ttf_outline * 64, FT_STROKER_LINECAP_ROUND, FT_STROKER_LINEJOIN_ROUND, 0); + if (ranges.empty()) { + unsigned idx; + std::map> range_map; + uint32_t cp = FT_Get_First_Char(face, &idx); + while (idx) { + int range = *(std::upper_bound(unicode_ranges.begin(), unicode_ranges.end(), cp)-1); + + auto r = range_map.find(range); + if (r != range_map.end()) { + r->second.first = MIN(r->second.first, cp); + r->second.second = MAX(r->second.second, cp); + } else { + range_map.insert({range, {cp, cp}}); + } + + cp = FT_Get_Next_Char(face, cp, &idx); + } + for (auto r : range_map) { + ranges.push_back(r.second.first); + ranges.push_back(r.second.second); + } + } + // Go through all the ranges for (int r=0; r Date: Sun, 9 Jun 2024 23:12:04 +0200 Subject: [PATCH 30/48] rdpq_font: add outline color to font style parms 
--- include/rdpq_font.h | 1 + src/rdpq/rdpq_font.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/rdpq_font.h b/include/rdpq_font.h index 8f17feb0a7..95fc9d5714 100644 --- a/include/rdpq_font.h +++ b/include/rdpq_font.h @@ -64,6 +64,7 @@ void rdpq_font_free(rdpq_font_t *fnt); */ typedef struct rdpq_fontstyle_s { color_t color; ///< Color of the text + color_t outline_color; ///< Color of the outline (if any) } rdpq_fontstyle_t; /** diff --git a/src/rdpq/rdpq_font.c b/src/rdpq/rdpq_font.c index 43a0db7760..7bea7e52ee 100644 --- a/src/rdpq/rdpq_font.c +++ b/src/rdpq/rdpq_font.c @@ -227,6 +227,7 @@ void rdpq_font_style(rdpq_font_t *fnt, uint8_t style_id, const rdpq_fontstyle_t // mkfont time. The font always contain room for 256 styles (all zeroed). style_t *s = &fnt->styles[style_id]; s->color = style->color; + s->outline_color = style->outline_color; recalc_style(fnt->flags & FONT_FLAG_TYPE_MASK, s); } From 7f89174b7689c851dcb669931cb808b3aebea94e Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Sun, 9 Jun 2024 23:12:19 +0200 Subject: [PATCH 31/48] rdpq_text: allow to change initial style id via a parameter --- include/rdpq_text.h | 1 + src/rdpq/rdpq_paragraph.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/rdpq_text.h b/include/rdpq_text.h index 3a4107656c..29b78b17f2 100644 --- a/include/rdpq_text.h +++ b/include/rdpq_text.h @@ -216,6 +216,7 @@ typedef enum { /** @brief Print formatting parameters */ typedef struct rdpq_textparms_s { + int16_t style_id; ///< Initial style ID for the text int16_t width; ///< Maximum horizontal width of the paragraph, in pixels (0 if unbounded) int16_t height; ///< Maximum vertical height of the paragraph, in pixels (0 if unbounded) rdpq_align_t align; ///< Horizontal alignment (0=left, 1=center, 2=right) diff --git a/src/rdpq/rdpq_paragraph.c b/src/rdpq/rdpq_paragraph.c index 5082041147..81d0f79f56 100644 --- a/src/rdpq/rdpq_paragraph.c +++ b/src/rdpq/rdpq_paragraph.c @@ -86,6 +86,7 @@ void 
rdpq_paragraph_builder_begin(const rdpq_textparms_t *parms, uint8_t initial builder.xscale = 1.0f; builder.yscale = 1.0f; rdpq_paragraph_builder_font(initial_font_id); + builder.style_id = builder.parms->style_id; // start at center of pixel so that all rounds are to nearest builder.x = builder.parms->indent; builder.y = (builder.parms->height ? builder.font->ascent : 0); From 516498a24608eaec4b00534f4a85acde662371d0 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 00:31:14 +0200 Subject: [PATCH 32/48] mkfont: fix bug with outline fonts using multiple atlases --- tools/mkfont/mkfont_out.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/mkfont/mkfont_out.cpp b/tools/mkfont/mkfont_out.cpp index c757a09ad4..03e0f8cfe9 100644 --- a/tools/mkfont/mkfont_out.cpp +++ b/tools/mkfont/mkfont_out.cpp @@ -705,7 +705,7 @@ void Font::make_atlases(void) if (is_mono) { assert(merge_layers == 2 || merge_layers == 4); std::vector atlases2; - for (int i=0; i Date: Mon, 10 Jun 2024 00:31:28 +0200 Subject: [PATCH 33/48] mkfont: fix bug when using --outline together with --monochrome --- tools/mkfont/mkfont_ttf.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/mkfont/mkfont_ttf.cpp b/tools/mkfont/mkfont_ttf.cpp index d8c73653d6..4e1e3e1f96 100644 --- a/tools/mkfont/mkfont_ttf.cpp +++ b/tools/mkfont/mkfont_ttf.cpp @@ -157,7 +157,11 @@ int convert_ttf(const char *infn, const char *outfn, std::vector& ranges) // Copy the outline bitmap to the image for (int y = 0; y < bitmapGlyph2->bitmap.rows; y++) { for (int x = 0; x < bitmapGlyph2->bitmap.width; x++) { - uint8_t v = bitmapGlyph2->bitmap.buffer[y * bitmapGlyph2->bitmap.width + x]; + uint8_t v; + if (flag_ttf_monochrome) + v = (bitmapGlyph2->bitmap.buffer[y * bitmapGlyph2->bitmap.pitch + x / 8] & (1 << (7 - x % 8))) ? 
1 : 0; + else + v = bitmapGlyph2->bitmap.buffer[y * bitmapGlyph2->bitmap.pitch + x]; if (v != 0) img[y + img_top - bitmapGlyph2->top][x - img_left + bitmapGlyph2->left] = 2; } @@ -166,7 +170,11 @@ int convert_ttf(const char *infn, const char *outfn, std::vector& ranges) // Copy the first bitmap to the image for (int y = 0; y < bitmapGlyph1->bitmap.rows; y++) { for (int x = 0; x < bitmapGlyph1->bitmap.width; x++) { - uint8_t v = bitmapGlyph1->bitmap.buffer[y * bitmapGlyph1->bitmap.width + x]; + uint8_t v; + if (flag_ttf_monochrome) + v = (bitmapGlyph1->bitmap.buffer[y * bitmapGlyph1->bitmap.pitch + x / 8] & (1 << (7 - x % 8))) ? 1 : 0; + else + v = bitmapGlyph1->bitmap.buffer[y * bitmapGlyph1->bitmap.pitch + x]; if (v != 0) img[y + img_top - bitmapGlyph1->top][x - img_left + bitmapGlyph1->left] = 1; } From 55b6227964d6257e7d4d45e5295bc23d29fc7779 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 11:57:57 +0200 Subject: [PATCH 34/48] rdpq_text: add workaround for AA smearing of text (enabled by default) --- include/rdpq_paragraph.h | 15 +++++++++++++++ include/rdpq_text.h | 2 ++ src/rdpq/rdpq_paragraph.c | 30 ++++++++++++++++++++++++------ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/rdpq_paragraph.h b/include/rdpq_paragraph.h index cdf20d3bbe..e70320013f 100644 --- a/include/rdpq_paragraph.h +++ b/include/rdpq_paragraph.h @@ -38,6 +38,20 @@ typedef struct __attribute__((packed)) rdpq_paragraph_char_s { _Static_assert(sizeof(rdpq_paragraph_char_t) == 8, "rdpq_paragraph_char_t is not packed"); +/** @brief Bitmask flags for #rdpq_paragraph_t */ +enum rdpq_paragraph_flag_e { + /// @brief Draw a transparent background rectangle to avoid AA artifacts + /// When drawing text on a 3D background using anti-aliasing (also enabled + /// in #display_init), the text might interact with the AA filter performed + /// by the VI and causes artifacts such as smearing. 
To avoid this, this flag + /// tells #rdpq_paragraph_render to draw a transparent rectangle behind the + /// text. + /// This flag is set by default when using #rdpq_text_printn, #rdpq_text_printf, + /// and #rdpq_text_print. It can be disabled by setting #rdpq_textparms_t::disable_aa_fix + /// while rendering. + RDPQ_PARAGRAPH_FLAG_ANTIALIAS_FIX = (1 << 0), +}; + /** * @brief A paragraph of text, fully laid out * @@ -61,6 +75,7 @@ typedef struct { int nchars; ///< Total number of chars in this layout int capacity; ///< Capacity of the chars array float x0, y0; ///< Alignment offset of the text + int flags; ///< Flags (see #rdpq_paragraph_flag_e) rdpq_paragraph_char_t chars[]; ///< Array of chars } rdpq_paragraph_t; diff --git a/include/rdpq_text.h b/include/rdpq_text.h index 29b78b17f2..c95ca03af0 100644 --- a/include/rdpq_text.h +++ b/include/rdpq_text.h @@ -171,6 +171,7 @@ #define LIBDRAGON_RDPQ_TEXT_H #include +#include #include #ifdef __cplusplus @@ -226,6 +227,7 @@ typedef struct rdpq_textparms_s { int16_t line_spacing; ///< Extra spacing between lines (in addition to font height) rdpq_textwrap_t wrap; ///< Wrap mode int16_t *tabstops; ///< Array of tab stops, in pixels (0-terminated) + bool disable_aa_fix; ///< Obtain a small rendering speedup by disabling the anti-aliasing fix. Can be enabled when anti-alias is disabled in #display_init. (see #RDPQ_PARAGRAPH_FLAG_ANTIALIAS_FIX for more details). } rdpq_textparms_t; diff --git a/src/rdpq/rdpq_paragraph.c b/src/rdpq/rdpq_paragraph.c index 81d0f79f56..a044d2b76c 100644 --- a/src/rdpq/rdpq_paragraph.c +++ b/src/rdpq/rdpq_paragraph.c @@ -2,6 +2,8 @@ #include "rdpq_text.h" #include "rdpq_font.h" #include "rdpq_font_internal.h" +#include "rdpq_mode.h" +#include "rdpq_rect.h" #include "debug.h" #include "fmath.h" #include @@ -75,12 +77,15 @@ void rdpq_paragraph_builder_begin(const rdpq_textparms_t *parms, uint8_t initial static const rdpq_textparms_t empty_parms = {0}; builder.parms = parms ? 
parms : &empty_parms; - if (!layout) { - const int initial_chars = 256; - layout = malloc(sizeof(rdpq_paragraph_t) + sizeof(rdpq_paragraph_char_t) * initial_chars); - memset(layout, 0, sizeof(*layout)); - layout->capacity = initial_chars; - } + int layout_cap = 256; + if (!layout) + layout = malloc(sizeof(rdpq_paragraph_t) + sizeof(rdpq_paragraph_char_t) * layout_cap); + else + layout_cap = layout->capacity; + memset(layout, 0, sizeof(*layout)); + layout->capacity = layout_cap; + if (!builder.parms->disable_aa_fix) + layout->flags |= RDPQ_PARAGRAPH_FLAG_ANTIALIAS_FIX; builder.layout = layout; builder.xscale = 1.0f; @@ -504,6 +509,19 @@ void rdpq_paragraph_render(const rdpq_paragraph_t *layout, float x0, float y0) { const rdpq_paragraph_char_t *ch = layout->chars; + if (layout->flags & RDPQ_PARAGRAPH_FLAG_ANTIALIAS_FIX) { + rdpq_mode_begin(); + rdpq_set_mode_standard(); + rdpq_mode_blender(RDPQ_BLENDER_MULTIPLY); + rdpq_mode_combiner(RDPQ_COMBINER1((0,0,0,0),(0,0,0,0))); + rdpq_mode_end(); + + // Draw a rectangle that covers three horizontal pixels on horizontal edges, + // and one pixel on vertical edges. This makes sure the VI AA filter will + // never fetch one of the text pixels. 
+ rdpq_fill_rectangle(layout->bbox.x0 + x0 - 3, layout->bbox.y0 + y0 - 1, layout->bbox.x1 + x0 + 6, layout->bbox.y1 + y0 + 2); + } + x0 += layout->x0; y0 += layout->y0; while (ch->font_id != 0) { From b3d6aab71d27e5d6cd2915b9c94605e1c4e44395 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 12:14:52 +0200 Subject: [PATCH 35/48] rdpq_paragraph: default tabstops to 32 pixels --- include/rdpq_text.h | 2 +- src/rdpq/rdpq_paragraph.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/include/rdpq_text.h b/include/rdpq_text.h index c95ca03af0..ff39222391 100644 --- a/include/rdpq_text.h +++ b/include/rdpq_text.h @@ -226,7 +226,7 @@ typedef struct rdpq_textparms_s { int16_t char_spacing; ///< Extra spacing between chars (in addition to glyph width and kerning) int16_t line_spacing; ///< Extra spacing between lines (in addition to font height) rdpq_textwrap_t wrap; ///< Wrap mode - int16_t *tabstops; ///< Array of tab stops, in pixels (0-terminated) + int16_t *tabstops; ///< Array of tab stops, in pixels (0-terminated). If NULL, tab stops are every 32 pixels bool disable_aa_fix; ///< Obtain a small rendering speedup by disabling the anti-aliasing fix. Can be enabled when anti-alias is disabled in #display_init. (see #RDPQ_PARAGRAPH_FLAG_ANTIALIAS_FIX for more details). 
} rdpq_textparms_t; diff --git a/src/rdpq/rdpq_paragraph.c b/src/rdpq/rdpq_paragraph.c index a044d2b76c..19e1b02da7 100644 --- a/src/rdpq/rdpq_paragraph.c +++ b/src/rdpq/rdpq_paragraph.c @@ -203,16 +203,21 @@ void rdpq_paragraph_builder_span(const char *utf8_text, int nbytes) float last_pixel = xcur + xoff2 * builder.xscale; - if (UNLIKELY(is_tab) && parms->tabstops) { - // Go to next tabstop - for (int t=0; parms->tabstops[t] != 0; t++) { - if (last_pixel < parms->tabstops[t] * builder.xscale) { - xcur = parms->tabstops[t] * builder.xscale; - break; + if (UNLIKELY(is_tab)) { + if (parms->tabstops) { + // Go to next tabstop + for (int t=0; parms->tabstops[t] != 0; t++) { + if (last_pixel < parms->tabstops[t] * builder.xscale) { + xcur = parms->tabstops[t] * builder.xscale; + break; + } } + } else { + // Arbitrarly put tabstops every 32 pixels + xcur += xadvance * builder.xscale; + xcur = fm_ceilf(xcur / 32.0f) * 32.0f; } } else { - // Advance the cursor (rounding to nearest pixel xcur += xadvance * builder.xscale; } From 101c99fa5d981de4a2cb3e18a4fdc2112b181fb8 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 12:38:51 +0200 Subject: [PATCH 36/48] ipl3: slight speedup by using RSP DMA to clear IMEM instead of CPU --- boot/ipl3.c | 31 ++++++++++++------------------- boot/loader.c | 2 -- boot/rdram.c | 3 +++ 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/boot/ipl3.c b/boot/ipl3.c index d05caa166c..f5b7677fda 100644 --- a/boot/ipl3.c +++ b/boot/ipl3.c @@ -168,27 +168,13 @@ typedef struct { _Static_assert(sizeof(bootinfo_t) == 16, "invalid sizeof(bootinfo_t)"); -void rsp_clear_mem(uint32_t mem, unsigned int size) -{ - while (*SP_DMA_BUSY) {} - uint32_t *ptr = (uint32_t*)mem; - uint32_t *ptr_end = (uint32_t*)(mem + size); - while (ptr < ptr_end) - *ptr++ = 0; - - // *SP_RSP_ADDR = 0x1000; // IMEM - // *SP_DRAM_ADDR = 8*1024*1024 + 0x2000; // Most RDRAM addresses >8 MiB always return 0 - // *SP_RD_LEN = 4096-1; - // while (*SP_DMA_BUSY) 
{} -} - static void bzero8(void *mem) { asm ("sdl $0, 0(%0); sdr $0, 7(%0);" :: "r"(mem)); } // Clear memory using RSP DMA. We use IMEM as source address, which -// was cleared in rsp_clear_imem(). The size can be anything up to 1 MiB, +// was cleared in mem_bank_init(). The size can be anything up to 1 MiB, // since the DMA would just wrap around in IMEM. void rsp_bzero_async(uint32_t rdram, int size) { @@ -224,6 +210,17 @@ void rsp_bzero_async(uint32_t rdram, int size) // schedule two transfers for each bank. static void mem_bank_init(int chip_id, bool last) { + if (chip_id == -1) { + // First call, we clear SP_IMEM that will be used later. + // We run a DMA from RDRAM address > 8MiB where many areas return 0 on read. + // Notice that we can do this only after RI has been initialized. + while (*SP_DMA_BUSY) {} + *SP_RSP_ADDR = 0x1000; + *SP_DRAM_ADDR = 8*1024*1024 + 0x2000; + *SP_RD_LEN = 4096-1; + return; + } + uint32_t base = chip_id*1024*1024; int size = 2*1024*1024; @@ -251,10 +248,6 @@ void stage1pre(void) __attribute__((noreturn, section(".stage1"))) void stage1(void) { - // Clear IMEM (contains IPL2). We don't need it anymore, and we can - // instead use IMEM as a zero-buffer for RSP DMA. 
- rsp_clear_mem((uint32_t)SP_IMEM, 4096); - entropy_init(); usb_init(); debugf("Libdragon IPL3"); diff --git a/boot/loader.c b/boot/loader.c index 574a455bde..2c5235f7db 100644 --- a/boot/loader.c +++ b/boot/loader.c @@ -49,8 +49,6 @@ // Stage 1 functions we want to reuse __attribute__((far)) -extern void rsp_clear_mem(uint32_t mem, int size); -__attribute__((far)) extern void rsp_bzero_async(uint32_t rdram, int size); __attribute__((far)) extern void cop0_clear_cache(void); diff --git a/boot/rdram.c b/boot/rdram.c index e6cc1ed415..fb3d0dd108 100644 --- a/boot/rdram.c +++ b/boot/rdram.c @@ -345,6 +345,9 @@ int rdram_init(void (*bank_found)(int chip_id, bool last)) // Initialize RDRAM register access rdram_reg_init(); + // First call to callback, now that RI is initialized + bank_found(-1, false); + // Follow the init procedure specified in the datasheet. // First, put all of them to a fixed high ID (we use RDRAM_MAX_DEVICE_ID). enum { From e73348254c060f83d1b1c19b73caf140c195d137 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 12:50:34 +0200 Subject: [PATCH 37/48] ipl3: fix potential race condition Maybe it never triggers but better safe than sorry. --- boot/ipl3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/boot/ipl3.c b/boot/ipl3.c index f5b7677fda..02e1161584 100644 --- a/boot/ipl3.c +++ b/boot/ipl3.c @@ -325,6 +325,7 @@ void stage1(void) void *rdram_stage2 = LOADER_BASE(memsize, stage2_size); *PI_DRAM_ADDR = (uint32_t)rdram_stage2; *PI_CART_ADDR = (uint32_t)stage2_start - 0xA0000000; + while (*SP_DMA_BUSY) {} // Make sure RDRAM clearing is finished before reading data *PI_WR_LEN = stage2_size-1; // Clear D/I-cache, useful after warm boot. Maybe not useful for cold @@ -350,6 +351,7 @@ void stage1(void) data_cache_hit_writeback_invalidate((void*)0x80000300, 0x20); #endif + // Wait until stage 2 is fully loaded into RDRAM while (*PI_STATUS & 1) {} // Jump to stage 2 in RDRAM. 
From b5a37d84574c3761fa946da6b5280b65f354ccf1 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 13:07:27 +0200 Subject: [PATCH 38/48] ipl3: correctly configure X2 bit in RDRAM chips When I first wrote the IPL3, I read some RDRAM details in the "RDRAM Concurrent" manual linked in the n64brew wiki, but it seems this was a mistake, because that refers to a newer version of RDRAM. Chips on N64 units are "RDRAM Base" chips, and there are some differences. In particular, that manual advised to use the "FR" bit during the initial configuration, but it seems that bit doesn't exist in the RDRAM base modules. Instead, the X2 bit is required to be set. Thanks to bsmiles32 for first noticing the issue in #158, and to korgeaux for testing and experimentations in this area. --- boot/rdram.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/boot/rdram.c b/boot/rdram.c index fb3d0dd108..887960eb52 100644 --- a/boot/rdram.c +++ b/boot/rdram.c @@ -200,13 +200,13 @@ static int rdram_reg_w_mode(int nchip, bool auto_current, uint8_t cci) { uint8_t cc = cci ^ 0x3F; // invert bits to non inverted value enum { - FR = 1 << 12, + X2 = 1 << 6, // ? CURRENT_CONTROL_AUTO = 1 << 7, // Set auto current mode AUTO_SKIP = 1 << 2, // ? 
DEVICE_EN = 1 << 1, // Enable direct chip configuration (even without broadcast) }; - uint32_t value = DEVICE_EN | AUTO_SKIP | FR; + uint32_t value = DEVICE_EN | AUTO_SKIP | X2; if (auto_current) value |= CURRENT_CONTROL_AUTO; value |= CCVALUE(cc); @@ -355,7 +355,7 @@ int rdram_init(void (*bank_found)(int chip_id, bool last)) INVALID_ID = RDRAM_MAX_DEVICE_ID - 2, }; rdram_reg_w_deviceid(RDRAM_BROADCAST, INITIAL_ID); - rdram_reg_w(RDRAM_BROADCAST, RDRAM_REG_MODE, (1<<12)|(1<<2)); + rdram_reg_w(RDRAM_BROADCAST, RDRAM_REG_MODE, (1<<6)|(1<<2)); rdram_reg_w(RDRAM_BROADCAST, RDRAM_REG_REF_ROW, 0); // Initialization loop @@ -369,7 +369,7 @@ int rdram_init(void (*bank_found)(int chip_id, bool last)) rdram_reg_w_deviceid(INITIAL_ID, chip_id); // Turn on the chip (set DE=1) - rdram_reg_w(chip_id, RDRAM_REG_MODE, (1<<12) | (1<<1) | (1<<2)); + rdram_reg_w(chip_id, RDRAM_REG_MODE, (1<<6) | (1<<1) | (1<<2)); // Check if the DE bit was turned on. If it's not, a chip is not present // and we can abort the initialization loop. 
From 6ab24a5ea4d83a180d8e24e02035d374d1b14f98 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 16:57:22 +0200 Subject: [PATCH 39/48] mkfont: small tweak to add_glyph prototype --- tools/mkfont/mkfont_out.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/mkfont/mkfont_out.cpp b/tools/mkfont/mkfont_out.cpp index 03e0f8cfe9..9fc215e0a1 100644 --- a/tools/mkfont/mkfont_out.cpp +++ b/tools/mkfont/mkfont_out.cpp @@ -331,7 +331,7 @@ struct Font { void write(FILE *out); void add_range(int first, int last); - int add_glyph(uint32_t cp, Image img, int xoff, int yoff, int xadv); + int add_glyph(uint32_t cp, Image&& img, int xoff, int yoff, int xadv); void add_atlas(Image& img); void add_kerning(int glyph1, int glyph2, int kerning); void add_ellipsis(int ellipsis_cp, int ellipsis_repeats); @@ -498,7 +498,7 @@ int Font::get_glyph_index(uint32_t cp) return -1; } -int Font::add_glyph(uint32_t cp, Image img, int xoff, int yoff, int xadv) +int Font::add_glyph(uint32_t cp, Image&& img, int xoff, int yoff, int xadv) { int gidx = get_glyph_index(cp); if (gidx < 0) { From 56ea6e41e6bc10500e5faef525363e502361952b Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 17:15:41 +0200 Subject: [PATCH 40/48] ipl3: fix previous commit by making sure IMEM is cleared on warm boot too --- boot/ipl3.c | 18 +- tools/ipl3.h | 1176 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 816 insertions(+), 378 deletions(-) diff --git a/boot/ipl3.c b/boot/ipl3.c index 02e1161584..b8751ec387 100644 --- a/boot/ipl3.c +++ b/boot/ipl3.c @@ -173,6 +173,16 @@ static void bzero8(void *mem) asm ("sdl $0, 0(%0); sdr $0, 7(%0);" :: "r"(mem)); } +static void rsp_bzero_init(void) +{ + // We run a DMA from RDRAM address > 8MiB where many areas return 0 on read. + // Notice that we can do this only after RI has been initialized. 
+ while (*SP_DMA_BUSY) {} + *SP_RSP_ADDR = 0x1000; + *SP_DRAM_ADDR = 8*1024*1024 + 0x2000; + *SP_RD_LEN = 4096-1; +} + // Clear memory using RSP DMA. We use IMEM as source address, which // was cleared in mem_bank_init(). The size can be anything up to 1 MiB, // since the DMA would just wrap around in IMEM. @@ -212,12 +222,7 @@ static void mem_bank_init(int chip_id, bool last) { if (chip_id == -1) { // First call, we clear SP_IMEM that will be used later. - // We run a DMA from RDRAM address > 8MiB where many areas return 0 on read. - // Notice that we can do this only after RI has been initialized. - while (*SP_DMA_BUSY) {} - *SP_RSP_ADDR = 0x1000; - *SP_DRAM_ADDR = 8*1024*1024 + 0x2000; - *SP_RD_LEN = 4096-1; + rsp_bzero_init(); return; } @@ -310,6 +315,7 @@ void stage1(void) // with this even if Everdrive itself doesn't use this IPL3 (but // might boot a game that does, and that game shouldn't clear // 0x80000318). + rsp_bzero_init(); rsp_bzero_async(0xA0000400, memsize-0x400-TOTAL_RESERVED_SIZE); } diff --git a/tools/ipl3.h b/tools/ipl3.h index fd077b8534..4a78fb46b2 100644 --- a/tools/ipl3.h +++ b/tools/ipl3.h @@ -4,210 +4,118 @@ unsigned char default_ipl3[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x69, 0x62, 0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x30, 0x44, 0xd2, 0x36, 0x3c, 0x1d, 0xa4, 0x00, - 0x37, 0xbd, 0x0f, 0xf0, 0x3c, 0x04, 0xa4, 0x00, 0x27, 0xbd, 0xff, 0xe0, - 0x24, 0x05, 0x10, 0x00, 0x24, 0x84, 0x10, 0x00, 0xaf, 0xbf, 0x00, 0x1c, - 0xaf, 0xb1, 0x00, 0x18, 0x04, 0x11, 0x00, 0x8a, 0xaf, 0xb0, 0x00, 0x14, - 0x00, 0x00, 0xd8, 0x25, 0x04, 0x11, 0x02, 0x32, 0x40, 0x04, 0x48, 0x00, - 0x00, 0x00, 0x10, 0x25, 0x40, 0x82, 0x68, 0x00, 0x40, 0x82, 0x48, 0x00, - 0x40, 0x82, 0x58, 0x00, 0x40, 0x82, 0x90, 0x00, 0x3c, 0x02, 0xa4, 0x30, - 0x8c, 0x51, 0x00, 0x04, 0x24, 0x02, 0x00, 0xb0, 0x32, 0x31, 0x00, 0xf0, - 
0x12, 0x22, 0x00, 0x4b, 0x3c, 0x02, 0xa4, 0x70, 0x8c, 0x42, 0x00, 0x0c, - 0x10, 0x40, 0x00, 0x11, 0x00, 0x00, 0x80, 0x25, 0x3c, 0x02, 0x12, 0x34, - 0x3c, 0x06, 0xa0, 0x00, 0x24, 0x42, 0x56, 0x78, 0x3c, 0x05, 0x00, 0x20, - 0x3c, 0x04, 0x00, 0x80, 0x02, 0x06, 0x18, 0x21, 0xac, 0x60, 0x00, 0x00, - 0xac, 0x62, 0x00, 0x00, 0x8c, 0x63, 0x00, 0x00, 0x54, 0x62, 0x00, 0x5a, - 0x3c, 0x05, 0xff, 0xff, 0x02, 0x05, 0x80, 0x21, 0x16, 0x04, 0xff, 0xf9, - 0x02, 0x06, 0x18, 0x21, 0x10, 0x00, 0x00, 0x55, 0x3c, 0x05, 0xff, 0xff, - 0x3c, 0x04, 0xa4, 0x00, 0x04, 0x11, 0x01, 0x0f, 0x24, 0x84, 0x03, 0x38, - 0x00, 0x40, 0x80, 0x25, 0x3c, 0x04, 0xb0, 0x00, 0x24, 0x86, 0x11, 0x18, - 0x3c, 0x02, 0xa4, 0x60, 0x8c, 0x43, 0x00, 0x10, 0x30, 0x63, 0x00, 0x03, - 0x14, 0x60, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x85, 0x11, 0x18, - 0x3c, 0x07, 0x60, 0x00, 0x02, 0x05, 0x20, 0x23, 0x24, 0xe7, 0x00, 0x08, - 0x3c, 0x03, 0x80, 0x00, 0x00, 0x83, 0x18, 0x21, 0x00, 0xc7, 0x30, 0x21, - 0x24, 0xa5, 0xff, 0xff, 0xac, 0x43, 0x00, 0x00, 0xac, 0x46, 0x00, 0x04, - 0xac, 0x45, 0x00, 0x0c, 0x40, 0x80, 0xe0, 0x00, 0x40, 0x80, 0xe8, 0x00, - 0x3c, 0x02, 0x80, 0x00, 0x24, 0x45, 0x20, 0x00, 0xbc, 0x49, 0x00, 0x00, - 0x24, 0x42, 0x00, 0x10, 0x14, 0x45, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, - 0x3c, 0x02, 0x80, 0x00, 0x24, 0x45, 0x40, 0x00, 0xbc, 0x48, 0x00, 0x00, - 0x24, 0x42, 0x00, 0x20, 0x14, 0x45, 0xff, 0xfd, 0x00, 0x15, 0x32, 0x00, - 0x00, 0x14, 0x14, 0x00, 0x3a, 0x31, 0x00, 0xb0, 0x00, 0x46, 0x10, 0x25, - 0x2e, 0x31, 0x00, 0x01, 0x3c, 0x05, 0xa4, 0x00, 0x00, 0x51, 0x10, 0x25, - 0xac, 0xb0, 0x00, 0x00, 0xac, 0xa2, 0x00, 0x08, 0xac, 0xa0, 0x00, 0x0c, - 0x3c, 0x05, 0xa4, 0x60, 0x8c, 0xa2, 0x00, 0x10, 0x30, 0x42, 0x00, 0x01, - 0x14, 0x40, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0x7f, 0xff, - 0x34, 0x42, 0xff, 0xf0, 0x00, 0x82, 0x20, 0x21, 0x00, 0x60, 0x00, 0x08, - 0x00, 0x80, 0xe8, 0x25, 0x3c, 0x02, 0xa0, 0x00, 0x8c, 0x50, 0x03, 0x18, - 0x3c, 0x02, 0x00, 0x80, 0x12, 0x02, 0x00, 0x20, 0x3c, 0x02, 0x00, 0x40, - 
0x16, 0x02, 0x00, 0x0c, 0x24, 0x02, 0x00, 0x20, 0x3c, 0x03, 0xa4, 0x60, - 0x8c, 0x62, 0x00, 0x10, 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, - 0x3c, 0x02, 0xb0, 0x00, 0x8c, 0x42, 0x00, 0x08, 0x3c, 0x03, 0x80, 0x40, - 0x00, 0x43, 0x10, 0x2b, 0x50, 0x40, 0x00, 0x01, 0x3c, 0x10, 0x00, 0x80, - 0x24, 0x02, 0x00, 0x20, 0x00, 0x00, 0x20, 0x25, 0x3c, 0x05, 0xa4, 0x30, - 0x8c, 0xa3, 0x00, 0x2c, 0x00, 0x04, 0x20, 0x40, 0x30, 0x63, 0x00, 0x01, - 0x24, 0x42, 0xff, 0xff, 0x14, 0x40, 0xff, 0xfb, 0x00, 0x64, 0x20, 0x25, - 0x04, 0x11, 0x01, 0xc1, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x05, 0xff, 0xff, - 0x24, 0xa5, 0x7c, 0x00, 0x3c, 0x04, 0xa0, 0x00, 0x02, 0x05, 0x28, 0x21, - 0x04, 0x11, 0x00, 0x1c, 0x24, 0x84, 0x04, 0x00, 0x10, 0x00, 0xff, 0xab, - 0x3c, 0x04, 0xb0, 0x00, 0x10, 0x00, 0xff, 0xec, 0x3c, 0x10, 0x00, 0x7c, + 0x00, 0x00, 0x00, 0x00, 0x25, 0x6a, 0x00, 0x24, 0x8d, 0x7f, 0x00, 0x24, + 0xad, 0x7f, 0xff, 0xc0, 0x15, 0x6a, 0xff, 0xfd, 0x25, 0x6b, 0x00, 0x04, + 0x3c, 0x09, 0xa4, 0x00, 0x3c, 0x0a, 0xb0, 0x00, 0x01, 0x20, 0x00, 0x08, + 0x25, 0x4b, 0x0f, 0xc0, 0x8d, 0x5f, 0x10, 0x40, 0xad, 0x3f, 0x00, 0x40, + 0x25, 0x4a, 0x00, 0x04, 0x15, 0x4b, 0xff, 0xfc, 0x25, 0x29, 0x00, 0x04, + 0x00, 0x00, 0x48, 0x25, 0x24, 0x0a, 0x00, 0x40, 0x27, 0xab, 0xe0, 0x50, + 0x01, 0x60, 0x00, 0x08, 0x27, 0xbf, 0xf5, 0x60, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x20, 0x4c, 0x69, 0x62, 0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e, 0x20, 0x49, - 0x50, 0x4c, 0x33, 0x20, 0x20, 0x43, 0x6f, 0x64, 0x65, 0x64, 0x20, 0x62, - 0x79, 0x20, 0x52, 0x61, 0x73, 0x6b, 0x79, 0x20, 0x3c, 0x03, 0xa4, 0x04, - 0x8c, 0x62, 0x00, 0x18, 0x14, 0x40, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x85, 0x28, 0x21, 0x00, 0x85, 0x10, 0x2b, 0x54, 0x40, 0x00, 0x03, - 0xac, 0x80, 0x00, 0x00, 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, - 0x10, 0x00, 0xff, 0xfa, 0x24, 0x84, 0x00, 0x04, 0x3c, 0x02, 0xa0, 0x00, - 0x00, 0x82, 0x20, 0x25, 0xb0, 0x80, 0x00, 0x00, 0xb4, 0x80, 0x00, 0x07, - 0x28, 0xa2, 0x00, 0x09, 0x14, 0x40, 0x00, 0x16, 0x24, 0xa5, 0xff, 0xf8, - 0x00, 0xa4, 0x10, 0x21, 0xb0, 0x40, 0x00, 0x00, 0xb4, 0x40, 0x00, 0x07, - 0x3c, 0x06, 0x00, 0x10, 0x24, 0x84, 0x00, 0x08, 0x24, 0xc6, 0x00, 0x01, - 0x3c, 0x03, 0xa4, 0x04, 0x24, 0x09, 0x10, 0x00, 0x00, 0xa6, 0x38, 0x2a, - 
0x14, 0xe0, 0x00, 0x02, 0x00, 0xa0, 0x10, 0x25, 0x3c, 0x02, 0x00, 0x10, - 0x8c, 0x68, 0x00, 0x14, 0x15, 0x00, 0xff, 0xfe, 0x24, 0x48, 0xff, 0xff, - 0x00, 0xa2, 0x28, 0x23, 0xac, 0x69, 0x00, 0x00, 0xac, 0x64, 0x00, 0x04, - 0xac, 0x68, 0x00, 0x0c, 0x1c, 0xa0, 0xff, 0xf4, 0x00, 0x82, 0x20, 0x21, - 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x14, 0xa0, 0x00, 0x04, - 0x00, 0x04, 0x25, 0x00, 0x3c, 0x05, 0x00, 0x20, 0x10, 0x00, 0xff, 0xde, - 0x00, 0x00, 0x00, 0x00, 0x3c, 0x05, 0x00, 0x1f, 0x10, 0x00, 0xff, 0xfc, - 0x34, 0xa5, 0x80, 0x00, 0x30, 0xa6, 0x00, 0x01, 0x50, 0xc0, 0x00, 0x05, - 0x03, 0x44, 0x20, 0x04, 0x3c, 0x02, 0xa4, 0x30, 0x24, 0x03, 0x20, 0x00, - 0xac, 0x43, 0x00, 0x00, 0x03, 0x44, 0x20, 0x04, 0x00, 0x85, 0x28, 0x21, - 0x00, 0x05, 0x28, 0x80, 0x3c, 0x02, 0xa3, 0xf0, 0x00, 0x45, 0x10, 0x21, - 0x8c, 0x43, 0x00, 0x00, 0x10, 0xc0, 0x00, 0x03, 0x24, 0x04, 0x10, 0x00, - 0x3c, 0x02, 0xa4, 0x30, 0xac, 0x44, 0x00, 0x00, 0x00, 0x03, 0x26, 0x00, - 0x00, 0x03, 0x16, 0x02, 0x00, 0x44, 0x10, 0x25, 0x00, 0x03, 0x22, 0x02, - 0x30, 0x84, 0xff, 0x00, 0x00, 0x44, 0x10, 0x25, 0x00, 0x03, 0x1a, 0x00, - 0x3c, 0x04, 0x00, 0xff, 0x00, 0x64, 0x18, 0x24, 0x03, 0xe0, 0x00, 0x08, - 0x00, 0x62, 0x10, 0x25, 0x24, 0x02, 0x01, 0x00, 0x24, 0x42, 0xff, 0xff, - 0x14, 0x40, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x03, 0xe0, 0x00, 0x08, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x1e, 0x00, 0x00, 0x06, 0x16, 0x02, - 0x00, 0x43, 0x10, 0x25, 0x00, 0x06, 0x1a, 0x02, 0x30, 0x63, 0xff, 0x00, - 0x00, 0x43, 0x10, 0x25, 0x00, 0x06, 0x32, 0x00, 0x3c, 0x03, 0x00, 0xff, - 0x00, 0xc3, 0x30, 0x24, 0x00, 0xc2, 0x30, 0x25, 0x24, 0x02, 0xff, 0xff, - 0x54, 0x82, 0x00, 0x07, 0x03, 0x44, 0x20, 0x04, 0x00, 0x05, 0x28, 0x80, - 0x3c, 0x02, 0xa3, 0xf8, 0x00, 0x45, 0x10, 0x21, 0xac, 0x46, 0x00, 0x00, - 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x28, 0x21, - 0x00, 0x05, 0x28, 0x80, 0x10, 0x00, 0xff, 0xf9, 0x3c, 0x02, 0xa3, 0xf0, - 0x27, 0xbd, 0xff, 0xe8, 0x38, 0xc7, 0x00, 0x3f, 0x00, 0x80, 0x40, 0x25, - 
0xaf, 0xbf, 0x00, 0x14, 0x00, 0xa0, 0x48, 0x25, 0x10, 0xa0, 0x00, 0x02, - 0x24, 0x06, 0x10, 0x06, 0x24, 0x06, 0x10, 0x86, 0x3c, 0x03, 0x40, 0x00, - 0x00, 0x07, 0x17, 0x80, 0x00, 0x43, 0x10, 0x24, 0x00, 0x07, 0x18, 0xc2, - 0x00, 0x03, 0x1f, 0xc0, 0x00, 0x43, 0x10, 0x25, 0x3c, 0x04, 0x00, 0x40, - 0x00, 0x07, 0x1d, 0x40, 0x00, 0x64, 0x18, 0x24, 0x00, 0x43, 0x10, 0x25, - 0x00, 0x07, 0x1b, 0x00, 0x30, 0x63, 0x40, 0x00, 0x00, 0x43, 0x10, 0x25, - 0x3c, 0x04, 0x00, 0x80, 0x00, 0x07, 0x1c, 0xc0, 0x00, 0x64, 0x18, 0x24, - 0x00, 0x43, 0x10, 0x25, 0x00, 0x07, 0x1a, 0x80, 0x30, 0x63, 0x80, 0x00, - 0x00, 0x43, 0x10, 0x25, 0x00, 0x46, 0x30, 0x25, 0x24, 0x05, 0x00, 0x03, - 0x04, 0x11, 0xff, 0xca, 0x01, 0x00, 0x20, 0x25, 0x11, 0x20, 0x00, 0x1b, - 0x8f, 0xbf, 0x00, 0x14, 0x24, 0x05, 0x00, 0x03, 0x04, 0x11, 0xff, 0xbf, - 0x01, 0x00, 0x20, 0x25, 0x04, 0x11, 0xff, 0xa2, 0x00, 0x00, 0x00, 0x00, - 0x24, 0x05, 0x00, 0x03, 0x04, 0x11, 0xff, 0x9f, 0x01, 0x00, 0x20, 0x25, - 0x00, 0x02, 0x3f, 0x82, 0x00, 0x02, 0x1d, 0x42, 0x30, 0x63, 0x00, 0x02, - 0x30, 0xe7, 0x00, 0x01, 0x00, 0xe3, 0x38, 0x25, 0x00, 0x02, 0x1b, 0x02, - 0x30, 0x63, 0x00, 0x04, 0x00, 0xe3, 0x38, 0x25, 0x00, 0x02, 0x1f, 0x02, - 0x30, 0x63, 0x00, 0x08, 0x00, 0xe3, 0x38, 0x25, 0x00, 0x02, 0x1c, 0xc2, - 0x30, 0x63, 0x00, 0x10, 0x00, 0x02, 0x12, 0x82, 0x00, 0xe3, 0x38, 0x25, - 0x30, 0x42, 0x00, 0x20, 0x00, 0xe2, 0x38, 0x25, 0x8f, 0xbf, 0x00, 0x14, - 0x00, 0xe0, 0x10, 0x25, 0x03, 0xe0, 0x00, 0x08, 0x27, 0xbd, 0x00, 0x18, - 0x27, 0xbd, 0xff, 0xb0, 0x3c, 0x03, 0xa4, 0x70, 0x24, 0x02, 0x00, 0x40, - 0xaf, 0xbf, 0x00, 0x34, 0xaf, 0xb4, 0x00, 0x2c, 0xaf, 0xb3, 0x00, 0x28, - 0xaf, 0xb1, 0x00, 0x20, 0xaf, 0xb0, 0x00, 0x1c, 0xe7, 0xb7, 0x00, 0x44, - 0xaf, 0xb5, 0x00, 0x30, 0xaf, 0xb2, 0x00, 0x24, 0xe7, 0xb9, 0x00, 0x4c, - 0xe7, 0xb8, 0x00, 0x48, 0xe7, 0xb6, 0x00, 0x40, 0xe7, 0xb5, 0x00, 0x3c, - 0xe7, 0xb4, 0x00, 0x38, 0xac, 0x62, 0x00, 0x04, 0x04, 0x11, 0xff, 0x92, - 0x00, 0x00, 0x00, 0x00, 0x24, 0x02, 0x00, 0x14, 0xac, 0x60, 0x00, 0x08, - 
0xac, 0x62, 0x00, 0x0c, 0xac, 0x60, 0x00, 0x00, 0x04, 0x11, 0xff, 0x8c, - 0x00, 0x00, 0x00, 0x00, 0x24, 0x02, 0x00, 0x0e, 0xac, 0x62, 0x00, 0x00, - 0x04, 0x11, 0xff, 0x88, 0x24, 0x03, 0x01, 0x0f, 0x3c, 0x02, 0xa4, 0x30, - 0xac, 0x43, 0x00, 0x00, 0x3c, 0x02, 0x18, 0x08, 0x3c, 0x07, 0xa3, 0xf8, - 0x24, 0x42, 0x28, 0x38, 0x3c, 0x06, 0x00, 0x03, 0xac, 0xe2, 0x00, 0x08, - 0x00, 0x80, 0x98, 0x25, 0x24, 0x1a, 0x00, 0x08, 0x34, 0xc6, 0x80, 0xfc, - 0x24, 0x05, 0x00, 0x01, 0x24, 0x04, 0xff, 0xff, 0x04, 0x11, 0xff, 0x80, - 0x3c, 0x14, 0xa4, 0x00, 0x3c, 0x02, 0x04, 0x10, 0xc6, 0x97, 0x09, 0x2c, - 0xac, 0xe2, 0x00, 0x0c, 0x00, 0x00, 0x80, 0x25, 0xac, 0xe0, 0x00, 0x14, - 0x00, 0x00, 0x88, 0x25, 0x32, 0x02, 0xff, 0xff, 0x00, 0x10, 0x30, 0x80, - 0x00, 0x02, 0x11, 0x82, 0x00, 0x02, 0x13, 0xc0, 0x30, 0xc6, 0x00, 0xff, - 0x00, 0xc2, 0x30, 0x25, 0x24, 0x05, 0x00, 0x01, 0x04, 0x11, 0xff, 0x71, - 0x24, 0x04, 0x01, 0xff, 0x24, 0x06, 0x10, 0x06, 0x24, 0x05, 0x00, 0x03, - 0x04, 0x11, 0xff, 0x6d, 0x02, 0x00, 0x20, 0x25, 0x24, 0x05, 0x00, 0x03, - 0x04, 0x11, 0xff, 0x49, 0x02, 0x00, 0x20, 0x25, 0x30, 0x42, 0x00, 0x02, - 0x14, 0x40, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x05, - 0x00, 0x10, 0x80, 0x43, 0x24, 0x05, 0x00, 0x01, 0x02, 0x60, 0xf8, 0x09, - 0x26, 0x04, 0xff, 0xfe, 0x00, 0x10, 0x80, 0x43, 0x24, 0x02, 0x00, 0x01, - 0x02, 0x02, 0x10, 0x04, 0x24, 0x42, 0xff, 0xff, 0x3c, 0x03, 0x00, 0x78, - 0x00, 0x02, 0x14, 0xc0, 0x00, 0x43, 0x10, 0x24, 0x3c, 0x03, 0x00, 0x06, - 0x24, 0x63, 0x36, 0x34, 0x00, 0x43, 0x10, 0x25, 0x8f, 0xbf, 0x00, 0x34, - 0x3c, 0x03, 0xa4, 0x70, 0xac, 0x62, 0x00, 0x10, 0x8c, 0x62, 0x00, 0x10, - 0x8f, 0xb5, 0x00, 0x30, 0x8f, 0xb4, 0x00, 0x2c, 0x8f, 0xb3, 0x00, 0x28, - 0x8f, 0xb2, 0x00, 0x24, 0x8f, 0xb0, 0x00, 0x1c, 0xc7, 0xb9, 0x00, 0x4c, - 0xc7, 0xb8, 0x00, 0x48, 0xc7, 0xb7, 0x00, 0x44, 0xc7, 0xb6, 0x00, 0x40, - 0xc7, 0xb5, 0x00, 0x3c, 0xc7, 0xb4, 0x00, 0x38, 0x02, 0x20, 0x10, 0x25, - 0x8f, 0xb1, 0x00, 0x20, 0x03, 0xe0, 0x00, 0x08, 0x27, 0xbd, 0x00, 0x50, - 
0x12, 0x00, 0x00, 0x03, 0x00, 0x00, 0x28, 0x25, 0x02, 0x60, 0xf8, 0x09, - 0x26, 0x04, 0xff, 0xfe, 0x44, 0x80, 0xa0, 0x00, 0x3c, 0x02, 0xa4, 0x00, - 0x3c, 0x12, 0xa0, 0x00, 0xc4, 0x59, 0x09, 0x30, 0x02, 0x32, 0x90, 0x21, - 0x46, 0x00, 0xa5, 0x86, 0x00, 0x00, 0xa8, 0x25, 0x46, 0x00, 0xa6, 0x06, - 0x02, 0x00, 0x20, 0x25, 0x32, 0xa6, 0x00, 0xff, 0x44, 0x80, 0xa8, 0x00, - 0x04, 0x11, 0xff, 0x4b, 0x00, 0x00, 0x28, 0x25, 0x24, 0x03, 0x00, 0x0a, - 0x24, 0x04, 0xff, 0xff, 0xae, 0x44, 0x00, 0x00, 0xae, 0x44, 0x00, 0x04, - 0x92, 0x42, 0x00, 0x05, 0x30, 0x42, 0x00, 0xff, 0xa3, 0xa2, 0x00, 0x10, - 0x93, 0xa2, 0x00, 0x10, 0x30, 0x42, 0x00, 0xff, 0x14, 0x40, 0x00, 0x2c, - 0x00, 0x00, 0x00, 0x00, 0x24, 0x63, 0xff, 0xff, 0x14, 0x60, 0xff, 0xf5, - 0x00, 0x00, 0x00, 0x00, 0x46, 0x15, 0xc0, 0x3c, 0x00, 0x00, 0x00, 0x00, - 0x45, 0x02, 0x00, 0x05, 0x46, 0x19, 0xad, 0x42, 0x8e, 0x44, 0x00, 0x04, - 0x04, 0x11, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x46, 0x19, 0xad, 0x42, - 0x44, 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x80, 0x00, 0x20, - 0x46, 0x16, 0xad, 0x81, 0x46, 0x00, 0xb5, 0x82, 0xc6, 0x80, 0x09, 0x2c, - 0x46, 0x15, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x45, 0x01, 0x00, 0x05, - 0x46, 0x16, 0xa5, 0x00, 0x26, 0xb5, 0x00, 0x01, 0x24, 0x02, 0x00, 0x40, - 0x56, 0xa2, 0xff, 0xd8, 0x46, 0x00, 0xad, 0x86, 0x3c, 0x02, 0xa4, 0x00, - 0xc4, 0x40, 0x09, 0x34, 0x3c, 0x02, 0xa4, 0x00, 0x46, 0x00, 0xa5, 0x02, - 0xc4, 0x40, 0x09, 0x38, 0x46, 0x00, 0xa5, 0x00, 0x46, 0x00, 0xa0, 0x0d, - 0x44, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x40, 0x00, 0x10, - 0x00, 0x00, 0x60, 0x25, 0x3c, 0x06, 0x00, 0x03, 0x34, 0xc6, 0x80, 0xf4, - 0x24, 0x05, 0x00, 0x01, 0x04, 0x11, 0xff, 0x00, 0x02, 0x00, 0x20, 0x25, - 0x10, 0x00, 0xff, 0x9e, 0x00, 0x10, 0x80, 0x43, 0x93, 0xa2, 0x00, 0x10, - 0x30, 0x42, 0x00, 0x01, 0x54, 0x40, 0x00, 0x01, 0x46, 0x17, 0xad, 0x40, - 0x93, 0xa2, 0x00, 0x10, 0x30, 0x42, 0x00, 0xff, 0x10, 0x00, 0xff, 0xc9, - 0x00, 0x02, 0x10, 0x42, 0x00, 0x00, 0x68, 0x25, 0x00, 0x00, 0x58, 0x25, - 
0x24, 0x0e, 0x00, 0x40, 0x02, 0x00, 0x20, 0x25, 0x31, 0x66, 0x00, 0xff, - 0x04, 0x11, 0xff, 0x06, 0x24, 0x05, 0x00, 0x01, 0x00, 0x4a, 0x18, 0x23, - 0x00, 0x03, 0x27, 0xc3, 0x00, 0x83, 0x18, 0x26, 0x11, 0x60, 0x00, 0x3a, - 0x00, 0x64, 0x18, 0x23, 0x00, 0x6d, 0x20, 0x2a, 0x54, 0x80, 0x00, 0x02, - 0x01, 0x60, 0x60, 0x25, 0x01, 0xa0, 0x18, 0x25, 0x01, 0x42, 0x10, 0x2a, - 0x14, 0x40, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x25, 0x6b, 0x00, 0x01, - 0x15, 0x6e, 0xff, 0xee, 0x00, 0x60, 0x68, 0x25, 0x11, 0x80, 0xff, 0xda, - 0x31, 0x86, 0x00, 0xff, 0x24, 0x05, 0x00, 0x01, 0x04, 0x11, 0xfe, 0xf2, - 0x02, 0x00, 0x20, 0x25, 0x00, 0x00, 0x28, 0x25, 0x04, 0x11, 0xfe, 0xb7, - 0x02, 0x00, 0x20, 0x25, 0x00, 0x40, 0x38, 0x25, 0x30, 0x43, 0xf0, 0x00, - 0x24, 0x02, 0x10, 0x00, 0x14, 0x62, 0xff, 0xcf, 0x30, 0xe2, 0x0f, 0x00, - 0x24, 0x03, 0x09, 0x00, 0x14, 0x43, 0xff, 0xcc, 0x30, 0xe2, 0x00, 0xf0, - 0x24, 0x03, 0x00, 0xb0, 0x14, 0x43, 0xff, 0xc9, 0x30, 0xe2, 0x00, 0x04, - 0x10, 0x40, 0xff, 0xc7, 0x24, 0x05, 0x00, 0x09, 0x04, 0x11, 0xfe, 0xa8, - 0x02, 0x00, 0x20, 0x25, 0x00, 0x02, 0x14, 0x02, 0x24, 0x03, 0x00, 0x05, - 0x10, 0x43, 0x00, 0x17, 0x3c, 0x06, 0x04, 0x0a, 0x30, 0xe7, 0x00, 0x01, - 0x14, 0xe0, 0x00, 0x03, 0x24, 0xc6, 0x1c, 0x10, 0x3c, 0x06, 0x04, 0x12, - 0x24, 0xc6, 0x0c, 0x08, 0x24, 0x05, 0x00, 0x06, 0x04, 0x11, 0xfe, 0xbd, - 0x02, 0x00, 0x20, 0x25, 0x3c, 0x02, 0xa0, 0x20, 0x3c, 0x03, 0x00, 0x08, - 0x02, 0x22, 0x10, 0x21, 0x8e, 0x44, 0x00, 0x00, 0x8e, 0x44, 0x00, 0x04, - 0x02, 0x43, 0x90, 0x21, 0x16, 0x42, 0xff, 0xfc, 0x00, 0x00, 0x00, 0x00, - 0x3c, 0x02, 0x00, 0x20, 0x26, 0x10, 0x00, 0x02, 0x10, 0x00, 0xff, 0x38, - 0x02, 0x22, 0x88, 0x21, 0x10, 0x00, 0xff, 0xca, 0x00, 0x00, 0x60, 0x25, - 0x10, 0x00, 0xff, 0xee, 0x24, 0xc6, 0x1c, 0x10, 0x3f, 0x80, 0x00, 0x00, - 0x3c, 0x4c, 0xcc, 0xcd, 0x40, 0x0c, 0xcc, 0xcd, 0x3f, 0x00, 0x00, 0x00, - 0x3c, 0x02, 0xcc, 0x9e, 0x24, 0x42, 0x2d, 0x51, 0x00, 0x82, 0x00, 0x18, - 0x00, 0x00, 0x10, 0x12, 0x00, 0x02, 0x1b, 0xc0, 0x00, 0x02, 0x14, 0x42, - 
0x00, 0x43, 0x10, 0x25, 0x3c, 0x03, 0x1b, 0x87, 0x24, 0x63, 0x35, 0x93, - 0x00, 0x43, 0x00, 0x18, 0x00, 0x00, 0x10, 0x12, 0x00, 0x5b, 0x10, 0x26, - 0x00, 0x02, 0x1b, 0x40, 0x00, 0x02, 0x14, 0xc2, 0x00, 0x62, 0x10, 0x25, - 0x00, 0x02, 0x18, 0x80, 0x00, 0x62, 0x18, 0x21, 0x3c, 0x02, 0xe6, 0x54, - 0x24, 0x42, 0x6b, 0x64, 0x03, 0xe0, 0x00, 0x08, 0x00, 0x62, 0xd8, 0x21, - 0x00, 0x1b, 0x1c, 0x02, 0x3c, 0x02, 0x85, 0xeb, 0x00, 0x7b, 0x18, 0x26, - 0x34, 0x42, 0xca, 0x6b, 0x00, 0x62, 0x00, 0x18, 0x00, 0x00, 0x10, 0x12, - 0x00, 0x02, 0x1b, 0x42, 0x00, 0x62, 0x18, 0x26, 0x3c, 0x02, 0xc2, 0xb2, - 0x34, 0x42, 0xae, 0x35, 0x00, 0x62, 0x00, 0x18, 0x00, 0x00, 0x10, 0x12, - 0x00, 0x02, 0x1c, 0x02, 0x03, 0xe0, 0x00, 0x08, 0x00, 0x62, 0x10, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -339,189 +247,713 @@ unsigned char default_ipl3[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x5c, - 0xe9, 0xeb, 0x8a, 0x06, 0x3c, 0x0a, 0xb0, 0x00, 0x3c, 0x09, 0xa4, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x0e, 0x41, + 0x33, 0x40, 0xba, 0x87, 0x3c, 0x0a, 0xb0, 0x00, 0x3c, 0x09, 0xa4, 0x00, 0x25, 0x4b, 0x0f, 0xc0, 0x25, 0x3d, 0x1f, 0xf0, 0x8d, 0x5f, 0x00, 0x40, 0xad, 0x3f, 0x00, 0x40, 0x25, 0x4a, 0x00, 0x04, 0x15, 0x4b, 0xff, 0xfc, 0x25, 0x29, 0x00, 0x04, 0x00, 0x00, 0x48, 0x25, 0x24, 0x0a, 0x00, 0x40, 0x27, 0xab, 0xe0, 0x50, 0x01, 0x60, 0x00, 0x08, 0x27, 0xbf, 0xf5, 0x60, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x80, - 0x8c, 0x62, 0x00, 0x18, 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, - 0x3c, 0x02, 0xbf, 0xc0, 0x24, 0x03, 0x00, 0x08, 0xac, 0x43, 0x07, 0xfc, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x44, 0xd2, 0x36, + 0x3c, 0x1d, 0xa4, 0x00, 0x37, 0xbd, 0x0f, 0xf0, 0x27, 0xbd, 0xff, 0xc8, + 0x3c, 0x02, 0xb0, 0x00, 0x24, 0x42, 0x27, 0xa0, 0xaf, 0xbf, 0x00, 0x34, + 0xaf, 0xbe, 0x00, 0x30, 0xaf, 0xb2, 0x00, 0x2c, 0xaf, 0xb1, 0x00, 0x28, + 0xaf, 0xb0, 0x00, 0x24, 0x00, 0x40, 0xf8, 0x09, 0x00, 0x00, 0xd8, 0x25, + 0x3c, 0x1e, 0xb0, 0x00, 0x3c, 0x05, 0xb0, 0x00, 0x00, 0x00, 0x30, 0x25, + 0x24, 0xa5, 0x2a, 0x98, 0x27, 0xde, 0x24, 0x7c, 0x03, 0xc0, 0xf8, 0x09, + 0x24, 0x04, 0x00, 0x0e, 0x04, 0x11, 0x02, 0xbb, 0x40, 0x04, 0x48, 0x00, + 0x00, 0x00, 0x10, 0x25, 0x40, 0x82, 0x68, 0x00, 0x40, 0x82, 0x48, 0x00, + 0x40, 0x82, 0x58, 0x00, 0x40, 0x82, 0x90, 0x00, 0x3c, 0x02, 0xa4, 0x30, + 0x8c, 0x51, 0x00, 0x04, 0x24, 0x02, 0x00, 0xb0, 0x32, 0x31, 0x00, 0xf0, + 0x12, 0x22, 0x00, 0x65, 0x3c, 0x02, 0xa4, 0x70, 0x8c, 0x42, 0x00, 0x0c, + 0x10, 0x40, 0x00, 0x11, 0x00, 0x00, 0x80, 0x25, 0x3c, 0x02, 0x12, 0x34, + 0x3c, 0x06, 0xa0, 0x00, 0x24, 0x42, 0x56, 0x78, 0x3c, 0x05, 0x00, 0x20, + 0x3c, 0x04, 0x00, 0x80, 0x02, 0x06, 0x18, 0x21, 0xac, 0x60, 0x00, 0x00, + 0xac, 0x62, 0x00, 0x00, 0x8c, 0x63, 
0x00, 0x00, 0x14, 0x62, 0x00, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x05, 0x80, 0x21, 0x16, 0x04, 0xff, 0xf9, + 0x02, 0x06, 0x18, 0x21, 0x10, 0x00, 0x00, 0x6e, 0x00, 0x00, 0x00, 0x00, + 0x3c, 0x04, 0xa4, 0x00, 0x04, 0x11, 0x01, 0x43, 0x24, 0x84, 0x03, 0xc8, + 0x00, 0x40, 0x80, 0x25, 0x3c, 0x05, 0xb0, 0x00, 0x24, 0x04, 0x00, 0x0e, + 0x02, 0x00, 0x38, 0x25, 0x24, 0x06, 0x00, 0x01, 0x03, 0xc0, 0xf8, 0x09, + 0x24, 0xa5, 0x2a, 0xac, 0x3c, 0x04, 0xb0, 0x00, 0x24, 0x82, 0x1c, 0x18, + 0x3c, 0x08, 0xa4, 0x60, 0x8d, 0x03, 0x00, 0x10, 0x30, 0x63, 0x00, 0x03, + 0x14, 0x60, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x8c, 0x92, 0x1c, 0x18, + 0x3c, 0x05, 0xb0, 0x00, 0x24, 0x47, 0x00, 0x08, 0x24, 0xa5, 0x2a, 0xc0, + 0x24, 0x04, 0x00, 0x07, 0xaf, 0xb2, 0x00, 0x10, 0x24, 0x06, 0x00, 0x02, + 0x03, 0xc0, 0xf8, 0x09, 0xaf, 0xa2, 0x00, 0x18, 0x8f, 0xa2, 0x00, 0x18, + 0x3c, 0x05, 0x60, 0x00, 0x24, 0xa5, 0x00, 0x08, 0x02, 0x12, 0x20, 0x23, + 0x3c, 0x03, 0x80, 0x00, 0x3c, 0x08, 0xa4, 0x60, 0x00, 0x45, 0x10, 0x21, + 0x00, 0x83, 0x18, 0x21, 0xad, 0x03, 0x00, 0x00, 0x3c, 0x05, 0xa4, 0x04, + 0xad, 0x02, 0x00, 0x04, 0x8c, 0xa2, 0x00, 0x18, 0x14, 0x40, 0xff, 0xfe, + 0x3c, 0x02, 0xa4, 0x60, 0x26, 0x52, 0xff, 0xff, 0xac, 0x52, 0x00, 0x0c, 0x40, 0x80, 0xe0, 0x00, 0x40, 0x80, 0xe8, 0x00, 0x3c, 0x02, 0x80, 0x00, - 0x24, 0x43, 0x20, 0x00, 0xbc, 0x49, 0x00, 0x00, 0x24, 0x42, 0x00, 0x10, - 0x14, 0x43, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0x80, 0x00, - 0x24, 0x43, 0x40, 0x00, 0xbc, 0x48, 0x00, 0x00, 0x24, 0x42, 0x00, 0x20, - 0x14, 0x43, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0xa4, 0x00, - 0x8c, 0x42, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x04, 0x8c, 0x65, 0x00, 0x14, - 0x14, 0xa0, 0xff, 0xfe, 0x3c, 0x05, 0xa4, 0x00, 0x24, 0xa5, 0x10, 0x00, - 0xac, 0x65, 0x00, 0x00, 0x24, 0x45, 0x80, 0x00, 0xac, 0x65, 0x00, 0x04, - 0x34, 0x05, 0x80, 0x00, 0xac, 0x65, 0x00, 0x0c, 0x3c, 0x03, 0xa4, 0x04, - 0x8c, 0x65, 0x00, 0x14, 0x14, 0xa0, 0xff, 0xfe, 0x3c, 0x05, 0xa4, 0x00, - 0x24, 0xa5, 0x00, 0x10, 0xac, 0x65, 
0x00, 0x00, 0x3c, 0x05, 0x00, 0x80, - 0x24, 0xa5, 0x20, 0x00, 0xac, 0x65, 0x00, 0x04, 0x24, 0x05, 0x0f, 0xef, - 0xac, 0x65, 0x00, 0x08, 0x3c, 0x05, 0xa4, 0x80, 0x8c, 0xa3, 0x00, 0x18, - 0x30, 0x63, 0x00, 0x03, 0x14, 0x60, 0xff, 0xfd, 0x3c, 0x03, 0x7f, 0xff, - 0x34, 0x63, 0xff, 0xf0, 0xac, 0xa0, 0x00, 0x18, 0x00, 0x43, 0x10, 0x21, - 0x00, 0x80, 0x00, 0x08, 0x00, 0x40, 0xe8, 0x25, 0x00, 0x00, 0x07, 0x78, - 0x00, 0x00, 0x00, 0x00, 0x27, 0xbd, 0xff, 0xa0, 0x3c, 0x02, 0x80, 0x7f, - 0x34, 0x43, 0x80, 0x00, 0xaf, 0xbe, 0x00, 0x58, 0xaf, 0xbf, 0x00, 0x5c, - 0xaf, 0xb7, 0x00, 0x54, 0xaf, 0xb6, 0x00, 0x50, 0xaf, 0xb5, 0x00, 0x4c, - 0xaf, 0xb4, 0x00, 0x48, 0xaf, 0xb3, 0x00, 0x44, 0xaf, 0xb2, 0x00, 0x40, - 0xaf, 0xb1, 0x00, 0x3c, 0xaf, 0xb0, 0x00, 0x38, 0x03, 0xa0, 0xf0, 0x25, - 0x34, 0x42, 0x90, 0x00, 0xbc, 0x71, 0x00, 0x00, 0x24, 0x63, 0x00, 0x10, - 0x14, 0x62, 0xff, 0xfd, 0x3c, 0x07, 0x10, 0x00, 0x3c, 0x03, 0x7f, 0x45, - 0x3c, 0x02, 0x14, 0x00, 0x24, 0xf4, 0x10, 0x00, 0x3c, 0x04, 0xa0, 0x00, - 0x24, 0x63, 0x4c, 0x46, 0x24, 0x42, 0x10, 0x00, 0x02, 0x84, 0x28, 0x25, - 0x8c, 0xa5, 0x00, 0x00, 0x50, 0xa3, 0x00, 0xdf, 0x00, 0x14, 0x12, 0x00, - 0x26, 0x94, 0x01, 0x00, 0x16, 0x82, 0xff, 0xfb, 0x02, 0x84, 0x28, 0x25, - 0x3c, 0x04, 0xb0, 0x00, 0x10, 0x00, 0x00, 0xe6, 0x24, 0x84, 0x17, 0xbf, - 0x16, 0x22, 0x00, 0x67, 0x24, 0x12, 0x00, 0x20, 0x24, 0x12, 0x00, 0x38, - 0x24, 0x02, 0x00, 0x24, 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x54, 0x10, 0x21, - 0x00, 0x43, 0x10, 0x25, 0x24, 0x03, 0x00, 0x02, 0x8c, 0x48, 0x00, 0x00, - 0x16, 0x23, 0x00, 0x02, 0x24, 0x02, 0x00, 0x2c, 0x24, 0x02, 0x00, 0x38, - 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x54, 0x10, 0x21, 0x00, 0x43, 0x10, 0x25, + 0x24, 0x45, 0x20, 0x00, 0xbc, 0x49, 0x00, 0x00, 0xbc, 0x49, 0x00, 0x10, + 0xbc, 0x49, 0x00, 0x20, 0xbc, 0x49, 0x00, 0x30, 0x24, 0x42, 0x00, 0x40, + 0x14, 0x45, 0xff, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0x80, 0x00, + 0x24, 0x45, 0x40, 0x00, 0xbc, 0x48, 0x00, 0x00, 0xbc, 0x48, 0x00, 0x20, + 0xbc, 0x48, 0x00, 0x40, 0xbc, 0x48, 
0x00, 0x60, 0x24, 0x42, 0x00, 0x80, + 0x14, 0x45, 0xff, 0xfa, 0x00, 0x15, 0x32, 0x00, 0x00, 0x14, 0x14, 0x00, + 0x3a, 0x31, 0x00, 0xb0, 0x00, 0x46, 0x10, 0x25, 0x2e, 0x31, 0x00, 0x01, + 0x3c, 0x05, 0xa4, 0x00, 0x00, 0x51, 0x10, 0x25, 0xac, 0xb0, 0x00, 0x00, + 0xac, 0xa2, 0x00, 0x08, 0xac, 0xa0, 0x00, 0x0c, 0x3c, 0x05, 0xa4, 0x60, + 0x8c, 0xa2, 0x00, 0x10, 0x30, 0x42, 0x00, 0x01, 0x14, 0x40, 0xff, 0xfd, + 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0x7f, 0xff, 0x34, 0x42, 0xff, 0xf0, + 0x00, 0x82, 0x20, 0x21, 0x00, 0x60, 0x00, 0x08, 0x00, 0x80, 0xe8, 0x25, + 0x3c, 0x02, 0xa0, 0x00, 0x8c, 0x50, 0x03, 0x18, 0x3c, 0x02, 0x00, 0x80, + 0x12, 0x02, 0x00, 0x21, 0x3c, 0x02, 0x00, 0x40, 0x16, 0x02, 0x00, 0x0c, + 0x24, 0x02, 0x00, 0x20, 0x3c, 0x03, 0xa4, 0x60, 0x8c, 0x62, 0x00, 0x10, + 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, 0x3c, 0x02, 0xb0, 0x00, + 0x8c, 0x42, 0x00, 0x08, 0x3c, 0x03, 0x80, 0x40, 0x00, 0x43, 0x10, 0x2b, + 0x50, 0x40, 0x00, 0x01, 0x3c, 0x10, 0x00, 0x80, 0x24, 0x02, 0x00, 0x20, + 0x00, 0x00, 0x20, 0x25, 0x3c, 0x05, 0xa4, 0x30, 0x8c, 0xa3, 0x00, 0x2c, + 0x00, 0x04, 0x20, 0x40, 0x30, 0x63, 0x00, 0x01, 0x24, 0x42, 0xff, 0xff, + 0x14, 0x40, 0xff, 0xfb, 0x00, 0x64, 0x20, 0x25, 0x04, 0x11, 0x02, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x11, 0x00, 0x12, 0x3c, 0x05, 0xff, 0xff, + 0x24, 0xa5, 0x7c, 0x00, 0x3c, 0x04, 0xa0, 0x00, 0x02, 0x05, 0x28, 0x21, + 0x04, 0x11, 0x00, 0x1a, 0x24, 0x84, 0x04, 0x00, 0x10, 0x00, 0xff, 0x90, + 0x3c, 0x05, 0xb0, 0x00, 0x10, 0x00, 0xff, 0xeb, 0x3c, 0x10, 0x00, 0x7c, + 0x20, 0x4c, 0x69, 0x62, 0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e, 0x20, 0x49, + 0x50, 0x4c, 0x33, 0x20, 0x20, 0x43, 0x6f, 0x64, 0x65, 0x64, 0x20, 0x62, + 0x79, 0x20, 0x52, 0x61, 0x73, 0x6b, 0x79, 0x20, 0x3c, 0x02, 0xa4, 0x04, + 0x8c, 0x43, 0x00, 0x18, 0x14, 0x60, 0xff, 0xfe, 0x24, 0x04, 0x10, 0x00, + 0xac, 0x44, 0x00, 0x00, 0x3c, 0x02, 0x00, 0x80, 0x3c, 0x03, 0xa4, 0x04, + 0x24, 0x42, 0x20, 0x00, 0xac, 0x62, 0x00, 0x04, 0x24, 0x02, 0x0f, 0xff, + 0xac, 0x62, 0x00, 0x08, 0x03, 0xe0, 
0x00, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x3c, 0x02, 0xa0, 0x00, 0x00, 0x82, 0x20, 0x25, 0xb0, 0x80, 0x00, 0x00, + 0xb4, 0x80, 0x00, 0x07, 0x28, 0xa2, 0x00, 0x09, 0x14, 0x40, 0x00, 0x19, + 0x24, 0xa5, 0xff, 0xf8, 0x00, 0xa4, 0x10, 0x21, 0xb0, 0x40, 0x00, 0x00, + 0xb4, 0x40, 0x00, 0x07, 0x3c, 0x09, 0x00, 0x10, 0x24, 0x84, 0x00, 0x08, + 0x24, 0x0a, 0xf0, 0x00, 0x3c, 0x03, 0xa4, 0x04, 0x24, 0x08, 0x10, 0x00, + 0x00, 0xa9, 0x30, 0x2a, 0x10, 0xc0, 0x00, 0x05, 0x3c, 0x02, 0x00, 0x10, + 0x28, 0xa6, 0x10, 0x00, 0x14, 0xc0, 0x00, 0x02, 0x00, 0xa0, 0x10, 0x25, + 0x00, 0xaa, 0x10, 0x24, 0x8c, 0x67, 0x00, 0x14, 0x14, 0xe0, 0xff, 0xfe, + 0x24, 0x47, 0xff, 0xff, 0x00, 0xa2, 0x28, 0x23, 0xac, 0x68, 0x00, 0x00, + 0xac, 0x64, 0x00, 0x04, 0xac, 0x67, 0x00, 0x0c, 0x1c, 0xa0, 0xff, 0xf1, + 0x00, 0x82, 0x20, 0x21, 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x02, 0xff, 0xff, 0x14, 0x82, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0xff, 0xce, 0x00, 0x00, 0x00, 0x00, 0x14, 0xa0, 0x00, 0x04, + 0x00, 0x04, 0x25, 0x00, 0x3c, 0x05, 0x00, 0x20, 0x10, 0x00, 0xff, 0xd6, + 0x00, 0x00, 0x00, 0x00, 0x3c, 0x05, 0x00, 0x1f, 0x10, 0x00, 0xff, 0xfc, + 0x34, 0xa5, 0x80, 0x00, 0x30, 0xa6, 0x00, 0x01, 0x50, 0xc0, 0x00, 0x05, + 0x03, 0x44, 0x20, 0x04, 0x3c, 0x02, 0xa4, 0x30, 0x24, 0x03, 0x20, 0x00, + 0xac, 0x43, 0x00, 0x00, 0x03, 0x44, 0x20, 0x04, 0x00, 0x85, 0x28, 0x21, + 0x00, 0x05, 0x28, 0x80, 0x3c, 0x02, 0xa3, 0xf0, 0x00, 0x45, 0x10, 0x21, + 0x8c, 0x43, 0x00, 0x00, 0x10, 0xc0, 0x00, 0x03, 0x24, 0x04, 0x10, 0x00, + 0x3c, 0x02, 0xa4, 0x30, 0xac, 0x44, 0x00, 0x00, 0x00, 0x03, 0x26, 0x00, + 0x00, 0x03, 0x16, 0x02, 0x00, 0x44, 0x10, 0x25, 0x00, 0x03, 0x22, 0x02, + 0x30, 0x84, 0xff, 0x00, 0x00, 0x44, 0x10, 0x25, 0x00, 0x03, 0x1a, 0x00, + 0x3c, 0x04, 0x00, 0xff, 0x00, 0x64, 0x18, 0x24, 0x03, 0xe0, 0x00, 0x08, + 0x00, 0x62, 0x10, 0x25, 0x24, 0x02, 0x01, 0x00, 0x24, 0x42, 0xff, 0xff, + 0x14, 0x40, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x03, 0xe0, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 
0x1e, 0x00, 0x00, 0x06, 0x16, 0x02, + 0x00, 0x43, 0x10, 0x25, 0x00, 0x06, 0x1a, 0x02, 0x30, 0x63, 0xff, 0x00, + 0x00, 0x43, 0x10, 0x25, 0x00, 0x06, 0x32, 0x00, 0x3c, 0x03, 0x00, 0xff, + 0x00, 0xc3, 0x30, 0x24, 0x00, 0xc2, 0x30, 0x25, 0x24, 0x02, 0xff, 0xff, + 0x14, 0x82, 0x00, 0x07, 0x28, 0x82, 0x02, 0x00, 0x00, 0x05, 0x28, 0x80, + 0x3c, 0x02, 0xa3, 0xf8, 0x00, 0x45, 0x10, 0x21, 0xac, 0x46, 0x00, 0x00, + 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x14, 0x40, 0x00, 0x0c, + 0x03, 0x44, 0x20, 0x04, 0x27, 0xbd, 0xff, 0xe8, 0x3c, 0x05, 0xb0, 0x00, + 0x3c, 0x02, 0xb0, 0x00, 0x00, 0x00, 0x30, 0x25, 0xaf, 0xbf, 0x00, 0x14, + 0x24, 0xa5, 0x2a, 0xcc, 0x24, 0x42, 0x24, 0x7c, 0x00, 0x40, 0xf8, 0x09, + 0x24, 0x04, 0x00, 0x1f, 0x10, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x85, 0x28, 0x21, 0x00, 0x05, 0x28, 0x80, 0x10, 0x00, 0xff, 0xec, + 0x3c, 0x02, 0xa3, 0xf0, 0x27, 0xbd, 0xff, 0xe0, 0xaf, 0xb2, 0x00, 0x18, + 0xaf, 0xb1, 0x00, 0x14, 0xaf, 0xb0, 0x00, 0x10, 0xaf, 0xbf, 0x00, 0x1c, + 0x38, 0xd0, 0x00, 0x3f, 0x00, 0x80, 0x88, 0x25, 0x00, 0xa0, 0x90, 0x25, + 0x10, 0xa0, 0x00, 0x02, 0x24, 0x06, 0x00, 0x46, 0x24, 0x06, 0x00, 0xc6, + 0x3c, 0x03, 0x40, 0x00, 0x00, 0x10, 0x17, 0x80, 0x00, 0x43, 0x10, 0x24, + 0x00, 0x10, 0x18, 0xc2, 0x00, 0x03, 0x1f, 0xc0, 0x00, 0x43, 0x10, 0x25, + 0x3c, 0x04, 0x00, 0x40, 0x00, 0x10, 0x1d, 0x40, 0x00, 0x64, 0x18, 0x24, + 0x00, 0x43, 0x10, 0x25, 0x00, 0x10, 0x1b, 0x00, 0x30, 0x63, 0x40, 0x00, + 0x00, 0x43, 0x10, 0x25, 0x3c, 0x04, 0x00, 0x80, 0x00, 0x10, 0x1c, 0xc0, + 0x00, 0x64, 0x18, 0x24, 0x00, 0x43, 0x10, 0x25, 0x00, 0x10, 0x1a, 0x80, + 0x30, 0x63, 0x80, 0x00, 0x00, 0x43, 0x10, 0x25, 0x00, 0x46, 0x30, 0x25, + 0x24, 0x05, 0x00, 0x03, 0x04, 0x11, 0xff, 0xba, 0x02, 0x20, 0x20, 0x25, + 0x12, 0x40, 0x00, 0x1b, 0x8f, 0xbf, 0x00, 0x1c, 0x24, 0x05, 0x00, 0x03, + 0x04, 0x11, 0xff, 0xaf, 0x02, 0x20, 0x20, 0x25, 0x04, 0x11, 0xff, 0x92, + 0x00, 0x00, 0x00, 0x00, 0x24, 0x05, 0x00, 0x03, 0x04, 0x11, 0xff, 0x8f, + 0x02, 0x20, 0x20, 0x25, 0x00, 0x02, 
0x87, 0x82, 0x00, 0x02, 0x1d, 0x42, + 0x30, 0x63, 0x00, 0x02, 0x32, 0x10, 0x00, 0x01, 0x02, 0x03, 0x80, 0x25, + 0x00, 0x02, 0x1b, 0x02, 0x30, 0x63, 0x00, 0x04, 0x02, 0x03, 0x80, 0x25, + 0x00, 0x02, 0x1f, 0x02, 0x30, 0x63, 0x00, 0x08, 0x02, 0x03, 0x80, 0x25, + 0x00, 0x02, 0x1c, 0xc2, 0x30, 0x63, 0x00, 0x10, 0x00, 0x02, 0x12, 0x82, + 0x02, 0x03, 0x80, 0x25, 0x30, 0x42, 0x00, 0x20, 0x02, 0x02, 0x80, 0x25, + 0x8f, 0xbf, 0x00, 0x1c, 0x8f, 0xb2, 0x00, 0x18, 0x8f, 0xb1, 0x00, 0x14, + 0x02, 0x00, 0x10, 0x25, 0x8f, 0xb0, 0x00, 0x10, 0x03, 0xe0, 0x00, 0x08, + 0x27, 0xbd, 0x00, 0x20, 0x27, 0xbd, 0xff, 0x88, 0x3c, 0x03, 0xa4, 0x70, + 0x24, 0x02, 0x00, 0x40, 0xaf, 0xbf, 0x00, 0x5c, 0xaf, 0xb6, 0x00, 0x50, + 0xaf, 0xb5, 0x00, 0x4c, 0xaf, 0xb4, 0x00, 0x48, 0xaf, 0xb3, 0x00, 0x44, + 0xaf, 0xb0, 0x00, 0x38, 0xe7, 0xb7, 0x00, 0x6c, 0xaf, 0xbe, 0x00, 0x58, + 0xaf, 0xb7, 0x00, 0x54, 0xaf, 0xb2, 0x00, 0x40, 0xaf, 0xb1, 0x00, 0x3c, + 0xe7, 0xb9, 0x00, 0x74, 0xe7, 0xb8, 0x00, 0x70, 0xe7, 0xb6, 0x00, 0x68, + 0xe7, 0xb5, 0x00, 0x64, 0xe7, 0xb4, 0x00, 0x60, 0xac, 0x62, 0x00, 0x04, + 0x04, 0x11, 0xff, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x24, 0x02, 0x00, 0x14, + 0xac, 0x60, 0x00, 0x08, 0xac, 0x62, 0x00, 0x0c, 0xac, 0x60, 0x00, 0x00, + 0x04, 0x11, 0xff, 0x76, 0x00, 0x00, 0x00, 0x00, 0x24, 0x02, 0x00, 0x0e, + 0xac, 0x62, 0x00, 0x00, 0x3c, 0x05, 0xb0, 0x00, 0x3c, 0x13, 0xb0, 0x00, + 0x24, 0x07, 0x00, 0x02, 0x24, 0x06, 0x00, 0x01, 0x04, 0x11, 0xff, 0x6e, + 0x00, 0x80, 0xa8, 0x25, 0x24, 0xa5, 0x2a, 0xf0, 0x26, 0x73, 0x24, 0x7c, + 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x1b, 0x24, 0x03, 0x01, 0x0f, + 0x3c, 0x02, 0xa4, 0x30, 0xac, 0x43, 0x00, 0x00, 0x3c, 0x02, 0x18, 0x08, + 0x3c, 0x10, 0xa3, 0xf8, 0x24, 0x42, 0x28, 0x38, 0xae, 0x02, 0x00, 0x08, + 0x00, 0x00, 0x28, 0x25, 0x24, 0x04, 0xff, 0xff, 0x02, 0xa0, 0xf8, 0x09, + 0x24, 0x1a, 0x00, 0x08, 0x3c, 0x06, 0x00, 0x03, 0x34, 0xc6, 0x80, 0xfc, + 0x24, 0x05, 0x00, 0x01, 0x24, 0x04, 0xff, 0xff, 0x04, 0x11, 0xff, 0x5f, + 0x3c, 0x16, 0xb0, 0x00, 0x3c, 0x02, 
0x44, 0x00, 0xc6, 0xd7, 0x2b, 0xb0, + 0xae, 0x02, 0x00, 0x0c, 0x00, 0x00, 0xa0, 0x25, 0xae, 0x00, 0x00, 0x14, + 0x00, 0x00, 0x80, 0x25, 0x32, 0x02, 0xff, 0xff, 0x00, 0x10, 0x30, 0x80, + 0x00, 0x02, 0x11, 0x82, 0x00, 0x02, 0x13, 0xc0, 0x30, 0xc6, 0x00, 0xff, + 0x00, 0xc2, 0x30, 0x25, 0x24, 0x05, 0x00, 0x01, 0x04, 0x11, 0xff, 0x50, + 0x24, 0x04, 0x01, 0xff, 0x24, 0x06, 0x00, 0x46, 0x24, 0x05, 0x00, 0x03, + 0x04, 0x11, 0xff, 0x4c, 0x02, 0x00, 0x20, 0x25, 0x24, 0x05, 0x00, 0x03, + 0x04, 0x11, 0xff, 0x28, 0x02, 0x00, 0x20, 0x25, 0x30, 0x42, 0x00, 0x02, + 0x14, 0x40, 0x00, 0x26, 0x00, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x05, + 0x00, 0x10, 0x80, 0x43, 0x24, 0x05, 0x00, 0x01, 0x02, 0xa0, 0xf8, 0x09, + 0x26, 0x04, 0xff, 0xfe, 0x00, 0x10, 0x80, 0x43, 0x24, 0x02, 0x00, 0x01, + 0x02, 0x02, 0x10, 0x04, 0x24, 0x42, 0xff, 0xff, 0x3c, 0x03, 0x00, 0x78, + 0x00, 0x02, 0x14, 0xc0, 0x00, 0x43, 0x10, 0x24, 0x3c, 0x03, 0x00, 0x06, + 0x24, 0x63, 0x36, 0x34, 0x00, 0x43, 0x10, 0x25, 0x8f, 0xbf, 0x00, 0x5c, + 0x3c, 0x03, 0xa4, 0x70, 0xac, 0x62, 0x00, 0x10, 0x8c, 0x62, 0x00, 0x10, + 0x8f, 0xbe, 0x00, 0x58, 0x8f, 0xb7, 0x00, 0x54, 0x8f, 0xb6, 0x00, 0x50, + 0x8f, 0xb5, 0x00, 0x4c, 0x8f, 0xb3, 0x00, 0x44, 0x8f, 0xb2, 0x00, 0x40, + 0x8f, 0xb1, 0x00, 0x3c, 0x8f, 0xb0, 0x00, 0x38, 0xc7, 0xb9, 0x00, 0x74, + 0xc7, 0xb8, 0x00, 0x70, 0xc7, 0xb7, 0x00, 0x6c, 0xc7, 0xb6, 0x00, 0x68, + 0xc7, 0xb5, 0x00, 0x64, 0xc7, 0xb4, 0x00, 0x60, 0x02, 0x80, 0x10, 0x25, + 0x8f, 0xb4, 0x00, 0x48, 0x03, 0xe0, 0x00, 0x08, 0x27, 0xbd, 0x00, 0x78, + 0x12, 0x00, 0x00, 0x03, 0x00, 0x00, 0x28, 0x25, 0x02, 0xa0, 0xf8, 0x09, + 0x26, 0x04, 0xff, 0xfe, 0x44, 0x80, 0xa0, 0x00, 0x3c, 0x02, 0xb0, 0x00, + 0x3c, 0x12, 0xa0, 0x00, 0xc4, 0x59, 0x2b, 0xb4, 0x02, 0x92, 0x90, 0x21, + 0x46, 0x00, 0xa5, 0x86, 0x00, 0x00, 0x88, 0x25, 0x46, 0x00, 0xa6, 0x06, + 0x02, 0x00, 0x20, 0x25, 0x32, 0x26, 0x00, 0xff, 0x44, 0x80, 0xa8, 0x00, + 0x04, 0x11, 0xff, 0x34, 0x00, 0x00, 0x28, 0x25, 0x24, 0x03, 0x00, 0x0a, + 0x24, 0x04, 0xff, 0xff, 0xae, 0x44, 
0x00, 0x00, 0xae, 0x44, 0x00, 0x04, + 0x92, 0x42, 0x00, 0x05, 0x30, 0x42, 0x00, 0xff, 0xa3, 0xa2, 0x00, 0x28, + 0x93, 0xa2, 0x00, 0x28, 0x30, 0x42, 0x00, 0xff, 0x14, 0x40, 0x00, 0x32, + 0x00, 0x00, 0x00, 0x00, 0x24, 0x63, 0xff, 0xff, 0x14, 0x60, 0xff, 0xf5, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x15, 0xc0, 0x3c, 0x00, 0x00, 0x00, 0x00, + 0x45, 0x02, 0x00, 0x05, 0x46, 0x19, 0xad, 0x42, 0x8e, 0x44, 0x00, 0x04, + 0x04, 0x11, 0x00, 0xbb, 0x00, 0x00, 0x00, 0x00, 0x46, 0x19, 0xad, 0x42, + 0x44, 0x91, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x80, 0x00, 0x20, + 0x46, 0x16, 0xad, 0x81, 0x46, 0x00, 0xb5, 0x82, 0xc6, 0xc0, 0x2b, 0xb0, + 0x46, 0x15, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x45, 0x01, 0x00, 0x05, + 0x46, 0x16, 0xa5, 0x00, 0x26, 0x31, 0x00, 0x01, 0x24, 0x02, 0x00, 0x40, + 0x56, 0x22, 0xff, 0xd8, 0x46, 0x00, 0xad, 0x86, 0x3c, 0x02, 0xb0, 0x00, + 0xc4, 0x40, 0x2b, 0xb8, 0x3c, 0x02, 0xb0, 0x00, 0x46, 0x00, 0xa5, 0x02, + 0xc4, 0x40, 0x2b, 0xbc, 0x46, 0x00, 0xa5, 0x00, 0x46, 0x00, 0xa0, 0x0d, + 0x44, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x20, 0x00, 0x16, + 0x00, 0x00, 0xb8, 0x25, 0x3c, 0x05, 0xb0, 0x00, 0x02, 0x00, 0x38, 0x25, + 0x24, 0x06, 0x00, 0x01, 0x24, 0xa5, 0x2b, 0x10, 0x02, 0x60, 0xf8, 0x09, + 0x24, 0x04, 0x00, 0x2e, 0x3c, 0x06, 0x00, 0x03, 0x34, 0xc6, 0x80, 0xf4, + 0x24, 0x05, 0x00, 0x01, 0x04, 0x11, 0xfe, 0xd6, 0x02, 0x00, 0x20, 0x25, + 0x10, 0x00, 0xff, 0x95, 0x00, 0x10, 0x80, 0x43, 0x93, 0xa2, 0x00, 0x28, + 0x30, 0x42, 0x00, 0x01, 0x54, 0x40, 0x00, 0x01, 0x46, 0x17, 0xad, 0x40, + 0x93, 0xa2, 0x00, 0x28, 0x30, 0x42, 0x00, 0xff, 0x10, 0x00, 0xff, 0xc3, + 0x00, 0x02, 0x10, 0x42, 0x00, 0x00, 0x38, 0x25, 0x00, 0x00, 0xf0, 0x25, + 0x02, 0x00, 0x20, 0x25, 0x33, 0xc6, 0x00, 0xff, 0x24, 0x05, 0x00, 0x01, + 0x04, 0x11, 0xfe, 0xe9, 0xaf, 0xa7, 0x00, 0x30, 0x00, 0x51, 0x18, 0x23, + 0x00, 0x03, 0x27, 0xc3, 0x00, 0x83, 0x18, 0x26, 0x00, 0x64, 0x18, 0x23, + 0x13, 0xc0, 0x00, 0x37, 0x24, 0x08, 0x00, 0x40, 0x8f, 0xa7, 0x00, 0x30, + 0x00, 0x67, 0x20, 0x2a, 0x54, 0x80, 
0x00, 0x02, 0x03, 0xc0, 0xb8, 0x25, + 0x00, 0xe0, 0x18, 0x25, 0x02, 0x22, 0x10, 0x2a, 0x14, 0x40, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x27, 0xde, 0x00, 0x01, 0x17, 0xc8, 0xff, 0xeb, + 0x00, 0x60, 0x38, 0x25, 0x12, 0xe0, 0xff, 0xd2, 0x32, 0xe6, 0x00, 0xff, + 0x24, 0x05, 0x00, 0x01, 0x04, 0x11, 0xfe, 0xd3, 0x02, 0x00, 0x20, 0x25, + 0x00, 0x00, 0x28, 0x25, 0x04, 0x11, 0xfe, 0x8b, 0x02, 0x00, 0x20, 0x25, + 0x00, 0x02, 0x1b, 0x02, 0x00, 0x02, 0x22, 0x02, 0x00, 0x02, 0x29, 0x02, + 0x00, 0x02, 0x30, 0x82, 0x30, 0x63, 0x00, 0x0f, 0x24, 0x08, 0x00, 0x01, + 0x30, 0x84, 0x00, 0x0f, 0x30, 0xa5, 0x00, 0x0f, 0x30, 0xc6, 0x00, 0x01, + 0x14, 0x68, 0x00, 0x09, 0x30, 0x47, 0x00, 0x01, 0x24, 0x03, 0x00, 0x09, + 0x14, 0x83, 0x00, 0x05, 0x24, 0x03, 0x00, 0x0b, 0x14, 0xa3, 0x00, 0x04, + 0x24, 0x03, 0x00, 0x01, 0x54, 0xc0, 0x00, 0x14, 0x24, 0x05, 0x00, 0x09, + 0x24, 0x03, 0x00, 0x01, 0xaf, 0xa3, 0x00, 0x18, 0x00, 0x02, 0x1e, 0x02, + 0xaf, 0xa5, 0x00, 0x1c, 0x30, 0x63, 0x00, 0x0f, 0x3c, 0x05, 0xb0, 0x00, + 0xaf, 0xa7, 0x00, 0x24, 0xaf, 0xa6, 0x00, 0x20, 0xaf, 0xa4, 0x00, 0x14, + 0xaf, 0xa3, 0x00, 0x10, 0x00, 0x02, 0x3f, 0x02, 0x24, 0x06, 0x00, 0x07, + 0x24, 0xa5, 0x2b, 0x44, 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x19, + 0x10, 0x00, 0xff, 0xb0, 0x3c, 0x06, 0x00, 0x03, 0x10, 0x00, 0xff, 0xce, + 0x00, 0x00, 0xb8, 0x25, 0x04, 0x11, 0xfe, 0x64, 0x02, 0x00, 0x20, 0x25, + 0x00, 0x02, 0xf4, 0x02, 0x24, 0x02, 0x00, 0x05, 0x13, 0xc2, 0x00, 0x37, + 0x3c, 0x11, 0x04, 0x0a, 0x14, 0xe0, 0x00, 0x03, 0x26, 0x31, 0x1c, 0x10, + 0x3c, 0x11, 0x04, 0x12, 0x26, 0x31, 0x0c, 0x08, 0x02, 0x20, 0x30, 0x25, + 0x24, 0x05, 0x00, 0x06, 0x04, 0x11, 0xfe, 0x79, 0x02, 0x00, 0x20, 0x25, + 0x3c, 0x02, 0xa0, 0x20, 0x3c, 0x03, 0x00, 0x08, 0x02, 0x82, 0x10, 0x21, + 0x8e, 0x44, 0x00, 0x00, 0x8e, 0x44, 0x00, 0x04, 0x02, 0x43, 0x90, 0x21, + 0x16, 0x42, 0xff, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x05, 0xb0, 0x00, + 0x02, 0x00, 0x38, 0x25, 0x24, 0x06, 0x00, 0x01, 0x24, 0xa5, 0x2b, 0x64, + 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 
0x00, 0x06, 0x3c, 0x05, 0xb0, 0x00, + 0x03, 0xc0, 0x38, 0x25, 0x24, 0x06, 0x00, 0x01, 0x24, 0xa5, 0x2b, 0x70, + 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x0f, 0x24, 0x02, 0x00, 0x0b, + 0xaf, 0xa2, 0x00, 0x14, 0x3c, 0x05, 0xb0, 0x00, 0x24, 0x02, 0x00, 0x09, + 0xaf, 0xa2, 0x00, 0x10, 0x24, 0x07, 0x00, 0x01, 0x24, 0x06, 0x00, 0x03, + 0x24, 0xa5, 0x2b, 0x84, 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x0b, + 0x3c, 0x05, 0xb0, 0x00, 0x02, 0xe0, 0x38, 0x25, 0x24, 0x06, 0x00, 0x01, + 0x24, 0xa5, 0x2b, 0x94, 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x0a, + 0x3c, 0x05, 0xb0, 0x00, 0x02, 0x20, 0x38, 0x25, 0x24, 0x06, 0x00, 0x01, + 0x24, 0xa5, 0x2b, 0xa4, 0x02, 0x60, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x06, + 0x3c, 0x02, 0x00, 0x20, 0x26, 0x10, 0x00, 0x02, 0x10, 0x00, 0xfe, 0xf3, + 0x02, 0x82, 0xa0, 0x21, 0x10, 0x00, 0xff, 0xcd, 0x26, 0x31, 0x1c, 0x10, + 0xac, 0x85, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x60, 0x8c, 0x62, 0x00, 0x10, + 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, + 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0xcc, 0x9e, + 0x24, 0x42, 0x2d, 0x51, 0x00, 0x82, 0x00, 0x18, 0x00, 0x00, 0x10, 0x12, + 0x00, 0x02, 0x1b, 0xc0, 0x00, 0x02, 0x14, 0x42, 0x00, 0x43, 0x10, 0x25, + 0x3c, 0x03, 0x1b, 0x87, 0x24, 0x63, 0x35, 0x93, 0x00, 0x43, 0x00, 0x18, + 0x00, 0x00, 0x10, 0x12, 0x00, 0x5b, 0x10, 0x26, 0x00, 0x02, 0x1b, 0x40, + 0x00, 0x02, 0x14, 0xc2, 0x00, 0x62, 0x10, 0x25, 0x00, 0x02, 0x18, 0x80, + 0x00, 0x62, 0x18, 0x21, 0x3c, 0x02, 0xe6, 0x54, 0x24, 0x42, 0x6b, 0x64, + 0x03, 0xe0, 0x00, 0x08, 0x00, 0x62, 0xd8, 0x21, 0x00, 0x1b, 0x1c, 0x02, + 0x3c, 0x02, 0x85, 0xeb, 0x00, 0x7b, 0x18, 0x26, 0x34, 0x42, 0xca, 0x6b, + 0x00, 0x62, 0x00, 0x18, 0x00, 0x00, 0x10, 0x12, 0x00, 0x02, 0x1b, 0x42, + 0x00, 0x62, 0x18, 0x26, 0x3c, 0x02, 0xc2, 0xb2, 0x34, 0x42, 0xae, 0x35, + 0x00, 0x62, 0x00, 0x18, 0x00, 0x00, 0x10, 0x12, 0x00, 0x02, 0x1c, 0x02, + 0x03, 0xe0, 0x00, 0x08, 0x00, 0x62, 0x10, 0x26, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x07, 0xe0, 0x00, 0x00, 0x00, 0x00, + 0x27, 0xbd, 0xff, 0x80, 0x3c, 0x05, 0xb0, 0x00, 0xaf, 0xbe, 0x00, 0x78, + 0xaf, 0xb2, 0x00, 0x60, 0x03, 0xa0, 0xf0, 0x25, 0x3c, 0x12, 0xb0, 0x00, + 0x03, 0xc0, 0x38, 0x25, 0x24, 0x06, 0x00, 0x01, 0x24, 0xa5, 0x2b, 0xc0, + 0x24, 0x04, 0x00, 0x11, 0x26, 0x52, 0x24, 0x7c, 0xaf, 0xbf, 0x00, 0x7c, + 0xaf, 0xb7, 0x00, 0x74, 0xaf, 0xb6, 0x00, 0x70, 0xaf, 0xb5, 0x00, 0x6c, + 0xaf, 0xb4, 0x00, 0x68, 0xaf, 0xb3, 0x00, 0x64, 0xaf, 0xb1, 0x00, 0x5c, + 0x02, 0x40, 0xf8, 0x09, 0xaf, 0xb0, 0x00, 0x58, 0x3c, 0x02, 0x80, 0x7f, + 0x34, 0x43, 0x80, 0x00, 0x34, 0x42, 0x90, 0x00, 0xbc, 0x71, 0x00, 0x00, + 0x24, 0x63, 0x00, 0x10, 0x14, 0x62, 0xff, 0xfd, 0x3c, 0x04, 0xa0, 0x00, + 0x3c, 0x10, 0x10, 0x00, 0x3c, 0x03, 0x7f, 0x45, 0x3c, 0x02, 0x14, 0x00, + 0x26, 0x10, 0x10, 0x00, 0x24, 0x63, 0x4c, 0x46, 0x24, 0x42, 0x10, 0x00, + 0x02, 0x04, 0x28, 0x25, 0x8c, 0xa5, 0x00, 0x00, 0x50, 0xa3, 0x01, 0x0b, + 0x00, 0x10, 0x12, 0x00, 0x26, 0x10, 0x01, 0x00, 0x56, 0x02, 0xff, 0xfb, + 0x02, 0x04, 0x28, 0x25, 0x3c, 0x05, 0xb0, 0x00, 0x24, 0x04, 0x00, 0x36, + 0x00, 0x00, 0x30, 0x25, 0x02, 0x40, 0xf8, 0x09, 0x24, 0xa5, 0x2c, 0x94, + 0x3c, 0x04, 0xb0, 0x00, 0x10, 0x00, 0x01, 0x12, 0x24, 0x84, 0x29, 0xc7, + 0x16, 0x62, 0x00, 0x75, 0x24, 0x14, 0x00, 0x20, 0x24, 0x14, 0x00, 0x38, + 0x24, 0x02, 0x00, 0x24, 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x50, 0x10, 0x21, + 0x00, 0x43, 0x10, 0x25, 0x24, 0x03, 0x00, 0x02, 0x8c, 0x47, 0x00, 0x00, + 0x16, 0x63, 0x00, 0x02, 0x24, 0x02, 0x00, 0x2c, 0x24, 0x02, 0x00, 0x38, + 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x50, 0x10, 0x21, 0x00, 0x43, 0x10, 0x25, 0x8c, 0x42, 0x00, 0x00, 0x24, 0x03, 0x00, 0x02, 0x00, 0x02, 0x14, 0x02, - 0xaf, 0xc2, 0x00, 0x10, 0x16, 0x23, 0x00, 0x02, 0x24, 0x02, 0x00, 0x18, - 0x24, 0x02, 0x00, 0x1c, 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x54, 0x10, 0x21, - 0x00, 0x43, 0x10, 0x25, 0x8c, 0x42, 0x00, 0x00, 0x01, 0x14, 0x40, 0x21, - 0xaf, 0xc2, 0x00, 0x2c, 0x8f, 0xc2, 0x00, 0x10, 0x00, 0x52, 0x00, 0x18, + 0xaf, 0xc2, 0x00, 0x2c, 0x16, 0x63, 
0x00, 0x02, 0x24, 0x02, 0x00, 0x18, + 0x24, 0x02, 0x00, 0x1c, 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x50, 0x10, 0x21, + 0x00, 0x43, 0x10, 0x25, 0x8c, 0x42, 0x00, 0x00, 0x00, 0xf0, 0x38, 0x21, + 0xaf, 0xc2, 0x00, 0x4c, 0x8f, 0xc2, 0x00, 0x2c, 0x00, 0x54, 0x00, 0x18, 0x00, 0x00, 0x30, 0x12, 0x24, 0xc2, 0x00, 0x10, 0x03, 0xa2, 0xe8, 0x23, - 0x27, 0xb0, 0x00, 0x10, 0x00, 0x10, 0x10, 0x23, 0x30, 0x42, 0x00, 0x0f, - 0x02, 0x02, 0x80, 0x21, 0x00, 0xc0, 0x28, 0x25, 0x04, 0x11, 0x00, 0xc0, - 0x02, 0x00, 0x20, 0x25, 0x3c, 0x02, 0xa4, 0x60, 0x8c, 0x43, 0x00, 0x10, + 0x27, 0xb1, 0x00, 0x28, 0x00, 0x11, 0x10, 0x23, 0x30, 0x42, 0x00, 0x0f, + 0x02, 0x22, 0x88, 0x21, 0x00, 0xc0, 0x28, 0x25, 0x04, 0x11, 0x00, 0xec, + 0x02, 0x20, 0x20, 0x25, 0x3c, 0x02, 0xa4, 0x60, 0x8c, 0x43, 0x00, 0x10, 0x30, 0x63, 0x00, 0x03, 0x14, 0x60, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, - 0x24, 0xc6, 0xff, 0xff, 0xac, 0x50, 0x00, 0x00, 0x3a, 0x31, 0x00, 0x02, - 0xac, 0x48, 0x00, 0x04, 0x04, 0x11, 0x01, 0x1d, 0xac, 0x46, 0x00, 0x0c, - 0x16, 0x20, 0x00, 0x36, 0x24, 0x02, 0x00, 0x04, 0x24, 0x02, 0x00, 0x0c, - 0xaf, 0xc2, 0x00, 0x14, 0x24, 0x02, 0x00, 0x14, 0xaf, 0xc2, 0x00, 0x18, - 0x24, 0x02, 0x00, 0x1c, 0xaf, 0xc2, 0x00, 0x1c, 0x24, 0x02, 0x00, 0x24, - 0xaf, 0xc2, 0x00, 0x20, 0x24, 0x02, 0x00, 0x04, 0xaf, 0xc2, 0x00, 0x24, - 0x3c, 0x02, 0x64, 0xe3, 0x24, 0x42, 0x63, 0x41, 0xaf, 0xd2, 0x00, 0x28, - 0x00, 0x00, 0x98, 0x25, 0x00, 0x00, 0x88, 0x25, 0xaf, 0xc2, 0x00, 0x30, - 0x3c, 0x12, 0xa4, 0x60, 0x8f, 0xc2, 0x00, 0x10, 0x02, 0x62, 0x10, 0x2a, - 0x54, 0x40, 0x00, 0x2a, 0x8f, 0xc2, 0x00, 0x14, 0x3c, 0x02, 0x00, 0xaa, + 0x24, 0xc6, 0xff, 0xff, 0xac, 0x51, 0x00, 0x00, 0x3a, 0x73, 0x00, 0x02, + 0xac, 0x47, 0x00, 0x04, 0x04, 0x11, 0x01, 0x49, 0xac, 0x46, 0x00, 0x0c, + 0x16, 0x60, 0x00, 0x44, 0x24, 0x02, 0x00, 0x04, 0x24, 0x02, 0x00, 0x0c, + 0xaf, 0xc2, 0x00, 0x34, 0x24, 0x02, 0x00, 0x14, 0xaf, 0xc2, 0x00, 0x38, + 0x24, 0x02, 0x00, 0x1c, 0xaf, 0xc2, 0x00, 0x3c, 0x24, 0x02, 0x00, 0x24, + 0xaf, 0xc2, 0x00, 0x40, 0x24, 0x02, 0x00, 
0x04, 0xaf, 0xc2, 0x00, 0x44, + 0x3c, 0x02, 0x64, 0xe3, 0x24, 0x42, 0x63, 0x41, 0xaf, 0xc2, 0x00, 0x50, + 0x3c, 0x02, 0xb0, 0x00, 0x24, 0x42, 0x2c, 0x70, 0xaf, 0xd4, 0x00, 0x48, + 0xaf, 0xc0, 0x00, 0x28, 0x00, 0x00, 0x98, 0x25, 0xaf, 0xc2, 0x00, 0x54, + 0x8f, 0xc2, 0x00, 0x28, 0x8f, 0xc3, 0x00, 0x2c, 0x00, 0x43, 0x10, 0x2a, + 0x54, 0x40, 0x00, 0x35, 0x8f, 0xc2, 0x00, 0x34, 0x3c, 0x02, 0x00, 0xaa, 0x3c, 0x03, 0xa4, 0x04, 0x34, 0x42, 0xaa, 0x0e, 0xac, 0x62, 0x00, 0x10, 0x3c, 0x02, 0xa4, 0x08, 0xac, 0x40, 0x00, 0x00, 0xac, 0x60, 0x00, 0x1c, 0x3c, 0x02, 0xa4, 0x30, 0x24, 0x03, 0x05, 0x55, 0xac, 0x43, 0x00, 0x0c, 0x24, 0x04, 0x00, 0x02, 0xac, 0x43, 0x00, 0x08, 0x3c, 0x03, 0xa4, 0x60, 0xac, 0x64, 0x00, 0x10, 0x3c, 0x03, 0xa4, 0x80, 0xac, 0x60, 0x00, 0x18, 0x3c, 0x03, 0xa4, 0x50, 0xac, 0x60, 0x00, 0x0c, 0x24, 0x03, 0x08, 0x00, - 0xac, 0x43, 0x00, 0x00, 0x3c, 0x02, 0xa4, 0x00, 0x24, 0x42, 0x09, 0x90, + 0xac, 0x43, 0x00, 0x00, 0x3c, 0x02, 0xa4, 0x00, 0x24, 0x42, 0x0b, 0xd4, 0x00, 0x40, 0xf8, 0x09, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x00, - 0xac, 0x62, 0x00, 0x04, 0x3c, 0x02, 0xb0, 0x00, 0x24, 0x42, 0x10, 0x40, - 0x00, 0x40, 0xf8, 0x09, 0x8f, 0xc4, 0x00, 0x2c, 0x10, 0x00, 0xff, 0x9b, - 0x24, 0x02, 0x00, 0x1c, 0xaf, 0xc2, 0x00, 0x14, 0x24, 0x02, 0x00, 0x08, - 0xaf, 0xc2, 0x00, 0x18, 0x24, 0x02, 0x00, 0x0c, 0xaf, 0xc2, 0x00, 0x1c, - 0x24, 0x02, 0x00, 0x10, 0xaf, 0xc2, 0x00, 0x20, 0x10, 0x00, 0xff, 0xcc, - 0x24, 0x02, 0x00, 0x18, 0x8f, 0xc3, 0x00, 0x30, 0x02, 0x02, 0x10, 0x21, - 0x8c, 0x49, 0x00, 0x00, 0x8f, 0xc2, 0x00, 0x18, 0x02, 0x02, 0x10, 0x21, - 0x8c, 0x55, 0x00, 0x00, 0x8f, 0xc2, 0x00, 0x1c, 0x02, 0x02, 0x10, 0x21, - 0x8c, 0x56, 0x00, 0x00, 0x8f, 0xc2, 0x00, 0x20, 0x02, 0x02, 0x10, 0x21, - 0x8c, 0x57, 0x00, 0x00, 0x8f, 0xc2, 0x00, 0x24, 0x02, 0x02, 0x10, 0x21, - 0x8c, 0x4a, 0x00, 0x00, 0x8e, 0x02, 0x00, 0x00, 0x14, 0x43, 0x00, 0x14, - 0x24, 0x03, 0x00, 0x01, 0x16, 0xa0, 0x00, 0x14, 0x02, 0xa0, 0x88, 0x25, - 0x26, 0xe2, 0x00, 0x17, 0x00, 0x02, 0x10, 0xc2, 0x00, 
0x02, 0x10, 0xc0, - 0x03, 0xa2, 0xe8, 0x23, 0x27, 0xb1, 0x00, 0x10, 0x00, 0x11, 0x10, 0x23, - 0x30, 0x42, 0x00, 0x0f, 0x02, 0x22, 0x88, 0x21, 0x02, 0xe0, 0x28, 0x25, - 0x04, 0x11, 0x00, 0x56, 0x02, 0x20, 0x20, 0x25, 0x16, 0xe0, 0x00, 0x0e, - 0x02, 0x20, 0xa8, 0x25, 0x8f, 0xc2, 0x00, 0x28, 0x26, 0x73, 0x00, 0x01, - 0x10, 0x00, 0xff, 0xaf, 0x02, 0x02, 0x80, 0x21, 0x14, 0x43, 0xff, 0xfc, - 0x8f, 0xc2, 0x00, 0x28, 0x12, 0xe0, 0xff, 0xf9, 0x32, 0xa2, 0x00, 0x07, - 0x10, 0x40, 0x00, 0x05, 0x31, 0x22, 0x00, 0x01, 0x3c, 0x04, 0xb0, 0x00, - 0x10, 0x00, 0x00, 0x45, 0x24, 0x84, 0x17, 0x82, 0x31, 0x22, 0x00, 0x01, - 0x10, 0x40, 0x00, 0x04, 0x32, 0xe2, 0x00, 0x01, 0x3c, 0x04, 0xb0, 0x00, - 0x10, 0x00, 0x00, 0x3f, 0x24, 0x84, 0x17, 0x64, 0x02, 0xe2, 0x10, 0x21, - 0x01, 0x34, 0x48, 0x21, 0x8e, 0x44, 0x00, 0x10, 0x30, 0x84, 0x00, 0x03, - 0x14, 0x80, 0xff, 0xfd, 0x3c, 0x03, 0xa4, 0x60, 0x34, 0x63, 0x00, 0x04, - 0x24, 0x42, 0xff, 0xff, 0x31, 0x4a, 0x10, 0x00, 0xae, 0x55, 0x00, 0x00, - 0xac, 0x69, 0x00, 0x00, 0xae, 0x42, 0x00, 0x0c, 0x11, 0x40, 0x00, 0x17, - 0x02, 0xe0, 0x28, 0x25, 0x02, 0xa0, 0x20, 0x25, 0x02, 0x20, 0xf8, 0x09, - 0x02, 0xc0, 0x30, 0x25, 0x00, 0x40, 0x28, 0x25, 0x02, 0xc0, 0x20, 0x25, - 0x04, 0x11, 0x00, 0x2c, 0x00, 0x40, 0x48, 0x25, 0x02, 0xb6, 0x10, 0x2b, - 0x10, 0x40, 0x00, 0x11, 0x02, 0xf5, 0x18, 0x21, 0x02, 0xc3, 0x10, 0x2b, - 0x54, 0x40, 0x00, 0x01, 0x02, 0xc0, 0x18, 0x25, 0x00, 0x75, 0x30, 0x23, - 0x00, 0xc0, 0x28, 0x25, 0x04, 0x11, 0x00, 0x22, 0x02, 0xa0, 0x20, 0x25, - 0x3c, 0x02, 0xa4, 0x00, 0x00, 0xc0, 0x28, 0x25, 0x24, 0x42, 0x02, 0xc0, - 0x00, 0x40, 0xf8, 0x09, 0x02, 0xa0, 0x20, 0x25, 0x04, 0x11, 0x00, 0x83, - 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xff, 0xc7, 0x8f, 0xc2, 0x00, 0x28, - 0x01, 0x36, 0x10, 0x21, 0x00, 0x43, 0x20, 0x2b, 0x10, 0x80, 0xff, 0xf9, - 0x00, 0x00, 0x00, 0x00, 0x02, 0xa2, 0x20, 0x2b, 0x54, 0x80, 0xff, 0xed, - 0x00, 0x40, 0xa8, 0x25, 0x10, 0x00, 0xff, 0xec, 0x00, 0x75, 0x30, 0x23, - 0x3c, 0x03, 0xa4, 0x00, 0xac, 0x62, 0x00, 0x0c, 0x26, 
0x82, 0x00, 0x04, + 0x8c, 0x64, 0x00, 0x0c, 0xac, 0x62, 0x00, 0x04, 0x8c, 0x67, 0x00, 0x00, + 0xaf, 0xa4, 0x00, 0x18, 0x8c, 0x63, 0x00, 0x08, 0x3c, 0x05, 0xb0, 0x00, + 0xaf, 0xa2, 0x00, 0x10, 0x24, 0x04, 0x00, 0x0c, 0xaf, 0xa3, 0x00, 0x14, + 0x24, 0x06, 0x00, 0x04, 0x02, 0x40, 0xf8, 0x09, 0x24, 0xa5, 0x2c, 0x80, + 0x3c, 0x02, 0xb0, 0x00, 0x24, 0x42, 0x22, 0xf4, 0x00, 0x40, 0xf8, 0x09, + 0x8f, 0xc4, 0x00, 0x4c, 0x10, 0x00, 0xff, 0x8d, 0x24, 0x02, 0x00, 0x1c, + 0xaf, 0xc2, 0x00, 0x34, 0x24, 0x02, 0x00, 0x08, 0xaf, 0xc2, 0x00, 0x38, + 0x24, 0x02, 0x00, 0x0c, 0xaf, 0xc2, 0x00, 0x3c, 0x24, 0x02, 0x00, 0x10, + 0xaf, 0xc2, 0x00, 0x40, 0x10, 0x00, 0xff, 0xbe, 0x24, 0x02, 0x00, 0x18, + 0x8e, 0x26, 0x00, 0x00, 0x02, 0x22, 0x10, 0x21, 0x8c, 0x56, 0x00, 0x00, + 0x8f, 0xc2, 0x00, 0x38, 0x02, 0x22, 0x10, 0x21, 0x8c, 0x57, 0x00, 0x00, + 0x8f, 0xc2, 0x00, 0x3c, 0x02, 0x22, 0x10, 0x21, 0x8c, 0x54, 0x00, 0x00, + 0x8f, 0xc2, 0x00, 0x40, 0x02, 0x22, 0x10, 0x21, 0x8c, 0x55, 0x00, 0x00, + 0x8f, 0xc2, 0x00, 0x44, 0x02, 0x22, 0x10, 0x21, 0x8c, 0x42, 0x00, 0x00, + 0xaf, 0xc2, 0x00, 0x30, 0x8f, 0xc2, 0x00, 0x50, 0x14, 0xc2, 0x00, 0x16, + 0x24, 0x02, 0x00, 0x01, 0x16, 0xe0, 0x00, 0x16, 0x02, 0xe0, 0x98, 0x25, + 0x26, 0xa2, 0x00, 0x17, 0x00, 0x02, 0x10, 0xc2, 0x00, 0x02, 0x10, 0xc0, + 0x03, 0xa2, 0xe8, 0x23, 0x27, 0xb3, 0x00, 0x28, 0x00, 0x13, 0x10, 0x23, + 0x30, 0x42, 0x00, 0x0f, 0x02, 0x62, 0x98, 0x21, 0x02, 0xa0, 0x28, 0x25, + 0x04, 0x11, 0x00, 0x73, 0x02, 0x60, 0x20, 0x25, 0x16, 0xa0, 0x00, 0x15, + 0x02, 0x60, 0xb8, 0x25, 0x8f, 0xc2, 0x00, 0x28, 0x24, 0x42, 0x00, 0x01, + 0xaf, 0xc2, 0x00, 0x28, 0x8f, 0xc2, 0x00, 0x48, 0x10, 0x00, 0xff, 0xa0, + 0x02, 0x22, 0x88, 0x21, 0x14, 0xc2, 0xff, 0xfa, 0x8f, 0xc2, 0x00, 0x28, + 0x12, 0xa0, 0xff, 0xf7, 0x32, 0xe2, 0x00, 0x07, 0x10, 0x40, 0x00, 0x0a, + 0x32, 0xc2, 0x00, 0x01, 0x3c, 0x05, 0xb0, 0x00, 0x24, 0x04, 0x00, 0x2b, + 0x00, 0x00, 0x30, 0x25, 0x02, 0x40, 0xf8, 0x09, 0x24, 0xa5, 0x2c, 0x08, + 0x3c, 0x04, 0xb0, 0x00, 0x10, 0x00, 0x00, 0x5b, 0x24, 
0x84, 0x29, 0x8a, + 0x32, 0xc2, 0x00, 0x01, 0x10, 0x40, 0x00, 0x09, 0x8f, 0xc2, 0x00, 0x30, + 0x3c, 0x05, 0xb0, 0x00, 0x24, 0x04, 0x00, 0x31, 0x00, 0x00, 0x30, 0x25, + 0x02, 0x40, 0xf8, 0x09, 0x24, 0xa5, 0x2c, 0x38, 0x3c, 0x04, 0xb0, 0x00, + 0x10, 0x00, 0x00, 0x50, 0x24, 0x84, 0x29, 0x6c, 0x8f, 0xc7, 0x00, 0x28, + 0x8f, 0xc5, 0x00, 0x54, 0xaf, 0xa2, 0x00, 0x20, 0xaf, 0xa6, 0x00, 0x10, + 0xaf, 0xb5, 0x00, 0x1c, 0xaf, 0xb7, 0x00, 0x18, 0xaf, 0xb6, 0x00, 0x14, + 0x24, 0x06, 0x00, 0x06, 0x02, 0x40, 0xf8, 0x09, 0x24, 0x04, 0x00, 0x08, + 0x32, 0xa2, 0x00, 0x01, 0x02, 0xa2, 0x10, 0x21, 0x02, 0xd0, 0x48, 0x21, + 0x3c, 0x03, 0xa4, 0x60, 0x8c, 0x64, 0x00, 0x10, 0x30, 0x84, 0x00, 0x03, + 0x14, 0x80, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x24, 0x42, 0xff, 0xff, + 0xac, 0x77, 0x00, 0x00, 0xac, 0x69, 0x00, 0x04, 0xac, 0x62, 0x00, 0x0c, + 0x8f, 0xc2, 0x00, 0x30, 0x30, 0x56, 0x10, 0x00, 0x12, 0xc0, 0x00, 0x17, + 0x02, 0x80, 0x30, 0x25, 0x02, 0xa0, 0x28, 0x25, 0x02, 0x60, 0xf8, 0x09, + 0x02, 0xe0, 0x20, 0x25, 0x00, 0x40, 0x28, 0x25, 0x02, 0x80, 0x20, 0x25, + 0x04, 0x11, 0x00, 0x31, 0x00, 0x40, 0x30, 0x25, 0x02, 0xf4, 0x10, 0x2b, + 0x10, 0x40, 0x00, 0x11, 0x02, 0xb7, 0x18, 0x21, 0x02, 0x83, 0x10, 0x2b, + 0x54, 0x40, 0x00, 0x01, 0x02, 0x80, 0x18, 0x25, 0x00, 0x77, 0x30, 0x23, + 0x00, 0xc0, 0x28, 0x25, 0x04, 0x11, 0x00, 0x27, 0x02, 0xe0, 0x20, 0x25, + 0x3c, 0x02, 0xa4, 0x00, 0x00, 0xc0, 0x28, 0x25, 0x24, 0x42, 0x03, 0x44, + 0x00, 0x40, 0xf8, 0x09, 0x02, 0xe0, 0x20, 0x25, 0x04, 0x11, 0x00, 0x88, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xff, 0xaf, 0x8f, 0xc2, 0x00, 0x28, + 0x00, 0xd4, 0x10, 0x21, 0x00, 0x43, 0x20, 0x2b, 0x10, 0x80, 0xff, 0xf9, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe2, 0x20, 0x2b, 0x54, 0x80, 0xff, 0xed, + 0x00, 0x40, 0xb8, 0x25, 0x10, 0x00, 0xff, 0xec, 0x00, 0x77, 0x30, 0x23, + 0x3c, 0x03, 0xa4, 0x00, 0xac, 0x62, 0x00, 0x0c, 0x26, 0x02, 0x00, 0x04, 0x00, 0x44, 0x10, 0x25, 0x8c, 0x42, 0x00, 0x00, 0x3c, 0x03, 0x00, 0xff, - 0x00, 0x02, 0x8e, 0x02, 0x00, 0x43, 0x10, 0x24, 0x3c, 
0x03, 0x00, 0x01, - 0x14, 0x43, 0xff, 0x1e, 0x24, 0x02, 0x00, 0x02, 0x3c, 0x04, 0xb0, 0x00, - 0x24, 0x84, 0x17, 0x9f, 0x04, 0x11, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, - 0x24, 0x03, 0xff, 0xf0, 0x00, 0x83, 0x18, 0x24, 0x00, 0x83, 0x20, 0x23, - 0x00, 0x85, 0x20, 0x21, 0x00, 0x00, 0x10, 0x25, 0x00, 0x44, 0x28, 0x2a, - 0x14, 0xa0, 0x00, 0x03, 0x00, 0x43, 0x28, 0x21, 0x03, 0xe0, 0x00, 0x08, - 0x00, 0x00, 0x00, 0x00, 0xbc, 0xb5, 0x00, 0x00, 0x10, 0x00, 0xff, 0xf9, - 0x24, 0x42, 0x00, 0x10, 0x24, 0x02, 0xff, 0xfc, 0x00, 0x82, 0x10, 0x24, - 0x3c, 0x03, 0xa0, 0x00, 0x00, 0x43, 0x10, 0x25, 0x8c, 0x42, 0x00, 0x00, - 0x00, 0x04, 0x20, 0x27, 0x00, 0x04, 0x20, 0xc0, 0x00, 0x82, 0x10, 0x06, - 0x03, 0xe0, 0x00, 0x08, 0x30, 0x42, 0x00, 0xff, 0x27, 0xbd, 0xff, 0xe8, - 0x3c, 0x02, 0xa4, 0x40, 0x3c, 0x03, 0xa0, 0x10, 0xaf, 0xbf, 0x00, 0x14, - 0xac, 0x43, 0x00, 0x04, 0x3c, 0x03, 0xa0, 0x12, 0x00, 0x80, 0x38, 0x25, - 0x3c, 0x02, 0xa0, 0x10, 0x24, 0x04, 0xc9, 0x50, 0x24, 0x63, 0x58, 0x00, - 0xa4, 0x44, 0x00, 0x00, 0x24, 0x42, 0x00, 0x02, 0x54, 0x43, 0xff, 0xfe, - 0xa4, 0x44, 0x00, 0x00, 0x3c, 0x02, 0xa4, 0x40, 0x24, 0x03, 0x01, 0x40, - 0xac, 0x43, 0x00, 0x08, 0x24, 0x03, 0x02, 0x00, 0xac, 0x43, 0x00, 0x30, - 0x3c, 0x06, 0xa0, 0x10, 0x24, 0x03, 0x04, 0x00, 0x3c, 0x08, 0xb0, 0x00, - 0xac, 0x43, 0x00, 0x34, 0x24, 0xc6, 0x64, 0x50, 0x25, 0x08, 0x17, 0xd4, - 0x24, 0x0b, 0xf7, 0xb2, 0x04, 0x11, 0xff, 0xdb, 0x00, 0xe0, 0x20, 0x25, - 0x00, 0x02, 0x16, 0x00, 0x00, 0x02, 0x16, 0x03, 0x10, 0x40, 0x00, 0x18, - 0x24, 0xe7, 0x00, 0x01, 0x24, 0x42, 0xff, 0xff, 0x00, 0x02, 0x28, 0x80, - 0x00, 0xa2, 0x28, 0x21, 0x01, 0x05, 0x28, 0x21, 0x24, 0xac, 0x00, 0x05, - 0x00, 0xc0, 0x48, 0x25, 0x24, 0x0d, 0x00, 0x08, 0x04, 0x11, 0xff, 0xce, - 0x00, 0xa0, 0x20, 0x25, 0x01, 0x20, 0x20, 0x25, 0x00, 0x00, 0x18, 0x25, - 0x00, 0x62, 0x50, 0x07, 0x31, 0x4a, 0x00, 0x01, 0x55, 0x40, 0x00, 0x01, - 0xa4, 0x8b, 0x00, 0x00, 0x24, 0x63, 0x00, 0x01, 0x14, 0x6d, 0xff, 0xfa, - 0x24, 0x84, 0x02, 0x80, 0x24, 0xa5, 0x00, 0x01, 0x14, 
0xac, 0xff, 0xf3, - 0x25, 0x29, 0x00, 0x02, 0x10, 0x00, 0xff, 0xe4, 0x24, 0xc6, 0x00, 0x0e, - 0x3c, 0x05, 0xa4, 0x40, 0x04, 0x11, 0xff, 0xbd, 0x24, 0xa4, 0x00, 0x09, - 0x00, 0x40, 0x30, 0x25, 0x04, 0x11, 0xff, 0xba, 0x24, 0xa4, 0x00, 0x0b, - 0x00, 0x06, 0x20, 0xc0, 0x00, 0x86, 0x20, 0x23, 0x3c, 0x03, 0xb0, 0x00, - 0x00, 0x04, 0x20, 0x80, 0x24, 0x63, 0x17, 0x10, 0x00, 0x64, 0x18, 0x21, - 0x24, 0xa4, 0x00, 0x14, 0x24, 0xa5, 0x00, 0x30, 0x8c, 0x66, 0x00, 0x00, - 0x24, 0x84, 0x00, 0x04, 0xac, 0x86, 0xff, 0xfc, 0x14, 0x85, 0xff, 0xfc, - 0x24, 0x63, 0x00, 0x04, 0x10, 0x40, 0x00, 0x02, 0x24, 0x03, 0x32, 0x02, - 0x24, 0x03, 0x12, 0x02, 0x3c, 0x02, 0xa4, 0x40, 0xac, 0x43, 0x00, 0x00, - 0x10, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x60, - 0x8c, 0x62, 0x00, 0x10, 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, - 0x00, 0x00, 0x00, 0x00, 0x40, 0x04, 0x48, 0x00, 0x3c, 0x19, 0xa4, 0x00, - 0x27, 0x39, 0x09, 0x3c, 0x03, 0x20, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, - 0x04, 0x04, 0x23, 0x3a, 0x00, 0x00, 0x02, 0x71, 0x00, 0x15, 0x0c, 0x69, - 0x0c, 0x6f, 0x0c, 0x6e, 0x00, 0x80, 0x03, 0x00, 0x00, 0x5f, 0x02, 0x39, - 0x00, 0x09, 0x02, 0x6b, 0x03, 0xe5, 0x22, 0x39, 0x00, 0x00, 0x02, 0x0d, - 0x00, 0x00, 0x0c, 0x15, 0x0c, 0x15, 0x0c, 0x15, 0x00, 0x6c, 0x02, 0xec, - 0x00, 0x25, 0x01, 0xff, 0x00, 0x0e, 0x02, 0x04, 0x04, 0x65, 0x1e, 0x39, - 0x00, 0x00, 0x02, 0x0d, 0x00, 0x04, 0x0c, 0x11, 0x0c, 0x19, 0x0c, 0x1a, - 0x00, 0x6c, 0x02, 0xec, 0x00, 0x25, 0x01, 0xff, 0x00, 0x0e, 0x02, 0x04, - 0x10, 0x17, 0x11, 0x01, 0x1a, 0x11, 0x11, 0x1e, 0x10, 0x1f, 0x01, 0x19, - 0x1a, 0x1f, 0x01, 0x04, 0x01, 0x0d, 0x24, 0x1f, 0x10, 0x01, 0x0c, 0x17, - 0x14, 0x12, 0x19, 0x10, 0x0f, 0x00, 0x10, 0x17, 0x11, 0x01, 0x21, 0x0c, - 0x0f, 0x0f, 0x1d, 0x01, 0x19, 0x1a, 0x1f, 0x01, 0x0a, 0x01, 0x0d, 0x24, - 0x1f, 0x10, 0x01, 0x0c, 0x17, 0x14, 0x12, 0x19, 0x10, 0x0f, 0x00, 0x10, - 0x17, 0x11, 0x01, 0x17, 0x14, 0x1f, 0x1f, 0x17, 0x10, 0x01, 0x10, 0x19, - 0x0f, 0x14, 0x0c, 0x19, 0x01, 0x19, 0x1a, 0x1f, 0x01, 
0x1e, 0x20, 0x1b, - 0x1b, 0x1a, 0x1d, 0x1f, 0x10, 0x0f, 0x00, 0x10, 0x17, 0x11, 0x01, 0x13, - 0x10, 0x0c, 0x0f, 0x10, 0x1d, 0x01, 0x19, 0x1a, 0x1f, 0x01, 0x11, 0x1a, - 0x20, 0x19, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xa1, 0x99, - 0x85, 0x7e, 0x84, 0x82, 0xff, 0x80, 0x80, 0xc1, 0xa1, 0x91, 0x89, 0x86, - 0x89, 0x89, 0x89, 0x89, 0x76, 0x18, 0x14, 0x12, 0xff, 0x10, 0x8f, 0x89, - 0x89, 0x89, 0x71, 0x7e, 0x89, 0x89, 0x89, 0x72, 0x01, 0x81, 0x61, 0x19, - 0x07, 0x62, 0x95, 0x89, 0x95, 0x62, 0x4e, 0x91, 0x91, 0x91, 0x7e, 0xfe, - 0x11, 0x11, 0x11, 0xfe, 0xff, 0x89, 0x89, 0x89, 0x76, 0x7e, 0x81, 0x81, - 0x81, 0x81, 0xff, 0x81, 0x81, 0x81, 0x7e, 0xff, 0x89, 0x89, 0x89, 0x89, - 0xff, 0x09, 0x09, 0x09, 0x09, 0x7e, 0x81, 0x91, 0x51, 0xf1, 0xff, 0x08, - 0x08, 0x08, 0xff, 0x00, 0x81, 0xff, 0x81, 0x00, 0x40, 0x80, 0x80, 0x80, - 0x7f, 0xff, 0x08, 0x14, 0x22, 0xc1, 0xff, 0x80, 0x80, 0x80, 0x80, 0xff, - 0x02, 0x04, 0x02, 0xff, 0xff, 0x06, 0x18, 0x60, 0xff, 0x7e, 0x81, 0x81, - 0x81, 0x7e, 0xff, 0x11, 0x11, 0x11, 0x0e, 0x7e, 0x81, 0xa1, 0xc1, 0xfe, - 0xff, 0x11, 0x11, 0x11, 0xee, 0x86, 0x89, 0x89, 0x89, 0x71, 0x01, 0x01, - 0xff, 0x01, 0x01, 0x7f, 0x80, 0x80, 0x80, 0x7f, 0x1f, 0x60, 0x80, 0x60, - 0x1f, 0xff, 0x40, 0x20, 0x40, 0xff, 0xc7, 0x28, 0x10, 0x28, 0xc7, 0x07, - 0x08, 0xf0, 0x08, 0x07, 0xc1, 0xa1, 0x99, 0x85, 0x83, 0x00, 0x00, 0x00 + 0x00, 0x02, 0x9e, 0x02, 0x00, 0x43, 0x10, 0x24, 0x3c, 0x03, 0x00, 0x01, + 0x14, 0x43, 0xfe, 0xf7, 0x24, 0x02, 0x00, 0x02, 0x3c, 0x05, 0xb0, 0x00, + 0x24, 0x04, 0x00, 0x29, 0x00, 0x00, 0x30, 0x25, 0x02, 0x40, 0xf8, 0x09, + 0x24, 0xa5, 0x2b, 0xd8, 0x3c, 0x04, 0xb0, 0x00, 0x24, 0x84, 0x29, 0xa7, + 0x04, 0x11, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x24, 0x03, 0xff, 0xf0, + 0x00, 0x83, 0x18, 0x24, 0x00, 0x83, 0x20, 0x23, 0x00, 0x85, 0x20, 0x21, + 0x00, 0x00, 0x10, 0x25, 0x00, 0x44, 0x28, 0x2a, 0x14, 0xa0, 0x00, 0x03, + 0x00, 0x43, 0x28, 0x21, 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, + 0xbc, 0xb5, 0x00, 0x00, 0x10, 0x00, 0xff, 0xf9, 0x24, 
0x42, 0x00, 0x10, + 0x24, 0x02, 0xff, 0xfc, 0x00, 0x82, 0x10, 0x24, 0x3c, 0x03, 0xa0, 0x00, + 0x00, 0x43, 0x10, 0x25, 0x8c, 0x42, 0x00, 0x00, 0x00, 0x04, 0x20, 0x27, + 0x00, 0x04, 0x20, 0xc0, 0x00, 0x82, 0x10, 0x06, 0x03, 0xe0, 0x00, 0x08, + 0x30, 0x42, 0x00, 0xff, 0x27, 0xbd, 0xff, 0xe8, 0x3c, 0x02, 0xa4, 0x40, + 0x3c, 0x03, 0xa0, 0x10, 0xaf, 0xbf, 0x00, 0x14, 0xac, 0x43, 0x00, 0x04, + 0x3c, 0x03, 0xa0, 0x12, 0x00, 0x80, 0x38, 0x25, 0x3c, 0x02, 0xa0, 0x10, + 0x24, 0x04, 0xc9, 0x50, 0x24, 0x63, 0x58, 0x00, 0xa4, 0x44, 0x00, 0x00, + 0x24, 0x42, 0x00, 0x02, 0x54, 0x43, 0xff, 0xfe, 0xa4, 0x44, 0x00, 0x00, + 0x3c, 0x02, 0xa4, 0x40, 0x24, 0x03, 0x01, 0x40, 0xac, 0x43, 0x00, 0x08, + 0x24, 0x03, 0x02, 0x00, 0xac, 0x43, 0x00, 0x30, 0x3c, 0x06, 0xa0, 0x10, + 0x24, 0x03, 0x04, 0x00, 0x3c, 0x08, 0xb0, 0x00, 0xac, 0x43, 0x00, 0x34, + 0x24, 0xc6, 0x64, 0x50, 0x25, 0x08, 0x29, 0xdc, 0x24, 0x0b, 0xf7, 0xb2, + 0x04, 0x11, 0xff, 0xdb, 0x00, 0xe0, 0x20, 0x25, 0x00, 0x02, 0x16, 0x00, + 0x00, 0x02, 0x16, 0x03, 0x10, 0x40, 0x00, 0x18, 0x24, 0xe7, 0x00, 0x01, + 0x24, 0x42, 0xff, 0xff, 0x00, 0x02, 0x28, 0x80, 0x00, 0xa2, 0x28, 0x21, + 0x01, 0x05, 0x28, 0x21, 0x24, 0xac, 0x00, 0x05, 0x00, 0xc0, 0x48, 0x25, + 0x24, 0x0d, 0x00, 0x08, 0x04, 0x11, 0xff, 0xce, 0x00, 0xa0, 0x20, 0x25, + 0x01, 0x20, 0x20, 0x25, 0x00, 0x00, 0x18, 0x25, 0x00, 0x62, 0x50, 0x07, + 0x31, 0x4a, 0x00, 0x01, 0x55, 0x40, 0x00, 0x01, 0xa4, 0x8b, 0x00, 0x00, + 0x24, 0x63, 0x00, 0x01, 0x14, 0x6d, 0xff, 0xfa, 0x24, 0x84, 0x02, 0x80, + 0x24, 0xa5, 0x00, 0x01, 0x14, 0xac, 0xff, 0xf3, 0x25, 0x29, 0x00, 0x02, + 0x10, 0x00, 0xff, 0xe4, 0x24, 0xc6, 0x00, 0x0e, 0x3c, 0x05, 0xa4, 0x40, + 0x04, 0x11, 0xff, 0xbd, 0x24, 0xa4, 0x00, 0x09, 0x00, 0x40, 0x30, 0x25, + 0x04, 0x11, 0xff, 0xba, 0x24, 0xa4, 0x00, 0x0b, 0x00, 0x06, 0x20, 0xc0, + 0x00, 0x86, 0x20, 0x23, 0x3c, 0x03, 0xb0, 0x00, 0x00, 0x04, 0x20, 0x80, + 0x24, 0x63, 0x29, 0x18, 0x00, 0x64, 0x18, 0x21, 0x24, 0xa4, 0x00, 0x14, + 0x24, 0xa5, 0x00, 0x30, 0x8c, 0x66, 0x00, 0x00, 0x24, 
0x84, 0x00, 0x04, + 0xac, 0x86, 0xff, 0xfc, 0x14, 0x85, 0xff, 0xfc, 0x24, 0x63, 0x00, 0x04, + 0x10, 0x40, 0x00, 0x02, 0x24, 0x03, 0x32, 0x02, 0x24, 0x03, 0x12, 0x02, + 0x3c, 0x02, 0xa4, 0x40, 0xac, 0x43, 0x00, 0x00, 0x10, 0x00, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x60, 0x8c, 0x62, 0x00, 0x10, + 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x04, 0x48, 0x00, 0x3c, 0x19, 0xa4, 0x00, 0x27, 0x39, 0x0b, 0x80, + 0x03, 0x20, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x80, + 0x8c, 0x62, 0x00, 0x18, 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, + 0x3c, 0x02, 0xbf, 0xc0, 0x24, 0x03, 0x00, 0x08, 0xac, 0x43, 0x07, 0xfc, + 0x40, 0x80, 0xe0, 0x00, 0x40, 0x80, 0xe8, 0x00, 0x3c, 0x02, 0x80, 0x00, + 0x24, 0x43, 0x20, 0x00, 0xbc, 0x49, 0x00, 0x00, 0xbc, 0x49, 0x00, 0x10, + 0xbc, 0x49, 0x00, 0x20, 0xbc, 0x49, 0x00, 0x30, 0x24, 0x42, 0x00, 0x40, + 0x14, 0x43, 0xff, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0x80, 0x00, + 0x24, 0x43, 0x40, 0x00, 0xbc, 0x48, 0x00, 0x00, 0xbc, 0x48, 0x00, 0x20, + 0xbc, 0x48, 0x00, 0x40, 0xbc, 0x48, 0x00, 0x60, 0x24, 0x42, 0x00, 0x80, + 0x14, 0x43, 0xff, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x02, 0xa4, 0x00, + 0x8c, 0x42, 0x00, 0x00, 0x3c, 0x03, 0xa4, 0x04, 0x8c, 0x65, 0x00, 0x14, + 0x14, 0xa0, 0xff, 0xfe, 0x3c, 0x05, 0xa4, 0x00, 0x24, 0xa5, 0x10, 0x00, + 0xac, 0x65, 0x00, 0x00, 0x24, 0x45, 0x80, 0x00, 0xac, 0x65, 0x00, 0x04, + 0x3c, 0x05, 0x00, 0x01, 0x34, 0xa5, 0xf3, 0xff, 0xac, 0x65, 0x00, 0x0c, + 0x3c, 0x03, 0xa4, 0x04, 0x8c, 0x65, 0x00, 0x14, 0x14, 0xa0, 0xff, 0xfe, + 0x3c, 0x05, 0xa4, 0x00, 0x24, 0xa5, 0x00, 0x10, 0xac, 0x65, 0x00, 0x00, + 0x3c, 0x05, 0x00, 0x80, 0x24, 0xa5, 0x20, 0x00, 0xac, 0x65, 0x00, 0x04, + 0x24, 0x05, 0x0f, 0xef, 0xac, 0x65, 0x00, 0x08, 0x3c, 0x05, 0xa4, 0x80, + 0x8c, 0xa3, 0x00, 0x18, 0x30, 0x63, 0x00, 0x03, 0x14, 0x60, 0xff, 0xfd, + 0x00, 0x00, 0x00, 0x00, 0xac, 0xa0, 0x00, 0x18, 0x3c, 0x05, 0xa4, 0x04, + 0x8c, 0xa3, 0x00, 0x18, 0x14, 0x60, 0xff, 0xfe, 0x3c, 
0x03, 0x7f, 0xff, + 0x34, 0x63, 0xff, 0xf0, 0x00, 0x43, 0x10, 0x21, 0x00, 0x80, 0x00, 0x08, + 0x00, 0x40, 0xe8, 0x25, 0x3c, 0x03, 0xa4, 0x60, 0x8c, 0x62, 0x00, 0x10, + 0x30, 0x42, 0x00, 0x03, 0x14, 0x40, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, + 0x8c, 0x82, 0x00, 0x00, 0x03, 0xe0, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, + 0x27, 0xbd, 0xff, 0xe8, 0x3c, 0x04, 0xb8, 0x00, 0x24, 0x84, 0x02, 0x00, + 0xaf, 0xbf, 0x00, 0x14, 0x04, 0x11, 0xff, 0xf3, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x42, 0x10, 0x00, 0x14, 0x40, 0xff, 0xfc, 0x8f, 0xbf, 0x00, 0x14, + 0x03, 0xe0, 0x00, 0x08, 0x27, 0xbd, 0x00, 0x18, 0x27, 0xbd, 0xff, 0xe8, + 0xaf, 0xbf, 0x00, 0x14, 0x04, 0x11, 0xff, 0xf2, 0x00, 0x80, 0x28, 0x25, + 0x24, 0x02, 0x00, 0xf1, 0x00, 0x45, 0x28, 0x23, 0x3c, 0x04, 0xb8, 0x00, + 0x3c, 0x02, 0xa4, 0x00, 0x24, 0x42, 0x0b, 0x60, 0x00, 0x40, 0xf8, 0x09, + 0x24, 0x84, 0x02, 0x08, 0x8f, 0xbf, 0x00, 0x14, 0x10, 0x00, 0xff, 0xe8, + 0x27, 0xbd, 0x00, 0x18, 0x27, 0xbd, 0xff, 0xc0, 0xaf, 0xb3, 0x00, 0x24, + 0x3c, 0x13, 0xa4, 0x00, 0x8e, 0x62, 0x0c, 0x14, 0x24, 0x03, 0x00, 0x03, + 0xaf, 0xb6, 0x00, 0x30, 0xaf, 0xb5, 0x00, 0x2c, 0xaf, 0xb4, 0x00, 0x28, + 0xaf, 0xbf, 0x00, 0x3c, 0xaf, 0xbe, 0x00, 0x38, 0xaf, 0xb7, 0x00, 0x34, + 0xaf, 0xb2, 0x00, 0x20, 0xaf, 0xb1, 0x00, 0x1c, 0xaf, 0xb0, 0x00, 0x18, + 0x00, 0x80, 0xa8, 0x25, 0x00, 0xa0, 0xb0, 0x25, 0x00, 0xc0, 0xa0, 0x25, + 0x10, 0x43, 0x00, 0x48, 0xaf, 0xa7, 0x00, 0x4c, 0x2c, 0x43, 0x00, 0x04, + 0x10, 0x60, 0x00, 0x11, 0x24, 0x03, 0x00, 0x01, 0x10, 0x43, 0x00, 0x45, + 0x24, 0x03, 0x00, 0x02, 0x10, 0x43, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, + 0x8f, 0xbf, 0x00, 0x3c, 0x8f, 0xbe, 0x00, 0x38, 0x8f, 0xb7, 0x00, 0x34, + 0x8f, 0xb6, 0x00, 0x30, 0x8f, 0xb5, 0x00, 0x2c, 0x8f, 0xb4, 0x00, 0x28, + 0x8f, 0xb3, 0x00, 0x24, 0x8f, 0xb2, 0x00, 0x20, 0x8f, 0xb1, 0x00, 0x1c, + 0x8f, 0xb0, 0x00, 0x18, 0x03, 0xe0, 0x00, 0x08, 0x27, 0xbd, 0x00, 0x40, + 0x24, 0x03, 0x00, 0x04, 0x14, 0x43, 0xff, 0xf2, 0x3c, 0x02, 0xa4, 0x00, + 0x8c, 0x52, 0x0c, 0x10, 0x12, 0x40, 0xff, 0xf0, 0x8f, 
0xbf, 0x00, 0x3c, + 0x10, 0x00, 0x00, 0x05, 0x3c, 0x17, 0xa4, 0x00, 0x04, 0x11, 0xff, 0xc3, + 0x24, 0x04, 0x00, 0x01, 0x3c, 0x12, 0xb3, 0x00, 0x3c, 0x17, 0xa4, 0x00, + 0x00, 0x00, 0x88, 0x25, 0x26, 0xf7, 0x0b, 0x60, 0x02, 0xb1, 0x10, 0x23, + 0x1c, 0x40, 0x00, 0x29, 0x02, 0x51, 0x80, 0x21, 0x12, 0x80, 0x00, 0x0d, + 0x3c, 0x05, 0x20, 0x20, 0x27, 0xa2, 0x00, 0x4c, 0x3c, 0x15, 0xa4, 0x00, + 0x3c, 0x16, 0x20, 0x20, 0xaf, 0xa2, 0x00, 0x10, 0x00, 0x00, 0xf0, 0x25, + 0x26, 0xb5, 0x0b, 0x60, 0x26, 0x97, 0xff, 0xff, 0x26, 0xd6, 0x20, 0x20, + 0x03, 0xd4, 0x10, 0x2a, 0x14, 0x40, 0x00, 0x22, 0x8f, 0xa2, 0x00, 0x10, + 0x3c, 0x05, 0x20, 0x20, 0x3c, 0x11, 0xa4, 0x00, 0x02, 0x00, 0x20, 0x25, + 0x26, 0x31, 0x0b, 0x60, 0x02, 0x20, 0xf8, 0x09, 0x24, 0xa5, 0x20, 0x0a, + 0x8e, 0x62, 0x0c, 0x14, 0x02, 0x12, 0x80, 0x23, 0x24, 0x03, 0x00, 0x03, + 0x10, 0x43, 0x00, 0x52, 0x26, 0x10, 0x00, 0x04, 0x2c, 0x43, 0x00, 0x04, + 0x10, 0x60, 0x00, 0x73, 0x24, 0x03, 0x00, 0x01, 0x14, 0x43, 0x00, 0x33, + 0x3c, 0x12, 0xb8, 0x00, 0x3c, 0x04, 0xb3, 0xff, 0x02, 0x00, 0x28, 0x25, + 0x02, 0x20, 0xf8, 0x09, 0x24, 0x84, 0x00, 0x14, 0x10, 0x00, 0xff, 0xc2, + 0x8f, 0xbf, 0x00, 0x3c, 0x10, 0x00, 0xff, 0xd6, 0x3c, 0x12, 0xbf, 0xfe, + 0x3c, 0x12, 0xb3, 0xff, 0x10, 0x00, 0xff, 0xd3, 0x26, 0x52, 0x00, 0x20, + 0x02, 0xd1, 0x10, 0x21, 0x8c, 0x45, 0x00, 0x00, 0x02, 0xe0, 0xf8, 0x09, + 0x02, 0x00, 0x20, 0x25, 0x10, 0x00, 0xff, 0xd0, 0x26, 0x31, 0x00, 0x04, + 0x00, 0x00, 0x28, 0x25, 0x24, 0x43, 0x00, 0x04, 0xaf, 0xa3, 0x00, 0x10, + 0x00, 0x00, 0x88, 0x25, 0x8c, 0x43, 0x00, 0x00, 0x24, 0x02, 0x00, 0x08, + 0x00, 0x03, 0x27, 0x02, 0x2c, 0x87, 0x00, 0x0a, 0x10, 0xe0, 0x00, 0x02, + 0x24, 0x86, 0x00, 0x37, 0x24, 0x86, 0x00, 0x30, 0x00, 0x11, 0x26, 0x02, + 0x00, 0x05, 0x2a, 0x00, 0x00, 0x11, 0x8a, 0x00, 0x24, 0x42, 0xff, 0xff, + 0x00, 0x85, 0x28, 0x25, 0x00, 0xd1, 0x88, 0x25, 0x14, 0x40, 0xff, 0xf4, + 0x00, 0x03, 0x19, 0x00, 0x02, 0xa0, 0xf8, 0x09, 0x02, 0x00, 0x20, 0x25, + 0x26, 0x04, 0x00, 0x04, 0x02, 0xa0, 0xf8, 0x09, 0x02, 
0x20, 0x28, 0x25, + 0x03, 0xd7, 0x10, 0x2a, 0x10, 0x40, 0x00, 0x04, 0x26, 0x04, 0x00, 0x08, + 0x02, 0xa0, 0xf8, 0x09, 0x02, 0xc0, 0x28, 0x25, 0x26, 0x04, 0x00, 0x0c, + 0x27, 0xde, 0x00, 0x01, 0x10, 0x00, 0xff, 0xbc, 0x00, 0x80, 0x80, 0x25, + 0x04, 0x11, 0xff, 0x6b, 0x00, 0x00, 0x20, 0x25, 0x3c, 0x05, 0x59, 0x80, + 0x02, 0x20, 0xf8, 0x09, 0x26, 0x44, 0x04, 0x04, 0x3c, 0x02, 0x00, 0xff, + 0x34, 0x42, 0xff, 0xff, 0x02, 0x02, 0x80, 0x24, 0x3c, 0x05, 0x01, 0x00, + 0x02, 0x05, 0x28, 0x25, 0x02, 0x20, 0xf8, 0x09, 0x26, 0x44, 0x04, 0x08, + 0x24, 0x05, 0x00, 0x08, 0x02, 0x20, 0xf8, 0x09, 0x26, 0x44, 0x04, 0x00, + 0x24, 0x05, 0x04, 0x02, 0x26, 0x44, 0x04, 0x00, 0x04, 0x11, 0xff, 0x47, + 0x00, 0x00, 0x00, 0x00, 0x30, 0x42, 0x00, 0xf0, 0x10, 0x40, 0xff, 0x7f, + 0x24, 0xa5, 0xff, 0xff, 0x14, 0xa0, 0xff, 0xfa, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0xff, 0x7b, 0xae, 0x60, 0x0c, 0x14, 0x3c, 0x12, 0xbf, 0xff, + 0x3c, 0x05, 0xbf, 0xfe, 0x02, 0x20, 0xf8, 0x09, 0x26, 0x44, 0x00, 0x04, + 0x3c, 0x02, 0x00, 0xff, 0x34, 0x42, 0xff, 0xff, 0x02, 0x02, 0x80, 0x24, + 0x3c, 0x05, 0x01, 0x00, 0x02, 0x05, 0x28, 0x25, 0x02, 0x20, 0xf8, 0x09, + 0x26, 0x44, 0x00, 0x08, 0x24, 0x05, 0x00, 0x4d, 0x02, 0x20, 0xf8, 0x09, + 0x3c, 0x04, 0xbf, 0xff, 0x04, 0x11, 0xff, 0x30, 0x3c, 0x04, 0xbf, 0xff, + 0x04, 0x40, 0xff, 0xfd, 0x3c, 0x10, 0xbf, 0xff, 0x24, 0x12, 0x01, 0x02, + 0x26, 0x10, 0x00, 0x04, 0x26, 0x52, 0xff, 0xff, 0x52, 0x40, 0xff, 0x64, + 0xae, 0x60, 0x0c, 0x14, 0x24, 0x05, 0x00, 0x55, 0x02, 0x20, 0xf8, 0x09, + 0x3c, 0x04, 0xbf, 0xff, 0x04, 0x11, 0xff, 0x24, 0x3c, 0x04, 0xbf, 0xff, + 0x04, 0x40, 0xff, 0xfd, 0x00, 0x00, 0x00, 0x00, 0x04, 0x11, 0xff, 0x20, + 0x02, 0x00, 0x20, 0x25, 0x04, 0x40, 0xff, 0xf4, 0x26, 0x52, 0xff, 0xff, + 0x10, 0x00, 0xff, 0x58, 0x8f, 0xbf, 0x00, 0x3c, 0x3c, 0x03, 0xa4, 0x00, + 0x8c, 0x62, 0x0c, 0x10, 0x00, 0x50, 0x10, 0x21, 0x10, 0x00, 0xff, 0x52, + 0xac, 0x62, 0x0c, 0x10, 0x3c, 0x02, 0xa4, 0x30, 0x8c, 0x42, 0x00, 0x04, + 0x27, 0xbd, 0xff, 0xd8, 0x30, 0x42, 0x00, 0xf0, 0x24, 
0x03, 0x00, 0xb0, + 0xaf, 0xb1, 0x00, 0x18, 0xaf, 0xbf, 0x00, 0x24, 0xaf, 0xb3, 0x00, 0x20, + 0xaf, 0xb2, 0x00, 0x1c, 0xaf, 0xb0, 0x00, 0x14, 0x14, 0x43, 0x00, 0x0f, + 0x3c, 0x11, 0xa4, 0x00, 0x24, 0x02, 0x00, 0x04, 0xae, 0x22, 0x0c, 0x14, + 0x3c, 0x03, 0x80, 0x00, 0x8c, 0x64, 0x03, 0x5c, 0x3c, 0x02, 0xa4, 0x00, + 0x10, 0x80, 0x00, 0x44, 0xac, 0x44, 0x0c, 0x10, 0x8f, 0xbf, 0x00, 0x24, + 0x8f, 0xb3, 0x00, 0x20, 0x8f, 0xb2, 0x00, 0x1c, 0x8f, 0xb1, 0x00, 0x18, + 0x8f, 0xb0, 0x00, 0x14, 0x03, 0xe0, 0x00, 0x08, 0x27, 0xbd, 0x00, 0x28, + 0x3c, 0x04, 0xb8, 0x00, 0x04, 0x11, 0xfe, 0xfa, 0x24, 0x84, 0x02, 0xec, + 0x3c, 0x03, 0x55, 0x44, 0x3c, 0x10, 0xa4, 0x00, 0x24, 0x63, 0x45, 0x56, + 0x10, 0x43, 0x00, 0x21, 0x26, 0x10, 0x0b, 0x60, 0x3c, 0x12, 0xbf, 0xff, + 0x00, 0x00, 0x28, 0x25, 0x02, 0x00, 0xf8, 0x09, 0x26, 0x44, 0x00, 0x10, + 0x3c, 0x05, 0x5f, 0x55, 0x24, 0xa5, 0x4e, 0x4c, 0x02, 0x00, 0xf8, 0x09, + 0x36, 0x44, 0x00, 0x10, 0x3c, 0x05, 0x4f, 0x43, 0x36, 0x44, 0x00, 0x10, + 0x02, 0x00, 0xf8, 0x09, 0x24, 0xa5, 0x4b, 0x5f, 0x04, 0x11, 0xfe, 0xe7, + 0x26, 0x44, 0x00, 0x0c, 0x3c, 0x03, 0x53, 0x43, 0x24, 0x63, 0x76, 0x32, + 0x10, 0x43, 0x00, 0x0d, 0x3c, 0x12, 0x12, 0x34, 0x3c, 0x13, 0xb3, 0xff, + 0x26, 0x45, 0x56, 0x78, 0x02, 0x00, 0xf8, 0x09, 0x26, 0x64, 0x00, 0x20, + 0x04, 0x11, 0xfe, 0xdd, 0x36, 0x64, 0x00, 0x20, 0x36, 0x52, 0x56, 0x78, + 0x54, 0x52, 0xff, 0xd7, 0xae, 0x20, 0x0c, 0x14, 0x24, 0x02, 0x00, 0x01, + 0x10, 0x00, 0xff, 0xd4, 0xae, 0x22, 0x0c, 0x14, 0x10, 0x00, 0xff, 0xfd, + 0x24, 0x02, 0x00, 0x03, 0x24, 0x02, 0x00, 0x02, 0xae, 0x22, 0x0c, 0x14, + 0x24, 0x04, 0x00, 0x01, 0x04, 0x11, 0xfe, 0xe3, 0x3c, 0x11, 0xb3, 0x00, + 0x26, 0x32, 0x10, 0x00, 0x02, 0x20, 0x20, 0x25, 0x00, 0x00, 0x28, 0x25, + 0x02, 0x00, 0xf8, 0x09, 0x26, 0x31, 0x00, 0x04, 0x16, 0x32, 0xff, 0xfc, + 0x02, 0x20, 0x20, 0x25, 0x8f, 0xbf, 0x00, 0x24, 0x8f, 0xb3, 0x00, 0x20, + 0x8f, 0xb2, 0x00, 0x1c, 0x8f, 0xb1, 0x00, 0x18, 0x8f, 0xb0, 0x00, 0x14, + 0x00, 0x00, 0x20, 0x25, 0x10, 0x00, 0xfe, 0xd4, 0x27, 
0xbd, 0x00, 0x28, + 0x8c, 0x64, 0x03, 0x64, 0x14, 0x80, 0xff, 0xbb, 0xac, 0x44, 0x0c, 0x10, + 0x8c, 0x63, 0x03, 0x6c, 0x14, 0x60, 0xff, 0xb8, 0xac, 0x43, 0x0c, 0x10, + 0x10, 0x00, 0xff, 0xb6, 0xae, 0x20, 0x0c, 0x14, 0x04, 0x04, 0x23, 0x3a, + 0x00, 0x00, 0x02, 0x71, 0x00, 0x15, 0x0c, 0x69, 0x0c, 0x6f, 0x0c, 0x6e, + 0x00, 0x80, 0x03, 0x00, 0x00, 0x5f, 0x02, 0x39, 0x00, 0x09, 0x02, 0x6b, + 0x03, 0xe5, 0x22, 0x39, 0x00, 0x00, 0x02, 0x0d, 0x00, 0x00, 0x0c, 0x15, + 0x0c, 0x15, 0x0c, 0x15, 0x00, 0x6c, 0x02, 0xec, 0x00, 0x25, 0x01, 0xff, + 0x00, 0x0e, 0x02, 0x04, 0x04, 0x65, 0x1e, 0x39, 0x00, 0x00, 0x02, 0x0d, + 0x00, 0x04, 0x0c, 0x11, 0x0c, 0x19, 0x0c, 0x1a, 0x00, 0x6c, 0x02, 0xec, + 0x00, 0x25, 0x01, 0xff, 0x00, 0x0e, 0x02, 0x04, 0x10, 0x17, 0x11, 0x01, + 0x1a, 0x11, 0x11, 0x1e, 0x10, 0x1f, 0x01, 0x19, 0x1a, 0x1f, 0x01, 0x04, + 0x01, 0x0d, 0x24, 0x1f, 0x10, 0x01, 0x0c, 0x17, 0x14, 0x12, 0x19, 0x10, + 0x0f, 0x00, 0x10, 0x17, 0x11, 0x01, 0x21, 0x0c, 0x0f, 0x0f, 0x1d, 0x01, + 0x19, 0x1a, 0x1f, 0x01, 0x0a, 0x01, 0x0d, 0x24, 0x1f, 0x10, 0x01, 0x0c, + 0x17, 0x14, 0x12, 0x19, 0x10, 0x0f, 0x00, 0x10, 0x17, 0x11, 0x01, 0x17, + 0x14, 0x1f, 0x1f, 0x17, 0x10, 0x01, 0x10, 0x19, 0x0f, 0x14, 0x0c, 0x19, + 0x01, 0x19, 0x1a, 0x1f, 0x01, 0x1e, 0x20, 0x1b, 0x1b, 0x1a, 0x1d, 0x1f, + 0x10, 0x0f, 0x00, 0x10, 0x17, 0x11, 0x01, 0x13, 0x10, 0x0c, 0x0f, 0x10, + 0x1d, 0x01, 0x19, 0x1a, 0x1f, 0x01, 0x11, 0x1a, 0x20, 0x19, 0x0f, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xa1, 0x99, 0x85, 0x7e, 0x84, 0x82, + 0xff, 0x80, 0x80, 0xc1, 0xa1, 0x91, 0x89, 0x86, 0x89, 0x89, 0x89, 0x89, + 0x76, 0x18, 0x14, 0x12, 0xff, 0x10, 0x8f, 0x89, 0x89, 0x89, 0x71, 0x7e, + 0x89, 0x89, 0x89, 0x72, 0x01, 0x81, 0x61, 0x19, 0x07, 0x62, 0x95, 0x89, + 0x95, 0x62, 0x4e, 0x91, 0x91, 0x91, 0x7e, 0xfe, 0x11, 0x11, 0x11, 0xfe, + 0xff, 0x89, 0x89, 0x89, 0x76, 0x7e, 0x81, 0x81, 0x81, 0x81, 0xff, 0x81, + 0x81, 0x81, 0x7e, 0xff, 0x89, 0x89, 0x89, 0x89, 0xff, 0x09, 0x09, 0x09, + 0x09, 0x7e, 0x81, 0x91, 0x51, 0xf1, 0xff, 0x08, 0x08, 
0x08, 0xff, 0x00, + 0x81, 0xff, 0x81, 0x00, 0x40, 0x80, 0x80, 0x80, 0x7f, 0xff, 0x08, 0x14, + 0x22, 0xc1, 0xff, 0x80, 0x80, 0x80, 0x80, 0xff, 0x02, 0x04, 0x02, 0xff, + 0xff, 0x06, 0x18, 0x60, 0xff, 0x7e, 0x81, 0x81, 0x81, 0x7e, 0xff, 0x11, + 0x11, 0x11, 0x0e, 0x7e, 0x81, 0xa1, 0xc1, 0xfe, 0xff, 0x11, 0x11, 0x11, + 0xee, 0x86, 0x89, 0x89, 0x89, 0x71, 0x01, 0x01, 0xff, 0x01, 0x01, 0x7f, + 0x80, 0x80, 0x80, 0x7f, 0x1f, 0x60, 0x80, 0x60, 0x1f, 0xff, 0x40, 0x20, + 0x40, 0xff, 0xc7, 0x28, 0x10, 0x28, 0xc7, 0x07, 0x08, 0xf0, 0x08, 0x07, + 0xc1, 0xa1, 0x99, 0x85, 0x83, 0x00, 0x00, 0x00, 0x4c, 0x69, 0x62, 0x64, + 0x72, 0x61, 0x67, 0x6f, 0x6e, 0x20, 0x49, 0x50, 0x4c, 0x33, 0x20, 0x20, + 0x20, 0x20, 0x00, 0x00, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x20, 0x6d, 0x65, + 0x6d, 0x6f, 0x72, 0x79, 0x3a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, + 0x73, 0x74, 0x61, 0x67, 0x65, 0x32, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x41, 0x53, 0x53, 0x45, 0x52, 0x54, 0x49, 0x4f, 0x4e, 0x20, 0x46, 0x41, + 0x49, 0x4c, 0x45, 0x44, 0x3a, 0x20, 0x63, 0x68, 0x69, 0x70, 0x5f, 0x69, + 0x64, 0x20, 0x3c, 0x20, 0x35, 0x31, 0x32, 0x20, 0x20, 0x20, 0x20, 0x00, + 0x72, 0x64, 0x72, 0x61, 0x6d, 0x5f, 0x72, 0x65, 0x67, 0x5f, 0x69, 0x6e, + 0x69, 0x74, 0x3a, 0x20, 0x49, 0x4f, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x65, 0x72, 0x72, 0x6f, + 0x72, 0x3a, 0x20, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x74, 0x20, 0x63, + 0x61, 0x6c, 0x69, 0x62, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x66, + 0x61, 0x69, 0x6c, 0x65, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x63, 0x68, + 0x69, 0x70, 0x5f, 0x69, 0x64, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x69, 0x6e, 0x76, 0x61, 0x6c, + 0x69, 0x64, 0x20, 0x67, 0x65, 0x6f, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x3a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x43, 0x68, 0x69, 0x70, + 0x3a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x09, 0x4d, 0x61, 0x6e, + 0x75, 0x66, 0x61, 0x63, 0x74, 0x75, 0x72, 0x65, 0x72, 
0x3a, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x00, 0x09, 0x47, 0x65, 0x6f, 0x6d, 0x65, 0x74, 0x72, + 0x79, 0x3a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x09, 0x43, 0x75, 0x72, + 0x72, 0x65, 0x6e, 0x74, 0x3a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, + 0x09, 0x52, 0x41, 0x53, 0x3a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, + 0x3f, 0x80, 0x00, 0x00, 0x3c, 0x4c, 0xcc, 0xcd, 0x40, 0x0c, 0xcc, 0xcd, + 0x3f, 0x00, 0x00, 0x00, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x66, 0x72, + 0x6f, 0x6d, 0x20, 0x52, 0x44, 0x52, 0x41, 0x4d, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x00, 0x00, 0x00, 0x45, 0x4c, 0x46, 0x3a, 0x20, 0x6c, 0x69, 0x74, + 0x74, 0x6c, 0x65, 0x20, 0x65, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x20, 0x45, + 0x4c, 0x46, 0x73, 0x20, 0x61, 0x72, 0x65, 0x20, 0x6e, 0x6f, 0x74, 0x20, + 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x20, 0x20, 0x20, + 0x20, 0x00, 0x00, 0x00, 0x45, 0x4c, 0x46, 0x3a, 0x20, 0x76, 0x61, 0x64, + 0x64, 0x72, 0x20, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x38, 0x2d, + 0x62, 0x79, 0x74, 0x65, 0x20, 0x61, 0x6c, 0x69, 0x67, 0x6e, 0x65, 0x64, + 0x20, 0x69, 0x6e, 0x20, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x20, + 0x20, 0x20, 0x20, 0x00, 0x45, 0x4c, 0x46, 0x3a, 0x20, 0x66, 0x69, 0x6c, + 0x65, 0x20, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x20, 0x69, 0x73, 0x20, + 0x6e, 0x6f, 0x74, 0x20, 0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x61, + 0x6c, 0x69, 0x67, 0x6e, 0x65, 0x64, 0x20, 0x69, 0x6e, 0x20, 0x73, 0x65, + 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, + 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x42, 0x6f, 0x6f, 0x74, 0x20, 0x66, 0x6c, 0x61, + 0x67, 0x73, 0x3a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x45, 0x4c, 0x46, 0x20, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x20, 0x6e, + 0x6f, 0x74, 0x20, 0x66, 0x6f, 0x75, 0x6e, 0x64, 0x3a, 0x20, 0x6d, 0x61, + 0x6b, 0x65, 0x20, 0x73, 0x75, 0x72, 0x65, 0x20, 0x69, 0x74, 0x20, 0x69, + 0x73, 0x20, 0x32, 0x35, 0x36, 0x2d, 0x62, 0x79, 0x74, 
0x65, 0x20, 0x61, + 0x6c, 0x69, 0x67, 0x6e, 0x65, 0x64, 0x20, 0x20, 0x20, 0x20, 0x00 }; -unsigned int default_ipl3_len = 6288; +unsigned int default_ipl3_len = 11471; From 53557227a20832fb6ba9729f92baa18c70869d5a Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 18:01:19 +0200 Subject: [PATCH 41/48] gl: fix usage of CI4/CI8 textures in RDPQ_TEXTURING_N64 mode --- src/GL/gl_constants.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GL/gl_constants.h b/src/GL/gl_constants.h index 510c6fe048..bcf42d019d 100644 --- a/src/GL/gl_constants.h +++ b/src/GL/gl_constants.h @@ -160,7 +160,7 @@ #define VTX_LOADER_MAX_COMMANDS 11 #define VTX_LOADER_MAX_SIZE (VTX_LOADER_MAX_COMMANDS * 4) -#define RDPQ_TEXTURING_MASK ((SOM_SAMPLE_MASK | SOM_TEXTURE_LOD | SOMX_LOD_INTERPOLATE | SOMX_NUMLODS_MASK)>>32) +#define RDPQ_TEXTURING_MASK ((SOM_SAMPLE_MASK | SOM_TEXTURE_LOD | SOMX_LOD_INTERPOLATE | SOMX_NUMLODS_MASK | SOM_TLUT_MASK)>>32) #define PALETTE_MATRIX_INDEX 3 From a7572eb88603876e7f206077879baf529d34fc44 Mon Sep 17 00:00:00 2001 From: thekovic <72971433+thekovic@users.noreply.github.com> Date: Mon, 10 Jun 2024 16:57:37 +0200 Subject: [PATCH 42/48] vi: Handle PAL50 line count when configuring VI --- src/display.c | 9 ++++++++- src/vi.h | 14 +++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/display.c b/src/display.c index e6fa1ab02d..7e7e403934 100644 --- a/src/display.c +++ b/src/display.c @@ -263,7 +263,14 @@ void display_init( resolution_t res, bitdepth_t bit, uint32_t num_buffers, gamma vi_write_safe(VI_ORIGIN, PhysicalAddr(__safe_buffer[0])); vi_write_safe(VI_WIDTH, res.width); vi_write_safe(VI_X_SCALE, VI_X_SCALE_SET(res.width)); - vi_write_safe(VI_Y_SCALE, VI_Y_SCALE_SET(res.height)); + if (__tv_type == TV_PAL) + { + vi_write_safe(VI_Y_SCALE, VI_Y_SCALE_SET_288_LINES(res.height)); + } + else + { + vi_write_safe(VI_Y_SCALE, VI_Y_SCALE_SET_240_LINES(res.height)); + } vi_write_safe(VI_CTRL, control); 
enable_interrupts(); diff --git a/src/vi.h b/src/vi.h index c7b25eb785..204df70329 100644 --- a/src/vi.h +++ b/src/vi.h @@ -79,12 +79,12 @@ typedef struct vi_config_s{ static const vi_config_t vi_ntsc_p = {.regs = { 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x03e52239, 0x0000020d, 0x00000c15, - 0x0c150c15, 0x006c02ec, 0x002501ff, 0x000e0204, + 0x0c150c15, 0x006c02ec, 0x00230203, 0x000e0204, 0x00000000, 0x00000000 }}; static const vi_config_t vi_pal_p = {.regs = { 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x0404233a, 0x00000271, 0x00150c69, - 0x0c6f0c6e, 0x00800300, 0x005f0239, 0x0009026b, + 0x0c6f0c6e, 0x00800300, 0x002d026d, 0x0009026b, 0x00000000, 0x00000000 }}; static const vi_config_t vi_mpal_p = {.regs = { 0x00000000, 0x00000000, 0x00000000, 0x00000002, @@ -94,12 +94,12 @@ static const vi_config_t vi_mpal_p = {.regs = { static const vi_config_t vi_ntsc_i = {.regs = { 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x03e52239, 0x0000020c, 0x00000c15, - 0x0c150c15, 0x006c02ec, 0x002301fd, 0x000e0204, + 0x0c150c15, 0x006c02ec, 0x00230203, 0x000e0204, 0x00000000, 0x00000000 }}; static const vi_config_t vi_pal_i = {.regs = { 0x00000000, 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x0404233a, 0x00000270, 0x00150c69, - 0x0c6f0c6e, 0x00800300, 0x005d0237, 0x0009026b, + 0x0c6f0c6e, 0x00800300, 0x002d026d, 0x0009026b, 0x00000000, 0x00000000 }}; static const vi_config_t vi_mpal_i = {.regs = { 0x00000000, 0x00000000, 0x00000000, 0x00000002, @@ -204,7 +204,11 @@ static const vi_config_t vi_config_presets[2][3] = { /** Under VI_Y_SCALE */ /** @brief VI_Y_SCALE Register: set 1/vertical scale up factor (value is converted to 2.10 format) */ -#define VI_Y_SCALE_SET(value) (( 1024*(value) + 120 ) / 240) +#define VI_Y_SCALE_SET_240_LINES(value) (( 1024*(value) + 120 ) / 240) + +/** Under VI_Y_SCALE */ +/** @brief VI_Y_SCALE Register: set 1/vertical scale up factor (value is converted to 2.10 format) */ +#define 
VI_Y_SCALE_SET_288_LINES(value) (( 1024*(value) + 144 ) / 288) /** * @brief Write a set of video registers to the VI From 7ec5c9ed731b1da11e33db5a39dde097d5b37e04 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 23:08:00 +0200 Subject: [PATCH 43/48] rdpq: fix rdpq_tex_blit when flipping with a source rect Also fix a bug that cause the texture loader to miscalculate the strip heights when a source rect was activated, causing far more chunks to be generated. Updates #562 --- src/rdpq/rdpq_tex.c | 58 +++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/src/rdpq/rdpq_tex.c b/src/rdpq/rdpq_tex.c index aa6da2ce58..025857f707 100644 --- a/src/rdpq/rdpq_tex.c +++ b/src/rdpq/rdpq_tex.c @@ -464,7 +464,7 @@ static void ltd_texloader(rdpq_tile_t tile, const surface_t *tex, int s0, int t0 tex_loader_t tload = tex_loader_init(tile, tex); // Calculate the optimal height for a strip, based on strips of maximum length. - int tile_h = tex_loader_calc_max_height(&tload, tex->width); + int tile_h = tex_loader_calc_max_height(&tload, s1 - s0); // Go through the surface while (t0 < t1) @@ -491,24 +491,26 @@ static void tex_xblit_norotate_noscale(const surface_t *surf, float x0, float y0 rdpq_tile_t tile = parms->tile; int src_width = parms->width ? parms->width : surf->width; int src_height = parms->height ? 
parms->height : surf->height; - int s0 = parms->s0; - int t0 = parms->t0; - int cx = parms->cx + s0; - int cy = parms->cy + t0; + int os0 = parms->s0; + int ot0 = parms->t0; + int os1 = os0 + src_width; + int ot1 = ot0 + src_height; bool flip_x = parms->flip_x; bool flip_y = parms->flip_y; + x0 -= os0 + parms->cx; + y0 -= ot0 + parms->cy; void draw_cb(rdpq_tile_t tile, int s0, int t0, int s1, int t1) { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (flip_x) { ks0 = src_width - s0 - 1; ks1 = src_width - s1 - 1; } - if (flip_y) { kt0 = src_height - t0 - 1; kt1 = src_height - t1 - 1; } + if (flip_x) { ks0 = os1 - s0 + os0 - 1; ks1 = os1 - s1 + os0 - 1; } + if (flip_y) { kt0 = ot1 - t0 + ot0 - 1; kt1 = ot1 - t1 + ot0 - 1; } - rdpq_texture_rectangle(tile, x0 + ks0 - cx, y0 + kt0 - cy, x0 + ks1 - cx, y0 + kt1 - cy, s0, t0); + rdpq_texture_rectangle(tile, x0 + ks0, y0 + kt0, x0 + ks1, y0 + kt1, s0, t0); } - (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb, parms->filtering); + (*ltd)(tile, surf, os0, ot0, os1, ot1, draw_cb, parms->filtering); } __attribute__((noinline)) @@ -517,10 +519,12 @@ static void tex_xblit_norotate(const surface_t *surf, float x0, float y0, const rdpq_tile_t tile = parms->tile; int src_width = parms->width ? parms->width : surf->width; int src_height = parms->height ? parms->height : surf->height; - int s0 = parms->s0; - int t0 = parms->t0; - int cx = parms->cx + s0; - int cy = parms->cy + t0; + int os0 = parms->s0; + int ot0 = parms->t0; + int os1 = os0 + src_width; + int ot1 = ot0 + src_height; + int cx = parms->cx + os0; + int cy = parms->cy + ot0; float scalex = parms->scale_x == 0 ? 1.0f : parms->scale_x; float scaley = parms->scale_y == 0 ? 
1.0f : parms->scale_y; bool flip_x = (scalex < 0) ^ parms->flip_x; @@ -537,8 +541,8 @@ static void tex_xblit_norotate(const surface_t *surf, float x0, float y0, const { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (flip_x) { ks0 = src_width - s0 - 1; ks1 = src_width - s1 - 1; } - if (flip_y) { kt0 = src_height - t0 - 1; kt1 = src_height - t1 - 1; } + if (flip_x) { ks0 = os1 - s0 + os0 - 1; ks1 = os1 - s1 + os0 - 1; } + if (flip_y) { kt0 = ot1 - t0 + ot0 - 1; kt1 = ot1 - t1 + ot0 - 1; } float k0x = mtx[0][0] * ks0 + mtx[1][0] * kt0 + mtx[2][0]; float k0y = mtx[0][1] * ks0 + mtx[1][1] * kt0 + mtx[2][1]; @@ -548,7 +552,7 @@ static void tex_xblit_norotate(const surface_t *surf, float x0, float y0, const rdpq_texture_rectangle_scaled(tile, k0x, k0y, k2x, k2y, s0, t0, s1, t1); } - (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb, parms->filtering); + (*ltd)(tile, surf, os0, ot0, os1, ot1, draw_cb, parms->filtering); } __attribute__((noinline)) @@ -557,10 +561,12 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit rdpq_tile_t tile = parms->tile; int src_width = parms->width ? parms->width : surf->width; int src_height = parms->height ? parms->height : surf->height; - int s0 = parms->s0; - int t0 = parms->t0; - int cx = parms->cx + s0; - int cy = parms->cy + t0; + int os0 = parms->s0; + int ot0 = parms->t0; + int os1 = os0 + src_width; + int ot1 = ot0 + src_height; + int cx = parms->cx + os0; + int cy = parms->cy + ot0; int nx = parms->nx; int ny = parms->ny; float scalex = parms->scale_x == 0 ? 
1.0f : parms->scale_x; @@ -580,8 +586,8 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (parms->flip_x) { ks0 = src_width - ks0; ks1 = src_width - ks1; } - if (parms->flip_y) { kt0 = src_height - kt0; kt1 = src_height - kt1; } + if (parms->flip_x) { ks0 = os1 - ks0 + os0; ks1 = os1 - ks1 + os0; } + if (parms->flip_y) { kt0 = ot1 - kt0 + ot0; kt1 = ot1 - kt1 + ot0; } float k0x = mtx[0][0] * ks0 + mtx[1][0] * kt0 + mtx[2][0]; float k0y = mtx[0][1] * ks0 + mtx[1][1] * kt0 + mtx[2][1]; @@ -603,8 +609,8 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit void draw_cb_multi_rot(rdpq_tile_t tile, int s0, int t0, int s1, int t1) { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (parms->flip_x) { ks0 = src_width - ks0; ks1 = src_width - ks1; } - if (parms->flip_y) { kt0 = src_height - kt0; kt1 = src_height - kt1; } + if (parms->flip_x) { ks0 = os1 - ks0 + os0; ks1 = os1 - ks1 + os0; } + if (parms->flip_y) { kt0 = ot1 - kt0 + ot0; kt1 = ot1 - kt1 + ot0; } assert(s1-s0 == src_width); @@ -642,9 +648,9 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit } if (nx || ny) { - (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb_multi_rot, parms->filtering); + (*ltd)(tile, surf, os0, ot0, os1, ot1, draw_cb_multi_rot, parms->filtering); } else { - (*ltd)(tile, surf, s0, t0, s0 + src_width, t0 + src_height, draw_cb, parms->filtering); + (*ltd)(tile, surf, os0, ot0, os1, ot1, draw_cb, parms->filtering); } } From 4bbfef26f4ab545354c72efbf6023277a49ff637 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 23:22:59 +0200 Subject: [PATCH 44/48] rdpq: fix rdpq_tex_blit with negative scale and rotation --- src/rdpq/rdpq_tex.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/rdpq/rdpq_tex.c b/src/rdpq/rdpq_tex.c index 025857f707..4a8ba7b3dd 100644 --- 
a/src/rdpq/rdpq_tex.c +++ b/src/rdpq/rdpq_tex.c @@ -13,6 +13,7 @@ #include "rdpq_tex.h" #include "rdpq_tex_internal.h" #include "utils.h" +#include "fmath.h" #include /** @brief Non-zero if we are doing a multi-texture upload */ @@ -527,8 +528,6 @@ static void tex_xblit_norotate(const surface_t *surf, float x0, float y0, const int cy = parms->cy + ot0; float scalex = parms->scale_x == 0 ? 1.0f : parms->scale_x; float scaley = parms->scale_y == 0 ? 1.0f : parms->scale_y; - bool flip_x = (scalex < 0) ^ parms->flip_x; - bool flip_y = (scaley < 0) ^ parms->flip_y; float mtx[3][2] = { { scalex, 0 }, @@ -541,8 +540,8 @@ static void tex_xblit_norotate(const surface_t *surf, float x0, float y0, const { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (flip_x) { ks0 = os1 - s0 + os0 - 1; ks1 = os1 - s1 + os0 - 1; } - if (flip_y) { kt0 = ot1 - t0 + ot0 - 1; kt1 = ot1 - t1 + ot0 - 1; } + if (parms->flip_x) { ks0 = os1 - s0 + os0 - 1; ks1 = os1 - s1 + os0 - 1; } + if (parms->flip_y) { kt0 = ot1 - t0 + ot0 - 1; kt1 = ot1 - t1 + ot0 - 1; } float k0x = mtx[0][0] * ks0 + mtx[1][0] * kt0 + mtx[2][0]; float k0y = mtx[0][1] * ks0 + mtx[1][1] * kt0 + mtx[2][1]; @@ -571,9 +570,14 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit int ny = parms->ny; float scalex = parms->scale_x == 0 ? 1.0f : parms->scale_x; float scaley = parms->scale_y == 0 ? 
1.0f : parms->scale_y; + bool flip_x = parms->flip_x; + bool flip_y = parms->flip_y; + + if (scalex < 0) { flip_x = !flip_x; scalex = -scalex; } + if (scaley < 0) { flip_y = !flip_y; scaley = -scaley; } float sin_theta, cos_theta; - sincosf(parms->theta, &sin_theta, &cos_theta); + fm_sincosf(parms->theta, &sin_theta, &cos_theta); float mtx[3][2] = { { cos_theta * scalex, -sin_theta * scaley }, @@ -586,8 +590,8 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (parms->flip_x) { ks0 = os1 - ks0 + os0; ks1 = os1 - ks1 + os0; } - if (parms->flip_y) { kt0 = ot1 - kt0 + ot0; kt1 = ot1 - kt1 + ot0; } + if (flip_x) { ks0 = os1 - ks0 + os0; ks1 = os1 - ks1 + os0; } + if (flip_y) { kt0 = ot1 - kt0 + ot0; kt1 = ot1 - kt1 + ot0; } float k0x = mtx[0][0] * ks0 + mtx[1][0] * kt0 + mtx[2][0]; float k0y = mtx[0][1] * ks0 + mtx[1][1] * kt0 + mtx[2][1]; @@ -609,8 +613,8 @@ static void tex_xblit(const surface_t *surf, float x0, float y0, const rdpq_blit void draw_cb_multi_rot(rdpq_tile_t tile, int s0, int t0, int s1, int t1) { int ks0 = s0, kt0 = t0, ks1 = s1, kt1 = t1; - if (parms->flip_x) { ks0 = os1 - ks0 + os0; ks1 = os1 - ks1 + os0; } - if (parms->flip_y) { kt0 = ot1 - kt0 + ot0; kt1 = ot1 - kt1 + ot0; } + if (flip_x) { ks0 = os1 - ks0 + os0; ks1 = os1 - ks1 + os0; } + if (flip_y) { kt0 = ot1 - kt0 + ot0; kt1 = ot1 - kt1 + ot0; } assert(s1-s0 == src_width); From 32098c5e19c1a5d423b65a40aa79c3fc77747256 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Mon, 10 Jun 2024 23:25:09 +0200 Subject: [PATCH 45/48] rdpq_tex: document that negative scale performs clipping --- include/rdpq_tex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/rdpq_tex.h b/include/rdpq_tex.h index 13fd673ac0..f6e6881a63 100644 --- a/include/rdpq_tex.h +++ b/include/rdpq_tex.h @@ -308,8 +308,8 @@ typedef struct rdpq_blitparms_s { int cx; ///< Transformation center (aka "hotspot") X coordinate, 
relative to (s0, t0). Used for all transformations int cy; ///< Transformation center (aka "hotspot") X coordinate, relative to (s0, t0). Used for all transformations - float scale_x; ///< Horizontal scale factor to apply to the surface. If 0, no scaling is performed (the same as 1.0f) - float scale_y; ///< Vertical scale factor to apply to the surface. If 0, no scaling is performed (the same as 1.0f) + float scale_x; ///< Horizontal scale factor to apply to the surface. If 0, no scaling is performed (the same as 1.0f). If negative, horizontal flipping is applied + float scale_y; ///< Vertical scale factor to apply to the surface. If 0, no scaling is performed (the same as 1.0f). If negative, vertical flipping is applied float theta; ///< Rotation angle in radians // FIXME: replace this with CPU tracking of filtering mode? From 958af6010652cbe3a235cdcd344ff39775d0eb4b Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Tue, 11 Jun 2024 01:09:48 +0200 Subject: [PATCH 46/48] rspq: avoid doxygen error in rspq.c (copy from trunk) --- src/rspq/rspq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rspq/rspq.c b/src/rspq/rspq.c index 5f08552455..01b9a883a5 100644 --- a/src/rspq/rspq.c +++ b/src/rspq/rspq.c @@ -1387,13 +1387,15 @@ void rspq_wait(void) } } +/// @cond void rspq_signal(uint32_t signal) { - const uint32_t allowed_mask = SP_WSTATUS_CLEAR_SIG0|SP_WSTATUS_SET_SIG0|SP_WSTATUS_CLEAR_SIG1|SP_WSTATUS_SET_SIG1; - assertf((signal & allowed_mask) == signal, "rspq_signal called with a mask that contains bits outside SIG0-1: %lx", signal); + const uint32_t allowed_mask = SP_WSTATUS_CLEAR_SIG0|SP_WSTATUS_SET_SIG0; + assertf((signal & allowed_mask) == signal, "rspq_signal called with a mask that contains bits outside SIG0: %lx", signal); rspq_int_write(RSPQ_CMD_WRITE_STATUS, signal); } +/// @endcond static void rspq_dma(void *rdram_addr, uint32_t dmem_addr, uint32_t len, uint32_t flags) { From ff7420adf2d92b51d42867e2e625d0accbd07901 Mon Sep 17 
00:00:00 2001 From: thekovic <72971433+thekovic@users.noreply.github.com> Date: Sat, 8 Jun 2024 11:01:23 +0200 Subject: [PATCH 47/48] docs: Move public API docs to header files --- include/audio.h | 170 +++++++++++++++++- include/console.h | 81 ++++++++- include/dir.h | 30 ++++ include/dma.h | 193 ++++++++++++++++++++- include/dragonfs.h | 165 +++++++++++++++++- include/eeprom.h | 75 ++++++++ include/eepromfs.h | 128 ++++++++++++++ include/exception.h | 75 +++++++- include/graphics.h | 409 ++++++++++++++++++++++++++++++++++++++++++-- include/interrupt.h | 361 +++++++++++++++++++++++++++++++++++++- include/joybus.h | 56 +++++- include/mempak.h | 174 ++++++++++++++++++- include/n64sys.h | 160 ++++++++++++++++- include/rtc.h | 201 +++++++++++++++++++++- include/system.h | 105 +++++++++++- include/timer.h | 167 ++++++++++++++++-- include/tpak.h | 143 ++++++++++++++++ src/audio.c | 160 ----------------- src/console.c | 79 --------- src/dma.c | 183 -------------------- src/dragonfs.c | 164 ------------------ src/eeprom.c | 70 -------- src/eepromfs.c | 122 ------------- src/exception.c | 77 --------- src/graphics.c | 380 ---------------------------------------- src/interrupt.c | 330 ----------------------------------- src/joybus.c | 58 ------- src/mempak.c | 171 ------------------ src/n64sys.c | 152 ---------------- src/rtc.c | 201 ---------------------- src/system.c | 134 +-------------- src/timer.c | 151 ---------------- src/tpak.c | 134 --------------- 33 files changed, 2651 insertions(+), 2608 deletions(-) diff --git a/include/audio.h b/include/audio.h index c4966c2030..1c100d5213 100644 --- a/include/audio.h +++ b/include/audio.h @@ -9,6 +9,42 @@ #include #include +/** + * @defgroup audio Audio Subsystem + * @ingroup libdragon + * @brief Interface to the N64 audio hardware. + * + * The audio subsystem handles queueing up chunks of audio data for + * playback using the N64 audio DAC. 
The audio subsystem handles + * DMAing chunks of data to the audio DAC as well as audio callbacks + * when there is room for another chunk to be written. Buffer size + * is calculated automatically based on the requested audio frequency. + * The audio subsystem accomplishes this by interfacing with the audio + * interface (AI) registers. + * + * Because the audio DAC is timed off of the system clock of the N64, + * the audio subsystem needs to know what region the N64 is from. This + * is due to the fact that the system clock is timed differently for + * PAL, NTSC and MPAL regions. This is handled automatically by the + * audio subsystem based on settings left by the bootloader. + * + * Code attempting to output audio on the N64 should initialize the + * audio subsystem at the desired frequency and with the desired number + * of buffers using #audio_init. More audio buffers allows for smaller + * chances of audio glitches but means that there will be more latency + * in sound output. When new data is available to be output, code should + * check to see if there is room in the output buffers using + * #audio_can_write. Code can probe the current frequency and buffer + * size using #audio_get_frequency and #audio_get_buffer_length respectively. + * When there is additional room, code can add new data to the output + * buffers using #audio_write. Be careful as this is a blocking operation, + * so if code doesn't check for adequate room first, this function will + * not return until there is room and the samples have been written. + * When all audio has been written, code should call #audio_close to shut + * down the audio subsystem cleanly. 
+ * @{ + */ + #ifdef __cplusplus extern "C" { #endif @@ -25,19 +61,149 @@ extern "C" { */ typedef void(*audio_fill_buffer_callback)(short *buffer, size_t numsamples); +/** + * @brief Initialize the audio subsystem + * + * This function will set up the AI to play at a given frequency and + * allocate a number of back buffers to write data to. + * + * @note Before re-initializing the audio subsystem to a new playback + * frequency, remember to call #audio_close. + * + * @param[in] frequency + * The frequency in Hz to play back samples at + * @param[in] numbuffers + * The number of buffers to allocate internally + */ void audio_init(const int frequency, int numbuffers); + +/** + * @brief Install a audio callback to fill the audio buffer when required. + * + * This function allows to implement a pull-based audio system. It registers + * a callback which will be invoked under interrupt whenever the AI is ready + * to have more samples enqueued. The callback can fill the provided audio + * data with samples that will be enqueued for DMA to AI. + * + * @param[in] fill_buffer_callback Callback to fill an empty audio buffer + */ void audio_set_buffer_callback(audio_fill_buffer_callback fill_buffer_callback); + +/** + * @brief Pause or resume audio playback + * + * Should only be used when a fill_buffer_callback has been set + * in #audio_init. + * Silence will be generated while playback is paused. + */ void audio_pause(bool pause); + +/** + * @brief Return whether there is an empty buffer to write to + * + * This function will check to see if there are any buffers that are not full to + * write data to. If all buffers are full, wait until the AI has played back + * the next buffer in its queue and try writing again. + */ volatile int audio_can_write(); + +/** + * @brief Write a chunk of silence + * + * This function will write silence to be played back by the audio system. + * It writes exactly #audio_get_buffer_length stereo samples. 
+ * + * @note This function will block until there is room to write an audio sample. + * If you do not want to block, check to see if there is room by calling + * #audio_can_write. + */ void audio_write_silence(); + +/** + * @brief Close the audio subsystem + * + * This function closes the audio system and cleans up any internal + * memory allocated by #audio_init. + */ void audio_close(); + +/** + * @brief Return actual frequency of audio playback + * + * @return Frequency in Hz of the audio playback + */ int audio_get_frequency(); + +/** + * @brief Get the number of stereo samples that fit into an allocated buffer + * + * @note To get the number of bytes to allocate, multiply the return by + * 2 * sizeof( short ) + * + * @return The number of stereo samples in an allocated buffer + */ int audio_get_buffer_length(); + +/** + * @brief Start writing to the first free internal buffer. + * + * This function is similar to #audio_write but instead of taking samples + * and copying them to an internal buffer, it returns the pointer to the + * internal buffer. This allows generating the samples directly in the buffer + * that will be sent via DMA to AI, without any subsequent memory copy. + * + * The buffer should be filled with stereo interleaved samples, and + * exactly #audio_get_buffer_length samples should be written. + * + * After you have written the samples, call audio_write_end() to notify + * the library that the buffer is ready to be sent to AI. + * + * @note This function will block until there is room to write an audio sample. + * If you do not want to block, check to see if there is room by calling + * #audio_can_write. + * + * @return Pointer to the internal memory buffer where to write samples. + */ short* audio_write_begin(void); + +/** + * @brief Complete writing to an internal buffer. + * + * This function is meant to be used in pair with audio_write_begin(). 
+ * Call this once you have generated the samples, so that the audio + * system knows the buffer has been filled and can be played back. + * + */ void audio_write_end(void); -int audio_push(const short *buffer, int nsamples, bool blocking); +/** + * @brief Push a chunk of audio data (high-level function) + * + * This function is an easy-to-use, higher level alternative to all + * the audio_write* functions. It pushes audio samples into output + * hiding the complexity required to match the fixed-size audio buffers. + * + * The function accepts a @p buffer of stereo interleaved audio samples; + * @p nsamples is the number of samples in the buffer. The function will + * push the samples into output as much as possible. + * + * If @p blocking is true, it will stop and wait until all samples have + * been pushed into output. If @p blocking is false, it will stop as soon + * as there are no more free buffers to push samples into, and will return + * the number of pushed samples. It is up to the caller to then take care + * of this and later try to call audio_push again with the remaining samples. + * + * @note You CANNOT mixmatch this function with the other audio_write* functions, + * and viceversa. If you decide to use audio_push, use it exclusively to + * push the audio. 
+ * + * @param buffer Buffer containing stereo samples to be played + * @param nsamples Number of stereo samples in the buffer + * @param blocking If true, wait until all samples have been pushed + * @return int Number of samples pushed into output + */ +int audio_push(const short *buffer, int nsamples, bool blocking); __attribute__((deprecated("use audio_write_begin or audio_push instead"))) void audio_write(const short * const buffer); @@ -46,4 +212,6 @@ void audio_write(const short * const buffer); } #endif +/** @} */ /* audio */ + #endif diff --git a/include/console.h b/include/console.h index bc32516dc8..faf7cc2d68 100644 --- a/include/console.h +++ b/include/console.h @@ -11,8 +11,28 @@ #include "display.h" /** - * @addtogroup console - * @{ + * @defgroup console Console Support + * @ingroup display + * @brief Software console emulation for debugging and simple text output. + * + * Console support is provided as a poor-man's console for simple debugging on + * the N64. It does not respect common escape sequences and is nonstandard in + * size. When using the console, code should be careful to make sure that the + * display system has not been initialized. Similarly, if the display system + * is needed, code should be sure that the console is not initialized. + * + * Code wishing to use the console should first initialize the console support in + * libdragon with #console_init. Once the console has been initialized, it will + * operate in one of two modes. In automatic mode, every write to the console will + * be immediately displayed on the screen. The console will be scrolled when the + * buffer fills. In manual mode, the console will only be displayed after calling + * #console_render. To set the render mode, use #console_set_render_mode. To + * add data to the console, use printf or iprintf. To clear the console and reset + * the scroll, use #console_clear.
Once the console is not needed or when the + * code wishes to switch to the display subsystem, #console_close should be called + * to cleanly shut down the console support. + * + * @{ */ /** @@ -78,11 +98,68 @@ extern "C" { #endif +/** + * @brief Initialize the console + * + * Initialize the console system. This will initialize the video properly, so + * a call to the display_init() function is not necessary. + */ void console_init(); + +/** + * @brief Close the console + * + * Free the console system. This will clean up any dynamic memory that was in + * use. + */ void console_close(); + +/** + * @brief Send console output to debug channel + * + * Configure whether the console output should be redirected to the debug channel + * as well (stderr), that can be sent over USB for development purposes. See + * #debugf for more information. + * + * @param[in] debug + * True if console output should also be sent to the debugging channel, false otherwise + * + */ void console_set_debug(bool debug); + +/** + * @brief Set the console rendering mode + * + * This sets the render mode of the console. The #RENDER_AUTOMATIC mode allows + * console_printf to immediately be placed onto the screen. This is very similar + * to a normal console on a unix/windows system. The #RENDER_MANUAL mode allows + * console_printf to be buffered, and displayed at a later date using + * console_render(). This is to allow a rendering interface somewhat analogous + * to curses + * + * @param[in] mode + * Render mode (#RENDER_AUTOMATIC or #RENDER_MANUAL) + */ void console_set_render_mode(int mode); + +/** + * @brief Clear the console + * + * Clear the console and set the virtual cursor back to the top left. + */ void console_clear(); + +/** + * @brief Render the console + * + * Render the console to the screen. This should be called when in manual + * rendering mode to display the console to the screen. In automatic mode + * it is not necessary to call.
+ * + * The color that is used to draw the text can be set using #graphics_set_color. + * + * Do not call while interrupts are disabled, or it will lock the system. + */ void console_render(); #ifdef __cplusplus diff --git a/include/dir.h b/include/dir.h index ba335999dd..ed36694545 100644 --- a/include/dir.h +++ b/include/dir.h @@ -51,7 +51,37 @@ typedef struct /** @} */ +/** + * @brief Find the first file in a directory + * + * This function should be called to start enumerating a directory or whenever + * a directory enumeration should be restarted. + * + * @param[in] path + * Path to the directory structure + * @param[out] dir + * Directory entry structure to populate with first entry + * + * @return 0 on successful lookup, -1 if the directory existed and is empty, + * or a different negative value on error (in which case, errno will be set). + */ int dir_findfirst( const char * const path, dir_t *dir ); + +/** + * @brief Find the next file in a directory + * + * After finding the first file in a directory using #dir_findfirst, call this to retrieve + * the rest of the directory entries. Call this repeatedly until a negative error is returned + * signifying that there are no more directory entries in the directory. + * + * @param[in] path + * Path to the directory structure + * @param[out] dir + * Directory entry structure to populate with next entry + * + * @return 0 on successful lookup, -1 if there are no more files in the directory, + * or a different negative value on error (in which case, errno will be set). 
+ */ int dir_findnext( const char * const path, dir_t *dir ); #ifdef __cplusplus diff --git a/include/dma.h b/include/dma.h index b01bcd1bb5..ecfcd1777b 100644 --- a/include/dma.h +++ b/include/dma.h @@ -9,6 +9,27 @@ #include #include +/** + * @defgroup dma DMA Controller + * @ingroup lowlevel + * @brief DMA functionality for transfers between cartridge space and RDRAM + * + * The DMA controller is responsible for handling block and word accesses from + * the cartridge domain. Because of the nature of the cartridge interface, code + * cannot use memcpy or standard pointer accesses on memory mapped to the cartridge. + * Consequently, the peripheral interface (PI) provides a DMA controller for + * accessing data. + * + * The DMA controller requires no initialization. Using #dma_read and #dma_write + * will allow reading from the cartridge and writing to the cartridge respectively + * in block mode. #io_read and #io_write will allow a single 32-bit integer to + * be read from or written to the cartridge. These are especially useful for + * manipulating registers on a cartridge such as a gameshark. Code should never + * make raw 32-bit reads or writes in the cartridge domain as it could collide with + * an in-progress DMA transfer or run into caching issues. + * @{ + */ + #ifdef __cplusplus extern "C" { #endif @@ -19,21 +40,185 @@ extern "C" { #define PI_WR_LEN ((volatile uint32_t*)0xA460000C) ///< PI DMA: write length register #define PI_STATUS ((volatile uint32_t*)0xA4600010) ///< PI: status register +/** + * @brief Start writing data to a peripheral through PI DMA (low-level) + * + * This function should be used when writing to a cartridge peripheral (typically + * ROM). This function just begins executing a raw DMA transfer, which is + * well-defined only for RAM addresses which are multiple of 8, ROM addresses + * which are multiple of 2, and lengths which are multiple of 2. + * + * Use #dma_wait to wait for the end of the transfer. 
+ * + * + * @param[in] ram_address + * Pointer to a buffer to read data from (must be 8-byte aligned) + * @param[in] pi_address + * Memory address of the peripheral to write to (must be 2-byte aligned) + * @param[in] len + * Length in bytes to write into pi_address (must be multiple of 2) + */ void dma_write_raw_async(const void *ram_address, unsigned long pi_address, unsigned long len); + +/** + * @brief Write to a peripheral + * + * This function should be used when writing to the cartridge. + * + * @param[in] ram_address + * Pointer to a buffer to read data from + * @param[in] pi_address + * Cartridge address to write to (must be in range 0x10000000-0x1FFFFFFF). + * @param[in] len + * Length in bytes to write to peripheral + * + * @note This function has always had an historical mistake: the pi_address is mangled + * to be forced into the ROM area (0x10000000-0x1FFFFFFF). This is wrong as the + * PI bus has full 32-bit address, and the same function could have been used + * to access the whole range. + * If you need to write outside the ROM area, use #dma_write_raw_async instead. + */ void dma_write(const void * ram_address, unsigned long pi_address, unsigned long len); + +/** + * @brief Start reading data from a peripheral through PI DMA (low-level) + * + * This function should be used when reading from a cartridge peripheral (typically + * ROM). This function just begins executing a raw DMA transfer, which is + * well-defined only for RAM addresses which are multiple of 8, ROM addresses + * which are multiple of 2, and lengths which are multiple of 2. + * + * Use #dma_wait to wait for the end of the transfer. + * + * See #dma_read_async for a higher level primitive which can perform almost + * arbitrary transfers. 
+ * + * @param[out] ram_address + * Pointer to a buffer to place read data (must be 8-byte aligned) + * @param[in] pi_address + * Memory address of the peripheral to read from (must be 2-byte aligned) + * @param[in] len + * Length in bytes to read into ram_address (must be multiple of 2) + */ void dma_read_raw_async(void *ram_address, unsigned long pi_address, unsigned long len); -void dma_read_async(void *ram_address, unsigned long pi_address, unsigned long len); + +/** + * @brief Start reading data from a peripheral through PI DMA + * + * This function must be used when reading a chunk of data from a cartridge + * peripheral (typically, ROM). It is a wrapper over #dma_read_raw_async that allows + * arbitrary aligned addresses and any length (including odd sizes). For + * fully-aligned addresses it quickly falls back to #dma_read_raw_async, so it can + * be used generically as "default" PI DMA transfer function. + * + * The only constraint on alignment is that the RAM and PI addresses must have + * the same 1-bit misalignment, that is they must either be even addresses or + * odd addresses. Notice that this function will assert if this constraint is + * not respected. + * + * Use #dma_wait to wait for the end of the transfer. + * + * For non performance sensitive tasks such as reading and parsing data from + * ROM at loading time, a better option is to use DragonFS, where #dfs_read + * falls back to a CPU memory copy to realign the data when required. + * + * @param[out] ram_pointer + * Pointer to a buffer in RDRAM to place read data + * @param[in] pi_address + * Memory address of the peripheral to read from + * @param[in] len + * Length in bytes to read into ram_pointer + */ +void dma_read_async(void *ram_pointer, unsigned long pi_address, unsigned long len); + +/** + * @brief Read data from a peripheral through PI DMA, waiting for completion. + * + * This function performs a blocking read. See #dma_read_async for more information. 
+ * + * @param[out] ram_address + * Pointer to a buffer in RDRAM to place read data + * @param[in] pi_address + * ROM address to read from (must be in range 0x10000000-0x1FFFFFFF). + * @param[in] len + * Length in bytes to read into ram_address + * + * @note This function has always had an historical mistake: the pi_address is mangled + * to be forced into the ROM area (0x10000000-0x1FFFFFFF). This is wrong as the + * PI bus has full 32-bit address, and the same function could have been used + * to access the whole range. + * If you need to read outside the ROM area, use #dma_read_async instead. + */ void dma_read(void * ram_address, unsigned long pi_address, unsigned long len); + +/** + * @brief Wait until an async DMA or I/O transfer is finished. + */ void dma_wait(void); -/* 32 bit IO read from PI device */ + +/** + * @brief Read a 32 bit integer from a peripheral using the CPU. + * + * @param[in] pi_address + * Memory address of the peripheral to read from + * + * @return The 32 bit value read from the peripheral + * + * @note This function only works if the specified PI address falls within a range + * which is memory mapped on the CPU. See #io_accessible for more information. + * + * @see #io_accessible + */ uint32_t io_read(uint32_t pi_address); -/* 32 bit IO write to PI device */ +/** + * @brief Write a 32 bit integer to a peripheral using the CPU. + * + * Notice that writes are performed asynchronously, so the data might not have been + * fully written to the peripheral yet when the function returns. Use #dma_wait if + * you need to wait for the transfer to be finished. + * + * @param[in] pi_address + * Memory address of the peripheral to write to + * @param[in] data + * 32 bit value to write to peripheral + * + * @note This function only works if the specified PI address falls within a range + * which is memory mapped on the CPU. See #io_accessible for more information. 
+ * + * @see #io_accessible + */ void io_write(uint32_t pi_address, uint32_t data); +/** + * @brief Check whether the specified PI address can be accessed doing I/O from CPU + * + * The PI bus covers the full 32-bit address range. The full range is only accessible + * via DMA, though. A part of the range is also memory mapped to the CPU and can be + * accessed via #io_read and #io_write. + * + * The ranges of PI address that can be accessed via CPU are: + * + * * 0x0500_0000 - 0x0FFF_FFFF: used by N64DD and SRAM on cartridge + * * 0x1000_0000 - 0x1FBF_FFFF: cartridge ROM + * * 0x1FD0_0000 - 0x1FFF_FFFF: no known PI peripherals use this + * + * The rest of the 32-bit address range is only accessible via DMA. + * + * Notice also that the range 0x2000_0000 - 0x7FFF_FFFF is theoretically accessible + * by the CPU but only via 64-bit addressing, so it requires assembly instructions + * (as the libdragon toolchain uses 32-bit pointers). No known PI peripherals use this + * range anyway. + * + * This function checks whether the specified address falls into the range accessible + * via CPU or not. + * + * @param pi_address PI address to check + * @return True if the address is memory mapped, false if it is not + */ bool io_accessible(uint32_t pi_address); __attribute__((deprecated("use dma_wait instead"))) @@ -44,4 +229,6 @@ volatile int dma_busy(void); } #endif +/** @} */ /* dma */ + #endif diff --git a/include/dragonfs.h b/include/dragonfs.h index ae26c910fd..82d25b59f2 100644 --- a/include/dragonfs.h +++ b/include/dragonfs.h @@ -6,8 +6,42 @@ #ifndef __LIBDRAGON_DRAGONFS_H #define __LIBDRAGON_DRAGONFS_H -/** - * @addtogroup dfs +/** + * @defgroup dfs DragonFS + * @ingroup asset + * @brief DragonFS filesystem implementation and newlib hooks. + * + * DragonFS is a read only ROM filesystem for the N64. It provides an interface + * that homebrew developers can use to load resources from cartridge space that + * were not available at compile time. 
This can mean sprites or other game assets, + * or the filesystem can be appended at a later time if the homebrew developer wishes + * end users to be able to insert custom levels, music or other assets. It is loosely + * based off of FAT with consideration into application and limitations of the N64. + * + * The filesystem can be generated using 'mkdfs' which is included in the 'tools' + * directory of libdragon. Due to the read-only nature, DFS does not support empty + * files or empty directories. Attempting to create a filesystem with either of + * these using 'mkdfs' will result in an error. If a filesystem contains either empty + * files or empty directories, the result of manipulating the filesystem is undefined. + * + * DragonFS does not support writing, renaming or symlinking of files. It supports only + * file and directory types. + * + * DFS files have a maximum size of 256 MiB. Directories can have an unlimited + * number of files in them. Each token (separated by a / in the path) can be 243 characters + * maximum. Directories can be 100 levels deep at maximum. There can be 4 files open + * simultaneously. + * + * When DFS is initialized, it will register itself with newlib using 'rom:/' as a prefix. + * Files can be accessed either with standard POSIX functions (open, fopen) using the 'rom:/' + * prefix or the lower-level DFS API calls without prefix. In most cases, it is not necessary + * to use the DFS API directly, given that the standard C functions are more comprehensive. + * Files can be opened using both sets of API calls simultaneously as long as no more than + * four files are open at any one time. + * + * DragonFS does not support file compression; if you want to compress your assets, + * use the asset API (#asset_load / #asset_fopen). + * * @{ */ @@ -83,17 +117,144 @@ extern "C" { #endif +/** + * @brief Initialize the filesystem. 
+ * + * Given a base offset where the filesystem should be found, this function will + * initialize the filesystem to read from cartridge space. This function will + * also register DragonFS with newlib so that standard POSIX/C file operations + * work with DragonFS, using the "rom:/" prefix. + * + * The function needs to know where the DFS image is located within the cartridge + * space. To simplify this, you can pass #DFS_DEFAULT_LOCATION which tells + * #dfs_init to search for the DFS image by itself, using the rompak TOC (see + * rompak_internal.h). Most users should use this option. + * + * Otherwise, if the ROM cannot be built with a rompak TOC for some reason, + * a virtual address should be passed. This is normally 0xB0000000 + the offset + * used when building your ROM + the size of the header file used (typically 0x1000). + * + * @param[in] base_fs_loc + * Virtual address in cartridge space at which to find the filesystem, or + * DFS_DEFAULT_LOCATION to automatically search for the filesystem in the + * cartridge (using the rompak). + * + * @return DFS_ESUCCESS on success or a negative error otherwise. + */ int dfs_init(uint32_t base_fs_loc); + +/** + * @brief Open a file given a path + * + * Check if we have any free file handles, and if we do, try + * to open the file specified. Supports absolute and relative + * paths. + * + * @param[in] path + * Path of the file to open + * + * @return A valid file handle to reference the file by or a negative error on failure. + */ int dfs_open(const char * const path); + +/** + * @brief Read data from a file + * + * Note that no caching is performed: if you need to read small amounts + * (eg: one byte at a time), consider using standard C API instead (fopen()) + * which performs internal buffering to avoid too much overhead. 
+ * + * @param[out] buf + * Buffer to read into + * @param[in] size + * Size of each element to read + * @param[in] count + * Number of elements to read + * @param[in] handle + * A valid file handle as returned from #dfs_open. + * + * @return The actual number of bytes read or a negative value on failure. + */ int dfs_read(void * const buf, int size, int count, uint32_t handle); + +/** + * @brief Seek to an offset in the file + * + * @param[in] handle + * A valid file handle as returned from #dfs_open. + * @param[in] offset + * A byte offset from the origin to seek from. + * @param[in] origin + * An offset to seek from. Either `SEEK_SET`, `SEEK_CUR` or `SEEK_END`. + * + * @return DFS_ESUCCESS on success or a negative value on error. + */ int dfs_seek(uint32_t handle, int offset, int origin); + +/** + * @brief Return the current offset into a file + * + * @param[in] handle + * A valid file handle as returned from #dfs_open. + * + * @return The current byte offset into a file or a negative error on failure. + */ int dfs_tell(uint32_t handle); + +/** + * @brief Close an already open file handle. + * + * @param[in] handle + * A valid file handle as returned from #dfs_open. + * + * @return DFS_ESUCCESS on success or a negative value on error. + */ int dfs_close(uint32_t handle); + +/** + * @brief Return whether the end of file has been reached + * + * @param[in] handle + * A valid file handle as returned from #dfs_open. + * + * @return 1 if the end of file is reached, 0 if not, and a negative value on error. + */ int dfs_eof(uint32_t handle); + +/** + * @brief Return the file size of an open file + * + * @param[in] handle + * A valid file handle as returned from #dfs_open. + * + * @return The file size in bytes or a negative value on failure. + */ int dfs_size(uint32_t handle); + +/** + * @brief Return the physical address of a file (in ROM space) + * + * This function should be used for highly-specialized, high-performance + * use cases. 
Using dfs_open / dfs_read is generally acceptable + * performance-wise, and is easier to use rather than managing + * direct access to PI space. + * + * Direct access to ROM data must go through io_read or dma_read. Do not + * dereference directly as the console might hang if the PI is busy. + * + * @param[in] path + * Name of the file + * + * @return A pointer to the physical address of the file body, or 0 + * if the file was not found. + * + */ uint32_t dfs_rom_addr(const char *path); +/** + * @brief Convert DFS error code into an error string + */ const char *dfs_strerror(int error); __attribute__((deprecated("relative paths support is deprecated; please use only absolute paths when interacting with DragonFS"))) diff --git a/include/eeprom.h b/include/eeprom.h index 77f8fe80f9..ff8e869046 100644 --- a/include/eeprom.h +++ b/include/eeprom.h @@ -54,11 +54,86 @@ typedef enum eeprom_type_t extern "C" { #endif +/** + * @brief Probe the EEPROM interface on the cartridge. + * + * Inspect the identifier half-word of the EEPROM status response to + * determine which EEPROM save type is available (if any). + * + * @return which EEPROM type was detected on the cartridge. + */ eeprom_type_t eeprom_present( void ); + +/** + * @brief Determine how many blocks of EEPROM exist on the cartridge. + * + * @return 0 if EEPROM was not detected + * or the number of EEPROM 8-byte save blocks available. + */ size_t eeprom_total_blocks( void ); + +/** + * @brief Read a block from EEPROM. + * + * @param[in] block + * Block to read data from. Joybus accesses EEPROM in 8-byte blocks. + * + * @param[out] dest + * Destination buffer for the eight bytes read from EEPROM. + */ void eeprom_read( uint8_t block, uint8_t * dest ); + +/** + * @brief Write a block to EEPROM. + * + * @param[in] block + * Block to write data to. Joybus accesses EEPROM in 8-byte blocks. + * + * @param[in] src + * Source buffer for the eight bytes of data to write to EEPROM. 
+ * + * @return the EEPROM status byte + */ uint8_t eeprom_write( uint8_t block, const uint8_t * src ); + +/** + * @brief Read a buffer of bytes from EEPROM. + * + * This is a high-level convenience helper that abstracts away the + * one-at-a-time EEPROM block access pattern. + * + * @param[out] dest + * Destination buffer to read data into + * @param[in] start + * Byte offset in EEPROM to start reading data from + * @param[in] len + * Byte length of data to read into buffer + */ void eeprom_read_bytes( uint8_t * dest, size_t start, size_t len ); + +/** + * @brief Write a buffer of bytes to EEPROM. + * + * This is a high-level convenience helper that abstracts away the + * one-at-a-time EEPROM block access pattern. + * + * Each EEPROM block write takes approximately 15 milliseconds; + * this operation may block for a while with large buffer sizes: + * + * * 4k EEPROM: 64 blocks * 15ms = 960ms! + * * 16k EEPROM: 256 blocks * 15ms = 3840ms! + * + * You may want to pause audio before calling this. + * + * @param[in] src + * Source buffer containing data to write + * + * @param[in] start + * Byte offset in EEPROM to start writing data to + * + * @param[in] len + * Byte length of the src buffer + */ void eeprom_write_bytes( const uint8_t * src, size_t start, size_t len ); #ifdef __cplusplus diff --git a/include/eepromfs.h b/include/eepromfs.h index f609f39b27..df2f91080d 100644 --- a/include/eepromfs.h +++ b/include/eepromfs.h @@ -73,14 +73,142 @@ typedef struct eepfs_entry_t size_t size; } eepfs_entry_t; +/** + * @brief Initializes the EEPROM filesystem. + * + * Creates a lookup table of file descriptors based on the configuration + * and validates that the current EEPROM data is likely to be compatible + * with the configured file descriptors. + * + * If the configured filesystem does not fit in the available EEPROM blocks + * on the cartridge, initialization will fail. 
Even if your total file size + * fits in EEPROM, your filesystem may not fit due to overhead and padding. + * Note that 1 block is reserved for the filesystem signature, and all files + * must start on a block boundary. + * + * You can mitigate this by ensuring that your files are aligned to the + * 8-byte block size and minimizing wasted space with packed structs. + * + * Each file will take up a minimum of 1 block, plus the filesystem itself + * reserves the first block of EEPROM, so the entry count has a practical + * limit of the number of available EEPROM blocks minus 1: + * + * * 4k EEPROM: 63 files maximum. + * * 16k EEPROM: 255 files maximum. + * + * @param[in] entries + * An array of file paths and sizes; see #eepfs_entry_t + * @param[in] count + * The number of entries in the array + * + * @return EEPFS_ESUCCESS on success or a negative error otherwise + */ int eepfs_init(const eepfs_entry_t * entries, size_t count); + +/** + * @brief De-initializes the EEPROM filesystem. + * + * This cleans up the file lookup table. + * + * You probably won't ever need to call this. + * + * @return EEPFS_ESUCCESS on success or a negative error otherwise + */ int eepfs_close(void); + +/** + * @brief Reads an entire file from the EEPROM filesystem. + * + * @param[in] path + * Path of file in EEPROM filesystem to read from + * @param[out] dest + * Buffer to read into + * @param[in] size + * Size of the destination buffer (in bytes) + * + * @return EEPFS_ESUCCESS on success or a negative error otherwise + */ int eepfs_read(const char * path, void * dest, size_t size); + +/** + * @brief Writes an entire file to the EEPROM filesystem. + * + * Each EEPROM block write takes approximately 15 milliseconds; + * this operation may block for a while! 
+ * + * @param[in] path + * Path of file in EEPROM filesystem to write to + * @param[in] src + * Buffer of data to be written + * @param[in] size + * Size of the source buffer (in bytes) + * + * @return EEPFS_ESUCCESS on success or a negative error otherwise + */ int eepfs_write(const char * path, const void * src, size_t size); + +/** + * @brief Erases a file in the EEPROM filesystem. + * + * Note that "erasing" a file just means writing it full of zeroes. + * All files in the filesystem must always exist at the size specified + * during #eepfs_init + * + * Each EEPROM block write takes approximately 15 milliseconds; + * this operation may block for a while! + * + * Be advised: this is a destructive operation that cannot be undone! + * + * @retval EEPFS_ESUCCESS if successful + * @retval EEPFS_ENOFILE if the path is not a valid file + * @retval EEPFS_EBADINPUT if the path is NULL + */ int eepfs_erase(const char * path); + +/** + * @brief Validates the first block of EEPROM. + * + * There are no guarantees that the data in EEPROM actually matches + * the expected layout of the filesystem. There are many reasons why + * a mismatch can occur: EEPROM re-used from another game; a brand new + * EEPROM that has never been initialized and contains garbage data; + * the filesystem has changed between builds or version of software + * currently in development; EEPROM failing due to age or write limits. + * + * To mitigate these scenarios, it is a good idea to validate that at + * least the first block of EEPROM matches some known good value. + * + * If the signature matches, the data in EEPROM is probably what the + * filesystem expects. If not, the best move is to erase everything + * and start from zero. 
+ * + * @see eepfs_generate_signature + * @see #eepfs_wipe + * + * @retval true if the signature in EEPROM matches the filesystem signature + * @retval false if the signature in EEPROM does not match the filesystem signature + */ bool eepfs_verify_signature(void); + +/** + * @brief Erases all blocks in EEPROM and sets a new signature. + * + * This is useful when you want to erase all files in the filesystem. + * + * Each EEPROM block write takes approximately 15 milliseconds; + * this operation may block for a while: + * + * * 4k EEPROM: 64 blocks * 15ms = 960ms! + * * 16k EEPROM: 256 blocks * 15ms = 3840ms! + * + * You may want to pause audio in advance of calling this. + * + * Be advised: this is a destructive operation that cannot be undone! + * + * @see #eepfs_verify_signature + */ void eepfs_wipe(void); #ifdef __cplusplus diff --git a/include/exception.h b/include/exception.h index 9b1c90b2ea..e27a663d1b 100644 --- a/include/exception.h +++ b/include/exception.h @@ -9,7 +9,16 @@ #include /** - * @addtogroup exceptions + * @defgroup exceptions Exception Handler + * @ingroup lowlevel + * @brief Handle hardware-generated exceptions. + * + * The exception handler traps exceptions generated by hardware. This could + * be an invalid instruction or invalid memory access exception or it could + * be a reset exception. In both cases, a handler registered with + * #register_exception_handler will be passed information regarding the + * exception type and relevant registers. + * * @{ */ @@ -133,10 +142,72 @@ typedef void (*exception_handler_t)(exception_t *exc); */ typedef void (*syscall_handler_t)(exception_t *exc, uint32_t code); +/** + * @brief Register an exception handler to handle exceptions + * + * The registered handle is responsible for clearing any bits that may cause + * a re-trigger of the same exception and updating the EPC. An important + * example is the cause bits (12-17) of FCR31 from cop1. 
To prevent + * re-triggering the exception they should be cleared by the handler. + * + * To manipulate the registers, update the values in the exception_t struct. + * They will be restored to appropriate locations when returning from the + * handler. Setting them directly will not work as expected as they will get + * overwritten with the values pointed by the struct. + * + * There is only one exception to this, cr (cause register) which is also + * modified by the int handler before the saved values are restored thus it + * is only possible to update it through C0_WRITE_CR macro if it is needed. + * This shouldn't be necessary though as they are already handled by the + * library. + * + * k0 ($26), k1 ($27) are not saved/restored and will not be available in the + * handler. Theoretically we can exclude s0-s7 ($16-$23), and gp ($28) to gain + * some performance as they are already saved by GCC when necessary. The same + * is true for sp ($29) and ra ($31) but current interrupt handler manipulates + * them via allocating a new stack and doing a jal. Similarly floating point + * registers f21-f31 are callee-saved. In the future we may consider removing + * them from the save state for interrupts (but not for exceptions) + * + * @param[in] cb + * Callback function to call when exceptions happen + */ exception_handler_t register_exception_handler( exception_handler_t cb ); + +/** + * @brief Default exception handler. + * + * This handler is installed by default for all exceptions. It initializes + * the console and dumps the exception state to the screen, including the value + * of all GPR/FPR registers. It then calls abort() to abort execution. 
+ */ void exception_default_handler( exception_t* ex ); -void register_syscall_handler( syscall_handler_t cb, uint32_t first_code, uint32_t last_code ); + +/** + * @brief Register a handler that will be called when a syscall exception occurs + * + * This function allows to register a handler to be invoked in response to a + * syscall exception, generated by the SYSCALL opcode. The opcode allows to + * specify a 20-bit code which, in a more traditional operating system architecture, + * corresponds to the "service" to be called. + * + * When the registered handler returns, the execution will resume from the + * instruction following the syscall one. + * + * To allow for different usages of the code field, this function accepts + * a range of codes to associate with the handler. This allows a single handler + * to be invoked for multiple different codes, to specialize services. + * + * @note Syscall codes in the range 0x00000 - 0x0FFFF are reserved to libdragon + * itself. Use a code outside that range to avoid conflicts with future versions + * of libdragon. + * + * @param handler Handler to invoke when a syscall exception is triggered + * @param first_code First syscall code to associate with this handler (begin of range) + * @param last_code Last syscall code to associate with this handler (end of range) + */ +void register_syscall_handler( syscall_handler_t handler, uint32_t first_code, uint32_t last_code ); #ifdef __cplusplus } diff --git a/include/graphics.h b/include/graphics.h index 9c8257aebd..3081005ca8 100644 --- a/include/graphics.h +++ b/include/graphics.h @@ -8,6 +8,36 @@ #include +/** + * @defgroup graphics 2D Graphics + * @ingroup display + * @brief Software routines for manipulating graphics in a display context. + * + * The graphics subsystem is responsible for software manipulation of a display + * context as returned from the @ref display. All of the functions use a pure + * software drawing method and are thus much slower than hardware sprite support. 
+ * However, they are slightly more flexible and offer no hardware limitations + * in terms of sprite size. + * + * Code wishing to draw to the screen should first acquire a display context + * using #display_get. Once the display context is acquired, code may draw to + * the context using any of the graphics functions present. Wherever practical, + * two versions of graphics functions are available: a transparent variety and + * a non-transparent variety. Code that wishes to display sprites without + * transparency can get a slight performance boost by using the non-transparent + * variety of calls since no software alpha blending needs to occur. Once + * code has finished drawing to the display context, it can be displayed to the + * screen using #display_show. + * + * The graphics subsystem makes use of the same contexts as the @ref rdp. Thus, + * with careful coding, both hardware and software routines can be used to draw + * to the display context with no ill effects. The colors returned by + * #graphics_make_color and #graphics_convert_color are also compatible with both + * hardware and software graphics routines. + * + * @{ + */ + #ifdef __cplusplus extern "C" { #endif @@ -17,11 +47,6 @@ typedef struct surface_s surface_t; typedef struct sprite_s sprite_t; ///@endcond -/** - * @addtogroup graphics - * @{ - */ - /** @brief Generic color structure */ typedef struct __attribute__((packed)) { @@ -69,23 +94,387 @@ inline color_t color_from_packed32(uint32_t c) { return (color_t){ .r=(uint8_t)(c>>24), .g=(uint8_t)(c>>16), .b=(uint8_t)(c>>8), .a=(uint8_t)c }; } +/** + * @brief Return a packed 32-bit representation of an RGBA color + * + * This is exactly the same as calling `graphics_convert_color(RGBA32(r,g,b,a))`. + * Refer to #graphics_convert_color for more information. + * + * @deprecated By switching to the rdpq API, this function should not be required + * anymore. Use #RGBA32 or #RGBA16 instead. Please avoid using it in new code if possible. 
+ * + * @param[in] r + * 8-bit red value + * @param[in] g + * 8-bit green value + * @param[in] b + * 8-bit blue value + * @param[in] a + * 8-bit alpha value. Note that 255 is opaque and 0 is transparent + * + * @return a 32-bit representation of the color suitable for blitting in software or hardware + * + * @see #graphics_convert_color + * + */ uint32_t graphics_make_color( int r, int g, int b, int a ); + +/** + * @brief Convert a color structure to a 32-bit representation of an RGBA color + * + * This function is similar to #color_to_packed16 and #color_to_packed32, but + * automatically picks the version matching with the current display configuration. + * Notice that this might be wrong if you are drawing to an arbitrary surface rather + * than a framebuffer. + * + * @note In 16 bpp mode, this function will return a packed 16-bit color + * in BOTH the lower 16 bits and the upper 16 bits. In general, this is not necessary. + * However, for drawing with the old deprecated RDP API (in particular, + * rdp_set_primitive_color), this is still required. + * + * @deprecated By switching to the rdpq API, this function should not be required + * anymore. Please avoid using it in new code if possible. + * + * @param[in] color + * A color structure representing an RGBA color + * + * @return a 32-bit representation of the color suitable for blitting in software or hardware + */ uint32_t graphics_convert_color( color_t color ); -void graphics_draw_pixel( surface_t* surf, int x, int y, uint32_t c ); -void graphics_draw_pixel_trans( surface_t* surf, int x, int y, uint32_t c ); -void graphics_draw_line( surface_t* surf, int x0, int y0, int x1, int y1, uint32_t c ); -void graphics_draw_line_trans( surface_t* surf, int x0, int y0, int x1, int y1, uint32_t c ); + +/** + * @brief Draw a pixel to a given display context + * + * @note This function does not support transparency for speed purposes. To draw + * a transparent or translucent pixel, use #graphics_draw_pixel_trans. 
+ * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The x coordinate of the pixel. + * @param[in] y + * The y coordinate of the pixel. + * @param[in] color + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + */ +void graphics_draw_pixel( surface_t* surf, int x, int y, uint32_t color ); + +/** + * @brief Draw a pixel to a given display context with alpha support + * + * @note This function is much slower than #graphics_draw_pixel for 32-bit + * pixels due to the need to sample the current pixel to do software alpha-blending. + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The x coordinate of the pixel. + * @param[in] y + * The y coordinate of the pixel. + * @param[in] color + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + */ +void graphics_draw_pixel_trans( surface_t* surf, int x, int y, uint32_t color ); + +/** + * @brief Draw a line to a given display context + * + * @note This function does not support transparency for speed purposes. To draw + * a transparent or translucent line, use #graphics_draw_line_trans. + * + * @param[in] surf + * The currently active display context. + * @param[in] x0 + * The x coordinate of the start of the line. + * @param[in] y0 + * The y coordinate of the start of the line. + * @param[in] x1 + * The x coordinate of the end of the line. + * @param[in] y1 + * The y coordinate of the end of the line. + * @param[in] color + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. 
+ */ +void graphics_draw_line( surface_t* surf, int x0, int y0, int x1, int y1, uint32_t color ); + +/** + * @brief Draw a line to a given display context with alpha support + * + * @note This function is much slower than #graphics_draw_line for 32-bit + * buffers due to the need to sample the current pixel to do software alpha-blending. + * + * @param[in] surf + * The currently active display context. + * @param[in] x0 + * The x coordinate of the start of the line. + * @param[in] y0 + * The y coordinate of the start of the line. + * @param[in] x1 + * The x coordinate of the end of the line. + * @param[in] y1 + * The y coordinate of the end of the line. + * @param[in] color + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + */ +void graphics_draw_line_trans( surface_t* surf, int x0, int y0, int x1, int y1, uint32_t color ); + +/** + * @brief Draw a filled rectangle to a display context + * + * @note This function does not support transparency for speed purposes. To draw + * a transparent or translucent box, use #graphics_draw_box_trans. + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The x coordinate of the top left of the box. + * @param[in] y + * The y coordinate of the top left of the box. + * @param[in] width + * The width of the box in pixels. + * @param[in] height + * The height of the box in pixels. + * @param[in] color + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + */ void graphics_draw_box( surface_t* surf, int x, int y, int width, int height, uint32_t color ); + +/** + * @brief Draw a filled rectangle to a display context with alpha support + * + * @note This function is much slower than #graphics_draw_box for 32-bit + * buffers due to the need to sample the current pixel to do software alpha-blending. + * + * @param[in] surf + * The currently active display context. 
+ * @param[in] x + * The x coordinate of the top left of the box. + * @param[in] y + * The y coordinate of the top left of the box. + * @param[in] width + * The width of the box in pixels. + * @param[in] height + * The height of the box in pixels. + * @param[in] color + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + */ void graphics_draw_box_trans( surface_t* surf, int x, int y, int width, int height, uint32_t color ); + +/** + * @brief Fill the entire screen with a particular color + * + * @note Since this function is designed for blanking the screen, alpha values for + * colors are ignored. + * + * @param[in] surf + * The currently active display context. + * @param[in] c + * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + */ void graphics_fill_screen( surface_t* surf, uint32_t c ); + +/** + * @brief Set the current forecolor and backcolor for text operations + * + * @param[in] forecolor + * 32-bit RGBA color to use as the text color. Use #graphics_convert_color + * or #graphics_make_color to generate this value. + * @param[in] backcolor + * 32-bit RGBA color to use as the background color for text. Use + * #graphics_convert_color or #graphics_make_color to generate this value. + * Note that if the color given is transparent, text can be written over + * other graphics without background colors showing. + */ void graphics_set_color( uint32_t forecolor, uint32_t backcolor ); + +/** + * @brief Set the font to the default. + */ void graphics_set_default_font( void ); + +/** + * @brief Set the current font. Should be set before using any of the draw function. + * + * The sprite font should be imported using hslices/vslices according to the amount of characters it has. 
+ * The amount of hslices vs vslices does not matter for this, but it should include the whole ASCII + * range that you will want to use, including characters from the 0 to 32 range. Normally the sprite should have + * 127 slices to cover the normal ASCII range. + * + * During rendering, the slice used will be the same number as the char (eg.: character 'A' will use slice 65). + * + * You can see an example of a sprite font (that has the default font double sized) under examples/customfont. + * + * @param[in] font + * Sprite font to be used. + */ void graphics_set_font_sprite( sprite_t *font ); -void graphics_draw_character( surface_t* surf, int x, int y, char c ); + +/** + * @brief Draw a character to the screen using the built-in font + * + * Draw a character from the built-in font to the screen. This function does not support alpha blending, + * only binary transparency. If the background color is fully transparent, the font is drawn with no + * background. Otherwise, the font is drawn on a fully colored background. The foreground and background + * can be set using #graphics_set_color. + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The X coordinate to place the top left pixel of the character drawn. + * @param[in] y + * The Y coordinate to place the top left pixel of the character drawn. + * @param[in] ch + * The ASCII character to draw to the screen. + */ +void graphics_draw_character( surface_t* surf, int x, int y, char ch ); + +/** + * @brief Draw a null terminated string to a display context + * + * Draw a string to the screen, following a few simple rules. Standard ASCII is supported, as well + * as \\r, \\n, space and tab. \\r and \\n will both cause the next character to be rendered one line + * lower and at the x coordinate specified in the parameters. The tab character inserts five spaces. + * + * This function does not support alpha blending, only binary transparency. 
If the background color is + * fully transparent, the font is drawn with no background. Otherwise, the font is drawn on a fully + * colored background. The foreground and background can be set using #graphics_set_color. + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The X coordinate to place the top left pixel of the character drawn. + * @param[in] y + * The Y coordinate to place the top left pixel of the character drawn. + * @param[in] msg + * The ASCII null terminated string to draw to the screen. + */ void graphics_draw_text( surface_t* surf, int x, int y, const char * const msg ); + +/** + * @brief Draw a sprite to a display context + * + * Given a sprite structure, this function will draw a sprite to the display context + * with clipping support. + * + * @note This function does not support alpha blending for speed purposes. For + * alpha blending support, please see #graphics_draw_sprite_trans + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The X coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped horizontally. + * @param[in] y + * The Y coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped vertically. + * @param[in] sprite + * Pointer to a sprite structure to display to the screen. + */ void graphics_draw_sprite( surface_t* surf, int x, int y, sprite_t *sprite ); + +/** + * @brief Draw a sprite from a spritemap to a display context + * + * Given a sprite structure, this function will draw a sprite out of a larger spritemap + * to the display context with clipping support. This function is useful for software + * tilemapping. If a sprite was generated as a spritemap (it has more than one horizontal + * or vertical slice), this function can display a slice of the sprite as a standalone sprite. 
+ * + * Given a sprite with 3 horizontal slices and 2 vertical slices, the offsets would be as follows: + * + *
+ * *---*---*---*
+ * | 0 | 1 | 2 |
+ * *---*---*---*
+ * | 3 | 4 | 5 |
+ * *---*---*---*
+ * 
+ * + * @note This function does not support alpha blending for speed purposes. For + * alpha blending support, please see #graphics_draw_sprite_trans_stride + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The X coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped horizontally. + * @param[in] y + * The Y coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped vertically. + * @param[in] sprite + * Pointer to a sprite structure to display to the screen. + * @param[in] offset + * Offset of the sprite to display out of the spritemap. The offset is counted + * starting from 0. The top left sprite in the map is 0, the next one to the right + * is 1, and so on. + */ void graphics_draw_sprite_stride( surface_t* surf, int x, int y, sprite_t *sprite, int offset ); + +/** + * @brief Draw a sprite to a display context with alpha transparency + * + * Given a sprite structure, this function will draw a sprite to the display context + * with clipping support. + * + * @note This function supports alpha blending and is much slower for 32-bit sprites. + * If you do not need alpha blending support, please see #graphics_draw_sprite. + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The X coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped horizontally. + * @param[in] y + * The Y coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped vertically. + * @param[in] sprite + * Pointer to a sprite structure to display to the screen. 
+ */ void graphics_draw_sprite_trans( surface_t* surf, int x, int y, sprite_t *sprite ); + +/** + * @brief Draw a sprite from a spritemap to a display context + * + * Given a sprite structure, this function will draw a sprite out of a larger spritemap + * to the display context with clipping support. This function is useful for software + * tilemapping. If a sprite was generated as a spritemap (it has more than one horizontal + * or vertical slice), this function can display a slice of the sprite as a standalone sprite. + * + * Given a sprite with 3 horizontal slices and 2 vertical slices, the offsets would be as follows: + * + *
+ * *---*---*---*
+ * | 0 | 1 | 2 |
+ * *---*---*---*
+ * | 3 | 4 | 5 |
+ * *---*---*---*
+ * 
+ * + * @note This function supports alpha blending and is much slower for 32-bit sprites. + * If you do not need alpha blending support, please see #graphics_draw_sprite_stride. + * + * @param[in] surf + * The currently active display context. + * @param[in] x + * The X coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped horizontally. + * @param[in] y + * The Y coordinate to place the top left pixel of the sprite. This can + * be negative if the sprite is clipped vertically. + * @param[in] sprite + * Pointer to a sprite structure to display to the screen. + * @param[in] offset + * Offset of the sprite to display out of the spritemap. The offset is counted + * starting from 0. The top left sprite in the map is 0, the next one to the right + * is 1, and so on. + */ void graphics_draw_sprite_trans_stride( surface_t* surf, int x, int y, sprite_t *sprite, int offset ); #ifdef __cplusplus diff --git a/include/interrupt.h b/include/interrupt.h index d9f0e8f398..23918abd70 100644 --- a/include/interrupt.h +++ b/include/interrupt.h @@ -8,15 +8,44 @@ #include -#ifdef __cplusplus -extern "C" { -#endif - /** - * @addtogroup interrupt + * @defgroup interrupt Interrupt Controller + * @ingroup lowlevel + * @brief N64 interrupt registering and servicing routines. + * + * The N64 interrupt controller provides a software interface to + * register for interrupts from the various systems in the N64. + * Most interrupts on the N64 coordinate through the MIPS interface + * (MI) to allow interrupts to be handled at one spot. A notable + * exception is the timer interrupt which is generated by the MIPS + * r4300 itself and not the N64 hardware. + * + * The interrupt controller is automatically initialized before + * main is called. By default, all interrupts are enabled and any + * registered callback can be called when an interrupt occurs. + * Each of the N64-generated interrupts is maskable using the various + * set accessors. 
+ * + * Interrupts can be enabled or disabled as a whole on the N64 using + * #enable_interrupts and #disable_interrupts. It is assumed that + * once the interrupt system is activated, these will always be called + * in pairs. Calling #enable_interrupts without first calling + * #disable_interrupts is considered a violation of this assumption + * and should be avoided. Calling #disable_interrupts when interrupts + * are already disabled will have no effect interrupts-wise + * (but should be paired with a #enable_interrupts regardless), + * and in that case the paired #enable_interrupts will not enable + * interrupts either. + * In this manner, it is safe to nest calls to disable and enable + * interrupts. + * * @{ */ +#ifdef __cplusplus +extern "C" { +#endif + /** * @brief State of interrupts on the system */ @@ -32,34 +61,304 @@ typedef enum /** @} */ +/** + * @brief Register an AI callback + * + * @param[in] callback + * Function to call when an AI interrupt occurs + */ void register_AI_handler( void (*callback)() ); + +/** + * @brief Register a VI callback + * + * @param[in] callback + * Function to call when a VI interrupt occurs + */ void register_VI_handler( void (*callback)() ); + +/** + * @brief Register a PI callback + * + * @param[in] callback + * Function to call when a PI interrupt occurs + */ void register_PI_handler( void (*callback)() ); + +/** + * @brief Register a DP callback + * + * @param[in] callback + * Function to call when a DP interrupt occurs + */ void register_DP_handler( void (*callback)() ); + +/** + * @brief Register a SI callback + * + * @param[in] callback + * Function to call when a SI interrupt occurs + */ void register_SI_handler( void (*callback)() ); + +/** + * @brief Register a SP callback + * + * @param[in] callback + * Function to call when a SP interrupt occurs + */ void register_SP_handler( void (*callback)() ); + +/** + * @brief Register a timer callback + * + * The callback will be used when the timer interrupt is 
triggered by the CPU. + * This happens when the COP0 COUNT register reaches the same value as the + * COP0 COMPARE register. + * + * This function is useful only if you want to do your own low level programming + * of the internal CPU timer and handle the interrupt yourself. In this case, + * also remember to activate the timer interrupt using #set_TI_interrupt. + * + * @note If you use the timer library (#timer_init and #new_timer), you do not + * need to call this function, as timer interrupts are already handled by the timer + * library. + * + * @param[in] callback + * Function to call when a timer interrupt occurs + */ void register_TI_handler( void (*callback)() ); + +/** + * @brief Register a CART interrupt callback. + * + * The callback will be called when a CART interrupt is triggered. CART interrupts + * are interrupts triggered by devices attached to the PI bus (aka CART bus), + * for instance the 64DD, or the modem cassette. + * + * CART interrupts are disabled by default in libdragon. Use #set_CART_interrupt + * to enable/disable them. + * + * Notice that there is no generic way to acknowledge those interrupts, so if + * you activate CART interrupts, also make sure to register a handler that + * acknowledges them, otherwise the interrupt will deadlock the console. + * + * @param[in] callback + * Function to call when a CART interrupt occurs + */ void register_CART_handler( void (*callback)() ); + +/** + * @brief Register a handler that will be called when the user + * presses the RESET button. + * + * The N64 sends an interrupt when the RESET button is pressed, + * and then actually resets the console after about ~500ms (but less + * on some models, see #RESET_TIME_LENGTH). + * + * Registering a handler can be used to perform a clean reset. + * Technically, at the hardware level, it is important that the RCP + * is completely idle when the reset happens, or it might freeze + * and require a power-cycle to unfreeze. 
This means that any + * I/O, audio, video activity must cease before #RESET_TIME_LENGTH + * has elapsed. + * + * This entry point can be used by the game code to basically + * halt itself and stop issuing commands. Libdragon itself will + * register handlers to halt internal modules so as to provide a basic + * good reset experience. + * + * Handlers can use #exception_reset_time to read how much time has passed + * since the RESET button was pressed. + * + * @param callback Callback to invoke when the reset button is pressed. + * + * @note Reset handlers are called under interrupt. + * + */ void register_RESET_handler( void (*callback)() ); + +/** + * @brief Unregister an AI callback + * + * @param[in] callback + * Function that should no longer be called on AI interrupts + */ void unregister_AI_handler( void (*callback)() ); + +/** + * @brief Unregister a VI callback + * + * @param[in] callback + * Function that should no longer be called on VI interrupts + */ void unregister_VI_handler( void (*callback)() ); + +/** + * @brief Unregister a PI callback + * + * @param[in] callback + * Function that should no longer be called on PI interrupts + */ void unregister_PI_handler( void (*callback)() ); + +/** + * @brief Unregister a DP callback + * + * @param[in] callback + * Function that should no longer be called on DP interrupts + */ void unregister_DP_handler( void (*callback)() ); + +/** + * @brief Unregister a SI callback + * + * @param[in] callback + * Function that should no longer be called on SI interrupts + */ void unregister_SI_handler( void (*callback)() ); + +/** + * @brief Unregister a SP callback + * + * @param[in] callback + * Function that should no longer be called on SP interrupts + */ void unregister_SP_handler( void (*callback)() ); + +/** + * @brief Unregister a timer callback + * + * @note If you use the timer library (#timer_init and #new_timer), you do not + * need to call this function, as timer interrupts are already handled by the timer + * library. 
+ * + * @param[in] callback + * Function that should no longer be called on timer interrupts + */ void unregister_TI_handler( void (*callback)() ); + +/** + * @brief Unregister a CART interrupt callback + * + * @param[in] callback + * Function that should no longer be called on CART interrupts + */ void unregister_CART_handler( void (*callback)() ); + +/** + * @brief Unregister a RESET interrupt callback + * + * @param[in] callback + * Function that should no longer be called on RESET interrupts + */ void unregister_RESET_handler( void (*callback)() ); + +/** + * @brief Enable or disable the AI interrupt + * + * @param[in] active + * Flag to specify whether the AI interrupt should be active + */ void set_AI_interrupt( int active ); + +/** + * @brief Enable or disable the VI interrupt + * + * The VI interrupt is generated when the VI begins displaying a specific line + * of the display output. The line number configured always refers to the + * final TV output, so it should be either in the range 0..524 (NTSC) or + * 0..624 (PAL). + * The vblank happens at the beginning of the display period, in range + * 0..33 (NTSC) or 0..43 (PAL). A common value used to trigger the interrupt + * at the beginning of the vblank is 2. + * + * In non-interlaced modes, the VI only draws on even lines, so configuring + * the interrupt on an odd line causes the interrupt to never trigger. + * In interlace modes, instead, the VI alternates between even lines and odd + * lines, so any specific line will trigger an interrupt only every other + * frame. If you need an interrupt every frame in interlaced mode, you will + * need to reconfigure the interrupt every frame, alternating between an odd + * and an even number. + * + * @param[in] active + * Flag to specify whether the VI interrupt should be active + * @param[in] line + * The vertical line that causes this interrupt to fire. Ignored + * when setting the interrupt inactive. 
+ * This line number refers to the lines in the TV output, + * and is unrelated to the current resolution. + */ void set_VI_interrupt( int active, unsigned long line ); + +/** + * @brief Enable or disable the PI interrupt + * + * @param[in] active + * Flag to specify whether the PI interrupt should be active + */ void set_PI_interrupt( int active ); + +/** + * @brief Enable or disable the DP interrupt + * + * @param[in] active + * Flag to specify whether the DP interrupt should be active + */ void set_DP_interrupt( int active ); + +/** + * @brief Enable or disable the SI interrupt + * + * @param[in] active + * Flag to specify whether the SI interrupt should be active + */ void set_SI_interrupt( int active ); + +/** + * @brief Enable or disable the SP interrupt + * + * @param[in] active + * Flag to specify whether the SP interrupt should be active + */ void set_SP_interrupt( int active ); + +/** + * @brief Enable or disable the timer interrupt + * + * @note If you use the timer library (#timer_init and #new_timer), you do not + * need to call this function, as timer interrupt is already handled by the timer + * library. + * + * @param[in] active + * Flag to specify whether the timer interrupt should be active + * + * @see #register_TI_handler + */ void set_TI_interrupt( int active ); + +/** + * @brief Enable or disable the CART interrupt + * + * @param[in] active + * Flag to specify whether the CART interrupt should be active + * + * @see #register_CART_handler + */ void set_CART_interrupt( int active ); + +/** + * @brief Enable the RESET interrupt + * + * @param[in] active + * Flag to specify whether the RESET interrupt should be active + * + * @note RESET interrupt is active by default. 
+ * + * @see #register_RESET_handler + */ void set_RESET_interrupt( int active ); /** @@ -76,6 +375,35 @@ void set_RESET_interrupt( int active ); */ #define RESET_TIME_LENGTH TICKS_FROM_MS(200) + +/** + * @brief Check whether the RESET button was pressed and how long we are into + * the reset process. + * + * This function returns how many ticks have elapsed since the user has pressed + * the RESET button, or 0 if the user has not pressed it. + * + * It can be used by user code to perform actions during the RESET + * process (see #register_RESET_handler). It is also possible to simply + * poll this value to check at any time if the button has been pressed or not. + * + * The reset process takes about 500ms between the user pressing the + * RESET button and the CPU being actually reset, though on some consoles + * it seems to be much less. See #RESET_TIME_LENGTH for more information. + * For the broadest compatibility, please use #RESET_TIME_LENGTH to implement + * the reset logic. + * + * Notice also that the reset process is initiated when the user presses the + * button, but the reset will not happen until the user releases the button. + * So keeping the button pressed is a good way to check if the application + * actually winds down correctly. + * + * @return Ticks elapsed since RESET button was pressed, or 0 if the RESET button + * was not pressed. + * + * @see register_RESET_handler + * @see #RESET_TIME_LENGTH + */ uint32_t exception_reset_time( void ); static inline __attribute__((deprecated("calling init_interrupts no longer required"))) @@ -87,9 +415,32 @@ void register_reset_handler( void (*callback)() ) register_RESET_handler(callback); } + +/** + * @brief Enable interrupts systemwide + * + * @note If this is called inside a nested disable call, it will have no effect on the + * system. Therefore it is safe to nest disable/enable calls. After the least + * nested enable call, systemwide interrupts will be reenabled. 
+ */ void enable_interrupts(); + +/** + * @brief Disable interrupts systemwide + * + * @note If interrupts are already disabled on the system or interrupts have not + * been initialized, this function will not modify the system state. + */ void disable_interrupts(); + +/** + * @brief Return the current state of interrupts + * + * @retval INTERRUPTS_UNINITIALIZED if the interrupt system has not been initialized yet. + * @retval INTERRUPTS_DISABLED if interrupts have been disabled. + * @retval INTERRUPTS_ENABLED if interrupts are currently enabled. + */ interrupt_state_t get_interrupts_state(); #ifdef __cplusplus diff --git a/include/joybus.h b/include/joybus.h index f70be93ac1..bd6613782b 100644 --- a/include/joybus.h +++ b/include/joybus.h @@ -12,7 +12,43 @@ #include /** - * @addtogroup joybus + * @defgroup joybus Joybus Subsystem + * @ingroup peripherals + * @brief Joybus peripheral interface. + * + * The Joybus subsystem is in charge of communication with all controllers, + * accessories, and peripherals plugged into the N64 controller ports as well + * as some peripherals on the cartridge. The Joybus subsystem is responsible + * for communicating with the serial interface (SI) registers to send commands + * to controllers (including Controller Paks, Rumble Paks, and Transfer Paks), + * the VRU, EEPROM save memory, and the cartridge-based real-time clock. + * + * This module implements just the low-level protocol. You should use it + * only to implement an unsupported peripherals. Otherwise, refer to the + * higher-level modules such as: + * + * For controllers: + * @ref controller "Controller Subsystem". + * + * For EEPROM, RTC and other peripherals: + * @ref peripherals "Peripherals Subsystem". + * + * Internally, the JoyBus subsystem communicates with the PIF controller via + * the SI DMA, via the JoyBus protocol which is a standard master/slave + * binary protocol. Each message of the protocol is a block of 64 bytes, and + * can contain multiple commands. 
Currently, there are no macros or functions + * to help composing a JoyBus message, so higher-level libraries currently + * hard code the binary messages. + * + * All communications is made asynchronously because SI DMA is quite slow: + * its completion is bound to the PIF actually processing the data, rather than + * just being the memory transfer. A queue of pending JoyBus messages is kept + * in a ring buffer, and is then executed under interrupt when the previous SI DMA + * completes. The internal entry point is #joybus_exec_async, that schedules + * a message to be sent to PIF, and calls a callback with the reply whenever + * it is available. A blocking API (#joybus_exec) is made available for + * simpler usage. + * * @{ */ @@ -212,7 +248,23 @@ typedef uint16_t joybus_identifier_t; #define JOYBUS_IDENTIFY_STATUS_EEPROM_BUSY 0x80 /** @} */ -void joybus_exec( const void * inblock, void * outblock ); + +/** + * @brief Write a 64-byte block of data to the PIF and read the 64-byte result. + * + * This function is not a stable feature of the libdragon API and should be + * considered experimental! + * + * The usage of this function will likely change as a result of the ongoing + * effort to integrate the multitasking kernel with asynchronous operations. + * + * @param[in] input + * Source buffer for the input block to send to the PIF + * + * @param[out] output + * Destination buffer to place the output block from the PIF + */ +void joybus_exec( const void* input, void* output ); /** * @brief Execute a Joybus command synchronously on the given port. diff --git a/include/mempak.h b/include/mempak.h index 0d963f6ce6..4a6384a76b 100644 --- a/include/mempak.h +++ b/include/mempak.h @@ -7,7 +7,25 @@ #define __LIBDRAGON_MEMPAK_H /** - * @addtogroup controllerpak + * @defgroup cpak Controller Pak Filesystem Routines + * @ingroup controller + * @brief Managed Controller Pak interface. + * + * The Controller Pak system is a subsystem of the @ref controller. 
Before attempting to + * read from or write to a Controller Pak, be sure you have initialized the Joypad subsystem + * with #joypad_init and verified that you have a Controller Pak in the correct controller + * using #joypad_get_accessory_type. + * + * To read and write to the Controller Pak in an organized way compatible with official software, + * first check that the Controller Pak is valid using #validate_mempak. If the Controller Pak is + * invalid, it will need to be formatted using #format_mempak. Once the Controller Pak is + * considered valid, existing notes can be enumerated using #get_mempak_entry. To + * read the data associated with a note, use #read_mempak_entry_data. To write a + * new note to the Controller Pak, use #write_mempak_entry_data. Note that there is no append + * functionality so if a note is being updated, ensure you have deleted the old note + * first using #delete_mempak_entry. Code should be careful to check how many blocks + * are free before writing using #get_mempak_free_space. + * * @{ */ @@ -63,14 +81,168 @@ typedef struct entry_structure extern "C" { #endif +/** + * @brief Read a sector from a Controller Pak + * + * This will read a sector from a Controller Pak. Sectors on Controller Paks are always 256 bytes + * in size. + * + * @param[in] controller + * The controller (0-3) to read a sector from + * @param[in] sector + * The sector (0-127) to read from + * @param[out] sector_data + * Buffer to place 256 read bytes of data + * + * @retval 0 if reading was successful + * @retval -1 if the sector was out of bounds or sector_data was null + * @retval -2 if there was an error reading part of a sector + */ int read_mempak_sector( int controller, int sector, uint8_t *sector_data ); + +/** + * @brief Write a sector to a Controller Pak + * + * This will write a sector to a Controller Pak. Sectors on Controller Paks are always 256 bytes + * in size. 
+ * + * @param[in] controller + * The controller (0-3) to write a sector to + * @param[in] sector + * The sector (0-127) to write to + * @param[in] sector_data + * Buffer containing 256 bytes of data to write to sector + * + * @retval 0 if writing was successful + * @retval -1 if the sector was out of bounds or sector_data was null + * @retval -2 if there was an error writing part of a sector + */ int write_mempak_sector( int controller, int sector, uint8_t *sector_data ); + +/** + * @brief Return whether a Controller Pak is valid + * + * This function will return whether the Controller Pak in a particular controller + * is formatted and valid. + * + * @param[in] controller + * The controller (0-3) to validate + * + * @retval 0 if the Controller Pak is valid and ready to be used + * @retval -2 if the Controller Pak is not present or couldn't be read + * @retval -3 if the Controller Pak is bad or unformatted + */ int validate_mempak( int controller ); + +/** + * @brief Return the number of free blocks on a Controller Pak + * + * Note that a block is identical in size to a sector. To calculate the number of + * bytes free, multiply the return of this function by #MEMPAK_BLOCK_SIZE. + * + * @param[in] controller + * The controller (0-3) to read the free space from + * + * @return The number of blocks free on the memory card or a negative number on failure + */ int get_mempak_free_space( int controller ); + +/** + * @brief Read an entry on a Controller Pak + * + * Given an entry index (0-15), return the entry as found on the Controller Pak. If + * the entry is blank or invalid, the valid flag is cleared. 
+ * + * @param[in] controller + * The controller (0-3) from which the entry should be read + * @param[in] entry + * The entry index (0-15) to read + * @param[out] entry_data + * Structure containing information on the entry + * + * @retval 0 if the entry was read successfully + * @retval -1 if the entry is out of bounds or entry_data is null + * @retval -2 if the Controller Pak is bad or not present + */ int get_mempak_entry( int controller, int entry, entry_structure_t *entry_data ); + +/** + * @brief Format a Controller Pak + * + * Formats a Controller Pak. This should only be done to totally wipe and re-initialize + * the filesystem in case of a blank or corrupt Controller Pak after a repair has failed. + * + * @param[in] controller + * The Controller (0-3) that the Controller Pak is inserted. + * + * @retval 0 if the Controller Pak was formatted successfully. + * @retval -2 if the Controller Pak was not present or couldn't be formatted. + */ int format_mempak( int controller ); + +/** + * @brief Read the data associated with an entry on a Controller Pak + * + * Given a valid Controller Pak entry fetched by get_mempak_entry, retrieves the contents + * of the entry. The calling function must ensure that enough room is available in + * the passed in buffer for the entire entry. The entry structure itself contains + * the number of blocks used to store the data which can be multiplied by + * #MEMPAK_BLOCK_SIZE to calculate the size of the buffer needed. + * + * @param[in] controller + * The controller (0-3) to read the entry data from + * @param[in] entry + * The entry structure associated with the data to be read. 
An entry + * structure can be fetched based on index using #get_mempak_entry + * @param[out] data + * The data associated with an entry + * + * @retval 0 if the entry was successfully read + * @retval -1 if input parameters were out of bounds or the entry was corrupted somehow + * @retval -2 if the Controller Pak was not present or bad + * @retval -3 if the data couldn't be read + */ int read_mempak_entry_data( int controller, entry_structure_t *entry, uint8_t *data ); + +/** + * @brief Write associated data to a Controller Pak entry + * + * Given a Controller Pak entry structure with a valid region, name and block count, writes the + * entry and associated data to the Controller Pak. This function will not overwrite any existing + * user data. To update an existing entry, use #delete_mempak_entry followed by + * #write_mempak_entry_data with the same entry structure. + * + * @param[in] controller + * The controller (0-3) to write the entry and data to + * @param[in] entry + * The entry structure containing a region, name and block count + * @param[in] data + * The associated data to write to the created entry + * + * @retval 0 if the entry was created and written successfully + * @retval -1 if the parameters were invalid or the note has no length + * @retval -2 if the Controller Pak wasn't present or was bad + * @retval -3 if there was an error writing to the Controller Pak + * @retval -4 if there wasn't enough space to store the note + * @retval -5 if there is no room in the TOC to add a new entry + */ int write_mempak_entry_data( int controller, entry_structure_t *entry, uint8_t *data ); + +/** + * @brief Delete a Controller Pak entry and associated data + * + * Given a valid Controller Pak entry fetched by #get_mempak_entry, removes the entry and frees + * all associated blocks. 
+ * + * @param[in] controller + * The controller (0-3) to delete the note from + * @param[in] entry + * The entry structure that is to be deleted from the Controller Pak + * + * @retval 0 if the entry was deleted successfully + * @retval -1 if the entry was invalid + * @retval -2 if the Controller Pak was bad or not present + */ int delete_mempak_entry( int controller, entry_structure_t *entry ); #ifdef __cplusplus diff --git a/include/n64sys.h b/include/n64sys.h index 2e1ca229ee..910636c989 100644 --- a/include/n64sys.h +++ b/include/n64sys.h @@ -14,7 +14,20 @@ #include "cop1.h" /** - * @addtogroup n64sys + * @defgroup n64sys N64 System Interface + * @ingroup lowlevel + * @brief N64 bootup and cache interfaces. + * + * The N64 system interface provides a way for code to interact with + * the memory setup on the system. This includes cache operations to + * invalidate or flush regions and the ability to set the boot CIC. + * The @ref system use the knowledge of the boot CIC to properly determine + * if the expansion pak is present, giving 4 MiB of additional memory. Aside + * from this, the MIPS r4300 uses a manual cache management strategy, where + * SW that requires passing buffers to and from hardware components using + * DMA controllers needs to ensure that cache and RDRAM are in sync. A + * set of operations to invalidate and/or write back cache is provided for + * both instruction cache and data cache. * @{ */ @@ -311,19 +324,157 @@ void die(void); }) void __data_cache_hit_invalidate(volatile void * addr, unsigned long length); + +/** + * @brief Force a data cache writeback over a memory region + * + * Use this to force cached memory to be written to RDRAM. 
+ * + * @param[in] addr + * Pointer to memory in question + * @param[in] length + * Length in bytes of the data pointed at by addr + */ void data_cache_hit_writeback(volatile const void *, unsigned long); + +/** + * @brief Force a data cache writeback invalidate over a memory region + * + * Use this to force cached memory to be written to RDRAM + * and then invalidate the corresponding cache lines. + * + * @param[in] addr + * Pointer to memory in question + * @param[in] length + * Length in bytes of the data pointed at by addr + */ void data_cache_hit_writeback_invalidate(volatile void *, unsigned long); + +/** + * @brief Force a data cache index writeback invalidate over a memory region + * + * @param[in] addr + * Pointer to memory in question + * @param[in] length + * Length in bytes of the data pointed at by addr + */ void data_cache_index_writeback_invalidate(volatile void *, unsigned long); + +/** + * @brief Force a data cache writeback invalidate over whole memory + * + * Also see #data_cache_hit_writeback_invalidate + * + */ void data_cache_writeback_invalidate_all(void); + +/** + * @brief Force an instruction cache writeback over a memory region + * + * Use this to force cached memory to be written to RDRAM. + * + * @param[in] addr + * Pointer to memory in question + * @param[in] length + * Length in bytes of the data pointed at by addr + */ void inst_cache_hit_writeback(volatile const void *, unsigned long); + +/** + * @brief Force an instruction cache invalidate over a memory region + * + * Use this to force the N64 to update cache from RDRAM. 
+ * + * @param[in] addr + * Pointer to memory in question + * @param[in] length + * Length in bytes of the data pointed at by addr + */ void inst_cache_hit_invalidate(volatile void *, unsigned long); + +/** + * @brief Force an instruction cache index invalidate over a memory region + * + * @param[in] addr + * Pointer to memory in question + * @param[in] length + * Length in bytes of the data pointed at by addr + */ void inst_cache_index_invalidate(volatile void *, unsigned long); + +/** + * @brief Force an instruction cache invalidate over whole memory + * + * Also see #inst_cache_hit_invalidate + * + */ void inst_cache_invalidate_all(void); + +/** + * @brief Get amount of available memory. + * + * @return amount of total available memory in bytes. + */ int get_memory_size(); + +/** + * @brief Is expansion pak in use. + * + * Checks whether the maximum available memory has been expanded to 8 MiB + * + * @return true if expansion pak detected, false otherwise. + * + * @note On iQue, this function returns true only if the game has been assigned + * exactly 8 MiB of RAM. + */ bool is_memory_expanded(); + +/** + * @brief Allocate a buffer that will be accessed as uncached memory. + * + * This function allocates a memory buffer that can be safely read and written + * through uncached memory accesses only. It makes sure that the buffer + * does not share any cacheline with other buffers in the heap, and returns + * a pointer in the uncached segment (0xA0000000). + * + * The buffer contents are uninitialized. + * + * To free the buffer, use #free_uncached.
+ * + * @param[in] size The size of the buffer to allocate + * + * @return a pointer to the start of the buffer (in the uncached segment) + * + * @see #free_uncached + */ void *malloc_uncached(size_t size); + +/** + * @brief Allocate a buffer that will be accessed as uncached memory, specifying alignment + * + * This function is similar to #malloc_uncached, but allows to force a higher + * alignment to the buffer (just like memalign does). See #malloc_uncached + * for reference. + * + * @param[in] align The alignment of the buffer in bytes (eg: 64) + * @param[in] size The size of the buffer to allocate + * + * @return a pointer to the start of the buffer (in the uncached segment) + * + * @see #malloc_uncached + */ void *malloc_uncached_aligned(int align, size_t size); + +/** + * @brief Free an uncached memory buffer + * + * This function frees a memory buffer previously allocated via #malloc_uncached. + * + * @param[in] buf The buffer to free + * + * @see #malloc_uncached + */ void free_uncached(void *buf); /** @brief Type of TV video output */ @@ -333,6 +484,13 @@ typedef enum { TV_MPAL = 2 ///< Video output is M-PAL } tv_type_t; +/** + * @brief Is system NTSC/PAL/MPAL + * + * Checks enum hard-coded in PIF BootROM to indicate the tv type of the system. + * + * @return enum value indicating PAL, NTSC or MPAL + */ tv_type_t get_tv_type(); /** @brief Reset types */ diff --git a/include/rtc.h b/include/rtc.h index 86dae75fd5..cd1d8c9c88 100644 --- a/include/rtc.h +++ b/include/rtc.h @@ -10,7 +10,88 @@ #include /** - * @addtogroup rtc + * @defgroup rtc Real-Time Clock Subsystem + * @ingroup peripherals + * @brief Real-time clock interface. + * @author Christopher Bonhage + * + * The Joybus real-time clock is a cartridge peripheral that uses a battery + * to power a clock that tracks the date, time, and day of the week. The + * real-time clock keeps running even when the N64 is powered-off. 
The + * Joybus RTC is accessed through the serial interface (SI) similar to EEPROM + * and controllers. The Joybus RTC was only ever available on one official + * cartridge that was only available in Japan: Dōbutsu no Mori (Animal Forest). + * Many emulators and flash carts include support for the Animal Forest RTC, + * which makes it possible to include real-time clock functionality in homebrew! + * There is also a real-time clock included in the N64DD hardware, which uses + * a different interface and is not currently supported by libdragon. + * + * To check if the real-time clock is available, call #rtc_init. + * To read the current time from the real-time clock, call #rtc_get. + * Once the RTC subsystem is initialized, you can also use ISO C Time functions + * to get the current time, for example: `time(NULL)` will return the number of + * seconds elapsed since the UNIX epoch (January 1, 1970 at 00:00:00). + * To check if the real-time clock supports writes, call #rtc_is_writable. + * To write a new time to the real-time clock, call #rtc_set. + * + * This subsystem handles decoding and encoding the date/time from its internal + * format into a struct called #rtc_time_t, which contains integer values for + * year, month, day-of-month, day-of-week, hour, minute, and second. + * + * The Joybus RTC contains 3 "blocks" (or zones) which contain 8 bytes of data: + * Block 0 contains a half-word control register and opaque calibration data. + * Block 1 is unused and unsupported. See notes below. + * Block 2 contains the current date/time as packed binary-coded decimal. + * + * Animal Forest did not use block 1 at all, so most emulators do not bother to + * implement it. Theoretically, block 1 could be used as 8-bytes of SRAM-backed + * storage, but this is not supported by libdragon's Real-Time Clock Subsystem. + * If you need storage, consider using a standard cartridge save type or saving + * to a Controller Pak. 
+ * + * (As of July 2021) Joybus RTC does not work in combination with any EEPROM + * save type on EverDrive64 3.0 or X7. To have the best compatibility and player + * experience, it is not recommended to use the EEPROM + RTC ROM configuration. + * This is a bug in the EverDrive64 firmware and not a system limitation imposed + * by the Joybus protocol or Serial Interface. + * + * Unfortunately, since only one game ever used Joybus RTC (and that game was + * later re-released on the GameCube in English), real-time clock support in + * emulators and flash carts can be incomplete, inaccurate, or non-existent. + * Many emulators do not actually implement the Joybus RTC write command and + * always respond with the host device's current local time. Some emulators + * and flash carts support writing to RTC but will not persist the date/time + * after resetting or powering-off. You can run the `rtctest` example ROM on + * your preferred emulator or flash cart to see what RTC support is available. + * + * The only reliable way to check if writes are actually supported is to write + * a time to the RTC and read the time back out. Many emulators that do + * support RTC reads will silently ignore RTC writes. You should detect + * whether writes are supported using #rtc_is_writable so that you can + * conditionally show the option to change the time if it's supported. If the + * RTC supports writes, it is safe to call #rtc_set to set the date and time. + * + * Due to the inaccurate and inconsistent behavior of RTC reproductions that + * currently exist, this subsystem trades-off complete accuracy with the actual + * Animal Forest RTC in favor of broader compatibility with the various quirks + * and bugs that exist in real-world scenarios like emulators and flash carts.
+ * + * Some notable examples of RTC support in the ecosystem (as of July 2021): + * + * 64drive hw2 fully implements Joybus RTC including writes, but requires + * delays after setting the time (see #JOYBUS_RTC_WRITE_FINISHED_DELAY). + * + * EverDrive64 3.0 and X7 partially support Joybus RTC, with caveats: The RTC + * must be explicitly enabled in the OS or with a ROM header configuration; + * RTC will not be detected if the EEPROM save type is used; RTC writes are + * not supported through the SI, so changing the time must be done in the OS. + * + * UltraPIF fully implements an emulated Joybus RTC that can be accessed even + * when the cartridge does not include the real-time clock circuitry. + * + * Special thanks to marshallh and jago85 for their hard work and research + * reverse-engineering and documenting the inner-workings of the Joybus RTC. + * * @{ */ @@ -39,11 +120,129 @@ typedef struct rtc_time_t extern "C" { #endif +/** + * @brief High-level convenience helper to initialize the RTC subsystem. + * + * The RTC Subsystem depends on the libdragon Timer Subsystem, so make sure + * to call #timer_init before calling #rtc_init! + * + * Some flash carts require the RTC to be explicitly enabled before loading + * the ROM file. Some emulators and flash carts do not support RTC at all. + * + * This function will detect if the RTC is available and if so, will + * prepare the RTC so that the current time can be read from it. + * + * This operation may take up to 50 milliseconds to complete. + * + * This will also hook the RTC into the newlib gettimeofday function, so + * you will be able to use the ISO C time functions if RTC is available. + * + * @return whether the RTC is present and supported by the RTC Subsystem. + */ bool rtc_init( void ); + +/** + * @brief Close the RTC Subsystem, disabling system hooks. + * + * Unhooks the RTC from the newlib gettimeofday function. + * This will cause subsequent calls to gettimeofday to error with ENOSYS. 
+ * + * The only reason you should ever need to call this is if you need to + * stop the Timer Subsystem, which the RTC Subsystem depends on. If you + * do need to do this, make sure to call #rtc_close BEFORE #timer_close + * and then call #rtc_init again after you call #timer_init to restart + * the Timer Subsystem! + */ void rtc_close( void ); + +/** + * @brief Determine whether the RTC supports writing the time. + * + * Some emulators and flash carts do not support writing to the RTC, so + * this function makes an attempt to detect silent write failures and will + * return `false` if it is unable to change the time on the RTC. + * + * This function is useful if your program wants to conditionally offer the + * ability to set the time based on hardware/emulator support. + * + * Unfortunately this operation may introduce a slight drift in the clock, + * but it is the only way to determine if the RTC supports the write command. + * + * This operation will take approximately 1 second to complete. + * + * @return whether RTC writes appear to be supported + */ bool rtc_is_writable( void ); + +/** + * @brief Read the current date/time from the real-time clock. + * + * If the RTC is not detected or supported, this function will + * not modify the destination rtc_time parameter. + * + * Your code should call this once per frame to update the #rtc_time_t + * data structure. The RTC Subsystem maintains a cache of the + * most-recent RTC time that was read and will only perform an + * actual RTC read command if the cache is invalidated. The + * destination rtc_time parameter will be updated regardless of + * the cache validity. + * + * Cache will invalidate every #RTC_GET_CACHE_INVALIDATE_TICKS. + * Calling #rtc_set will also invalidate the cache. + * + * If an actual RTC read command is needed, this function can take + * a few milliseconds to complete. 
+ * + * @param[out] rtc_time + * Destination pointer for the RTC time data structure + * + * @return whether the rtc_time destination pointer data was modified + */ bool rtc_get( rtc_time_t * rtc_time ); + +/** + * @brief High-level convenience helper to set the RTC date/time. + * + * Prepares the RTC for writing, sets the new time, and resumes the clock. + * + * This function will take approximately 570 milliseconds to complete. + * + * Unfortunately, the best way to ensure that writes to the RTC have + * actually finished is by waiting for a fixed duration. Emulators may not + * accurately reflect this, but this delay is necessary on real hardware. + * + * @param[in] rtc_time + * Source pointer for the RTC time data structure + * + * @return false if the RTC does not support being set + */ bool rtc_set( rtc_time_t * rtc_time ); + +/** + * @brief Calculate sane values for arbitrary time inputs. + * + * If your time inputs are already sane, nothing should change. + * This function will clamp date/time values within the expected ranges, + * including the correct day-of-month based on year/month. It will also + * recalculate the day-of-week based on the clamped year/month/day. + * + * This is useful to call while the player is adjusting the time after each + * input to ensure that the date being set always makes sense before they + * actually confirm and commit the updated date/time. The rtctest example + * demonstrates a user-interface for setting the time with live validation. + * + * Internally, RTC cannot represent dates before 1990-01-01, although some + * RTC implementations (like UltraPIF) only support dates after 2000-01-01. + * + * For highest compatibility, it is not recommended to set the date past + * 2038-01-19 03:14:07 UTC, which is the UNIX timestamp Epochalypse. + * + * Special thanks to networkfusion for providing the algorithm to + * calculate day-of-week from an arbitrary date. 
+ * + * @param[in,out] rtc_time + * Pointer to the RTC time data structure + */ void rtc_normalize_time( rtc_time_t * rtc_time ); #ifdef __cplusplus diff --git a/include/system.h b/include/system.h index 3a3d006328..54bb516339 100644 --- a/include/system.h +++ b/include/system.h @@ -6,8 +6,35 @@ #ifndef __LIBDRAGON_SYSTEM_H #define __LIBDRAGON_SYSTEM_H -/** - * @addtogroup system +/** + * @defgroup system newlib Interface Hooks + * @brief System hooks to provide low level threading and filesystem functionality to newlib. + * + * newlib provides all of the standard C libraries for homebrew development. + * In addition to standard C libraries, newlib provides some additional bridging + * functionality to allow POSIX function calls to be tied into libdragon. + * Currently this is used only for filesystems. The newlib interface hooks here + * are mostly stubs that allow homebrew applications to compile. + * + * The sbrk function is responsible for allowing newlib to find the next chunk + * of free space for use with malloc calls. The size of the available heap is + * computed using the memory size computed by the boot code (IPL3), and available + * via #get_memory_size(), which is normally either 4 MiB or 8 MiB if the expansion + * pak is available. + * + * libdragon has defined a custom callback structure for filesystems to use. + * Providing relevant hooks for calls that your filesystem supports and passing + * the resulting structure to #attach_filesystem will hook your filesystem into + * newlib. Calls to POSIX file operations will be passed on to your filesystem + * code if the file prefix matches, allowing code to make use of your filesystem + * without being rewritten. + * + * For example, your filesystem provides libdragon an interface to access a + * homebrew SD card interface. You register a filesystem with "sd:/" as the prefix + * and then attempt to open "sd://directory/file.txt".
The open callback for your + * filesystem will be passed the file "/directory/file.txt". The file handle returned + * will be passed into all subsequent calls to your filesystem until the file is + * closed. * @{ */ @@ -222,13 +249,87 @@ typedef struct int (*stderr_write)( char *data, unsigned int len ); } stdio_t; +/** + * @brief Register a filesystem with newlib + * + * This function will take a prefix in the form of 'prefix:/' and a pointer + * to a filesystem structure of relevant callbacks and register it with newlib. + * Any standard open/fopen calls with the registered prefix will be passed + * to this filesystem. Userspace code does not need to know the underlying + * filesystem, only the prefix that it has been registered under. + * + * The filesystem pointer passed in to this function should not go out of scope + * for the lifetime of the filesystem. + * + * @param[in] prefix + * Prefix of the filesystem + * @param[in] filesystem + * Structure of callbacks for various functions in the filesystem. + * If the registered filesystem doesn't support an operation, it + * should leave the callback null. + * + * @retval -1 if the parameters are invalid + * @retval -2 if the prefix is already in use + * @retval -3 if there are no more slots for filesystems + * @retval 0 if the filesystem was registered successfully + */ int attach_filesystem( const char * const prefix, filesystem_t *filesystem ); + +/** + * @brief Unregister a filesystem from newlib + * + * @note This function will make sure all files are closed before unregistering + * the filesystem. 
+ * + * @param[in] prefix + * The prefix that was used to register the filesystem + * + * @retval -1 if the parameters were invalid + * @retval -2 if the filesystem couldn't be found + * @retval 0 if the filesystem was successfully unregistered + */ int detach_filesystem( const char * const prefix ); + +/** + * @brief Hook into stdio for STDIN, STDOUT and STDERR callbacks + * + * @param[in] stdio_calls + * Pointer to structure containing callbacks for stdio functions + * + * @return 0 on successful hook or a negative value on failure. + */ int hook_stdio_calls( stdio_t *stdio_calls ); + +/** + * @brief Unhook from stdio + * + * @param[in] stdio_calls + * Pointer to structure containing callbacks for stdio functions + * + * @return 0 on successful hook or a negative value on failure. + */ int unhook_stdio_calls( stdio_t *stdio_calls ); + +/** + * @brief Hook into gettimeofday with a current time callback. + * + * @param[in] time_fn + * Pointer to callback for the current time function + * + * @return 0 if successful or a negative value on failure. + */ int hook_time_call( time_t (*time_fn)( void ) ); + +/** + * @brief Unhook from gettimeofday current time callback. + * + * @param[in] time_fn + * Pointer to callback for the current time function + * + * @return 0 if successful or a negative value on failure. + */ int unhook_time_call( time_t (*time_fn)( void ) ); #ifdef __cplusplus diff --git a/include/timer.h b/include/timer.h index 0b0e25caef..c39d584881 100644 --- a/include/timer.h +++ b/include/timer.h @@ -9,8 +9,32 @@ #include #include "n64sys.h" -/** - * @addtogroup timer +/** + * @defgroup timer Timer Subsystem + * @ingroup libdragon + * @brief Interface to the timer module in the MIPS r4300 processor. + * + * The timer subsystem allows code to receive a callback after a specified + * number of ticks or microseconds. It interfaces with the MIPS + * coprocessor 0 to handle the timer interrupt and provide useful timing + * services. 
+ * + * Before attempting to use the timer subsystem, code should call #timer_init. + * After the timer subsystem has been initialized, a new one-shot or + * continuous timer can be created with #new_timer. To remove an expired + * one-shot timer or a recurring timer, use #delete_timer. To temporarily + * stop a timer, use #stop_timer. To restart a stopped timer or an expired + * one-shot timer, use #start_timer. Once code no longer needs the timer + * subsystem, a call to #timer_close will free all continuous timers and shut + * down the timer subsystem. Note that timers removed with #stop_timer or + * expired one-shot timers will not be removed automatically and are the + * responsibility of the calling code to be freed, regardless of a call to + * #timer_close. + * + * Because the MIPS internal counter wraps around after ~90 seconds (see + * TICKS_READ), it's not possible to schedule a timer more than 90 seconds + * in the future. + * * @{ */ @@ -94,28 +118,147 @@ typedef struct timer_link extern "C" { #endif -/* initialize timer subsystem */ +/** + * @brief Initialize the timer subsystem + * + * This function will reset the COP0 ticks counter to 0. Even if you + * later access the hardware counter directly (via TICKS_READ()), it should not + * be a problem if you call timer_init() early in the application main. + * + * Do not modify the COP0 ticks counter after calling this function. Doing so + * will impede functionality of the timer module. + * + * The timer subsystem tracks the number of times #timer_init is called + * and will only initialize the subsystem on the first call. This reference + * count also applies to #timer_close, which will only close the subsystem + * if it is called the same number of times as #timer_init. + */ void timer_init(void); -/* delete all timers in list */ + +/** + * @brief Free and close the timer subsystem + * + * This function will ensure all recurring timers are deleted from the list + * before closing.
One-shot timers that have expired will need to be + * manually deleted with #delete_timer. + * + * The timer subsystem tracks the number of times #timer_init is called + * and will only close the subsystem if #timer_close is called the same + * number of times. + */ void timer_close(void); -/* return total ticks since timer was initialized */ + +/** + * @brief Return total ticks since timer was initialized, as a 64-bit counter. + * + * @return The number of ticks since the timer was initialized + * + */ long long timer_ticks(void); -/* create a new timer and add to list */ + +/** + * @brief Create a new timer and add to list + * + * If you need to associate some data with the timer, consider using + * #new_timer_context to include a pointer in the callback. + * + * @param[in] ticks + * Number of ticks before the timer should fire + * @param[in] flags + * Timer flags. See #TF_ONE_SHOT, #TF_CONTINUOUS and #TF_DISABLED + * @param[in] callback + * Callback function to call when the timer expires + * + * @return A pointer to the timer structure created + */ timer_link_t *new_timer(int ticks, int flags, timer_callback1_t callback); -/* create a new timer and add to list */ + +/** + * @brief Create a new timer with context and add to list + * + * If you don't need the context, consider using #new_timer instead. + * + * @param[in] ticks + * Number of ticks before the timer should fire + * @param[in] flags + * Timer flags.
See #TF_ONE_SHOT, #TF_CONTINUOUS and #TF_DISABLED + * @param[in] callback + * Callback function to call when the timer expires + * @param[in] ctx + * Opaque pointer to pass as an argument to callback + * + * @return A pointer to the timer structure created + */ timer_link_t *new_timer_context(int ticks, int flags, timer_callback2_t callback, void *ctx); -/* start a timer not currently in the list */ + +/** + * @brief Start a timer (not currently in the list) + * + * If you need to associate some data with the timer, consider using + * #start_timer_context to include a pointer in the callback. + * + * @param[in] timer + * Pointer to timer structure to reinsert and start + * @param[in] ticks + * Number of ticks before the timer should fire + * @param[in] flags + * Timer flags. See #TF_ONE_SHOT, #TF_CONTINUOUS, and #TF_DISABLED + * @param[in] callback + * Callback function to call when the timer expires + */ void start_timer(timer_link_t *timer, int ticks, int flags, timer_callback1_t callback); -/* start a timer not currently in the list */ + +/** + * @brief Start a timer (not currently in the list) with context + * + * If you don't need the context, consider using #start_timer instead. + * + * @param[in] timer + * Pointer to timer structure to reinsert and start + * @param[in] ticks + * Number of ticks before the timer should fire + * @param[in] flags + * Timer flags. 
See #TF_ONE_SHOT, #TF_CONTINUOUS, and #TF_DISABLED + * @param[in] callback + * Callback function to call when the timer expires + * @param[in] ctx + * Opaque pointer to pass as an argument to callback + */ void start_timer_context(timer_link_t *timer, int ticks, int flags, timer_callback2_t callback, void *ctx); -/* reset a timer and add to list */ + +/** + * @brief Reset a timer and add to list + * + * @param[in] timer + * Pointer to timer structure to reinsert and start + */ void restart_timer(timer_link_t *timer); -/* remove a timer from the list */ + +/** + * @brief Stop a timer and remove it from the list + * + * @note This function does not free a timer structure, use #delete_timer + * to do this. + * + * @note It is safe to call this function from a timer callback, including + * to stop a timer from its own callback. + * + * @param[in] timer + * Timer structure to stop and remove + */ void stop_timer(timer_link_t *timer); -/* remove a timer from the list and delete it */ + +/** + * @brief Remove a timer from the list and delete it + * + * @note It is not safe to call this function from a timer callback. + + * @param[in] timer + * Timer structure to stop, remove and free + */ void delete_timer(timer_link_t *timer); #ifdef __cplusplus diff --git a/include/tpak.h b/include/tpak.h index b7917e1ff9..17b19b5c4b 100755 --- a/include/tpak.h +++ b/include/tpak.h @@ -10,6 +10,34 @@ #include #include +/** + * @defgroup transferpak Transfer Pak interface + * @ingroup controller + * @brief Transfer Pak interface + * + * The Transfer Pak interface allows access to Game Boy and Game Boy Color + * cartridges connected through the accessory port of each controller. + * + * Before accessing a Transfer Pak, first call #tpak_init to boot up the + * accessory and ensure that it is in working order. For advanced use-cases, + * #tpak_set_power and #tpak_set_access can also be called directly if you + * need to put the Transfer Pak into a certain mode. 
You can verify that the + * Transfer Pak is in the correct mode by inspecting the #tpak_get_status flags. + * + * Whenever the Transfer Pak is not in use, it is recommended to power it off + * by calling @ref tpak_set_power "`tpak_set_power(controller, false)`". + * + * You can read the connected Game Boy cartridge's ROM header by calling + * #tpak_get_cartridge_header and validating the result with #tpak_check_header. + * If the ROM header checksum does not match, it is likely that the cartridge + * connection is poor. + * + * You can use #tpak_read and #tpak_write to access the Game Boy cartridge. + * Note that these functions do not account for cartridge bank switching. + * For more information about Game Boy cartridge bank switching, refer to the + * GBDev Pan Docs at https://gbdev.io/pandocs/ + */ + #ifdef __cplusplus extern "C" { #endif @@ -314,15 +342,130 @@ struct gameboy_cartridge_header uint8_t overflow[16]; }; +/** + * @brief Prepare a Transfer Pak for read/write commands. + * + * Powers on the Transfer Pak and enables access to the Game Boy cartridge. + * Also performs status checks to confirm the Transfer Pak can be accessed reliably. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_init(int controller); + +/** + * @brief Set Transfer Pak or Game Boy cartridge status/control value. + * + * This is an internal helper to set a Transfer Pak status or control setting. + * This function is not suitable for setting individual bytes in Save RAM! + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[in] address + * Address of the setting. Should be between 0x8000 and 0xBFE0 + * @param[in] value + * A byte of data to fill the write buffer with. + */ int tpak_set_value(int controller, uint16_t address, uint8_t value); + +/** + * @brief Set the cartridge data address memory bank for a Transfer Pak. 
+ * + * Change the bank of address space that is available for #tpak_read and + * #tpak_write between Transfer Pak addresses 0xC000 and 0xFFFF. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[in] bank + * The bank (0-3) to switch to. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_set_bank(int controller, int bank); + +/** + * @brief Set the power enabled flag for a Transfer Pak. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[in] power_state + * True to power the Transfer Pak and cartridge on, false to turn it off. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_set_power(int controller, bool power_state); + +/** + * @brief Set the access mode flag for a Transfer Pak. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[in] access_state + * Whether to allow access to the Game Boy cartridge. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_set_access(int controller, bool access_state); + +/** + * @brief Get the status flags for a Transfer Pak. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @return The status byte with @ref TPAK_STATUS flags + */ uint8_t tpak_get_status(int controller); + +/** + * @brief Read the Game Boy cartridge ROM header from a Transfer Pak. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[out] header + * Pointer to destination Game Boy cartridge ROM header data structure. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_get_cartridge_header(int controller, struct gameboy_cartridge_header* header); + +/** + * @brief Verify a Game Boy cartridge ROM header checksum. + * + * Confirms that the Transfer Pak is connected and working properly. + * + * @param[in] header The Game Boy ROM header to check. 
+ */ bool tpak_check_header(struct gameboy_cartridge_header* header); + +/** + * @brief Write data from a buffer to a Game Boy cartridge via Transfer Pak. + * + * Save RAM is located between gameboy addresses 0xA000 and 0xBFFF, which is in the Transfer Pak's bank 2. + * This function does not account for cartridge bank switching, so to switch between MBC1 RAM banks, for example, + * you'll need to switch to Tpak bank 1, and write to address 0xE000, which translates to address 0x6000 on the gameboy. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[in] address + * address in Game Boy cartridge space to write to. + * @param[in] data + * buffer containing the data to write. + * @param[in] size + * length of the buffer. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_write(int controller, uint16_t address, uint8_t* data, uint16_t size); + +/** + * @brief Read data from a Game Boy cartridge to a buffer via Transfer Pak. + * + * @param[in] controller + * The controller (0-3) with Transfer Pak connected. + * @param[in] address + * address in Game Boy cartridge space to read from. + * @param[in] buffer + * buffer to copy cartridge data into. + * @param[in] size + * length of the data to be read. + * @return 0 if successful or @ref TPAK_ERROR otherwise. + */ int tpak_read(int controller, uint16_t address, uint8_t* buffer, uint16_t size); #ifdef __cplusplus diff --git a/src/audio.c b/src/audio.c index d8776ae733..8de32388dd 100644 --- a/src/audio.c +++ b/src/audio.c @@ -13,42 +13,6 @@ #include #include -/** - * @defgroup audio Audio Subsystem - * @ingroup libdragon - * @brief Interface to the N64 audio hardware. - * - * The audio subsystem handles queueing up chunks of audio data for - * playback using the N64 audio DAC. The audio subsystem handles - * DMAing chunks of data to the audio DAC as well as audio callbacks - * when there is room for another chunk to be written. 
Buffer size - * is calculated automatically based on the requested audio frequency. - * The audio subsystem accomplishes this by interfacing with the audio - * interface (AI) registers. - * - * Because the audio DAC is timed off of the system clock of the N64, - * the audio subsystem needs to know what region the N64 is from. This - * is due to the fact that the system clock is timed differently for - * PAL, NTSC and MPAL regions. This is handled automatically by the - * audio subsystem based on settings left by the bootloader. - * - * Code attempting to output audio on the N64 should initialize the - * audio subsystem at the desired frequency and with the desired number - * of buffers using #audio_init. More audio buffers allows for smaller - * chances of audio glitches but means that there will be more latency - * in sound output. When new data is available to be output, code should - * check to see if there is room in the output buffers using - * #audio_can_write. Code can probe the current frequency and buffer - * size using #audio_get_frequency and #audio_get_buffer_length respectively. - * When there is additional room, code can add new data to the output - * buffers using #audio_write. Be careful as this is a blocking operation, - * so if code doesn't check for adequate room first, this function will - * not return until there is room and the samples have been written. - * When all audio has been written, code should call #audio_close to shut - * down the audio subsystem cleanly. - * @{ - */ - /** * @name DAC rates for different regions * @{ @@ -208,21 +172,6 @@ static void audio_callback() enable_interrupts(); } - -/** - * @brief Initialize the audio subsystem - * - * This function will set up the AI to play at a given frequency and - * allocate a number of back buffers to write data to. - * - * @note Before re-initializing the audio subsystem to a new playback - * frequency, remember to call #audio_close. 
- * - * @param[in] frequency - * The frequency in Hz to play back samples at - * @param[in] numbuffers - * The number of buffers to allocate internally - */ void audio_init(const int frequency, int numbuffers) { int clockrate; @@ -311,16 +260,6 @@ void audio_init(const int frequency, int numbuffers) _paused = false; } -/** - * @brief Install a audio callback to fill the audio buffer when required. - * - * This function allows to implement a pull-based audio system. It registers - * a callback which will be invoked under interrupt whenever the AI is ready - * to have more samples enqueued. The callback can fill the provided audio - * data with samples that will be enqueued for DMA to AI. - * - * @param[in] fill_buffer_callback Callback to fill an empty audio buffer - */ void audio_set_buffer_callback(audio_fill_buffer_callback fill_buffer_callback) { disable_interrupts(); @@ -331,12 +270,6 @@ void audio_set_buffer_callback(audio_fill_buffer_callback fill_buffer_callback) enable_interrupts(); } -/** - * @brief Close the audio subsystem - * - * This function closes the audio system and cleans up any internal - * memory allocated by #audio_init. - */ void audio_close() { set_AI_interrupt(0); @@ -375,13 +308,6 @@ static void audio_paused_callback(short *buffer, size_t numsamples) memset(buffer, 0, numsamples * sizeof(short) * 2); } -/** - * @brief Pause or resume audio playback - * - * Should only be used when a fill_buffer_callback has been set - * in #audio_init. - * Silence will be generated while playback is paused. - */ void audio_pause(bool pause) { if (pause != _paused && _fill_buffer_callback) { disable_interrupts(); @@ -443,26 +369,6 @@ void audio_write(const short * const buffer) enable_interrupts(); } -/** - * @brief Start writing to the first free internal buffer. - * - * This function is similar to #audio_write but instead of taking samples - * and copying them to an internal buffer, it returns the pointer to the - * internal buffer. 
This allows generating the samples directly in the buffer - * that will be sent via DMA to AI, without any subsequent memory copy. - * - * The buffer should be filled with stereo interleaved samples, and - * exactly #audio_get_buffer_length samples should be written. - * - * After you have written the samples, call audio_write_end() to notify - * the library that the buffer is ready to be sent to AI. - * - * @note This function will block until there is room to write an audio sample. - * If you do not want to block, check to see if there is room by calling - * #audio_can_write. - * - * @return Pointer to the internal memory buffer where to write samples. - */ short* audio_write_begin(void) { if(!buffers) @@ -489,14 +395,6 @@ short* audio_write_begin(void) return buffers[now_writing]; } -/** - * @brief Complete writing to an internal buffer. - * - * This function is meant to be used in pair with audio_write_begin(). - * Call this once you have generated the samples, so that the audio - * system knows the buffer has been filled and can be played back. - * - */ void audio_write_end(void) { disable_interrupts(); @@ -505,16 +403,6 @@ void audio_write_end(void) enable_interrupts(); } -/** - * @brief Write a chunk of silence - * - * This function will write silence to be played back by the audio system. - * It writes exactly #audio_get_buffer_length stereo samples. - * - * @note This function will block until there is room to write an audio sample. - * If you do not want to block, check to see if there is room by calling - * #audio_can_write. - */ void audio_write_silence() { if(!buffers) @@ -542,13 +430,6 @@ void audio_write_silence() enable_interrupts(); } -/** - * @brief Return whether there is an empty buffer to write to - * - * This function will check to see if there are any buffers that are not full to - * write data to. If all buffers are full, wait until the AI has played back - * the next buffer in its queue and try writing again. 
- */ volatile int audio_can_write() { if(!buffers) @@ -561,32 +442,6 @@ volatile int audio_can_write() return (buf_full & (1<status & (PI_STATUS_DMA_BUSY | PI_STATUS_IO_BUSY); } -/** - * @brief Check whether the specified PI address can be accessed doing I/O from CPU - * - * The PI bus covers the full 32-bit address range. The full range is only accessible - * via DMA, though. A part of the range is also memory mapped to the CPU and can be - * accessed via #io_read and #io_write. - * - * The ranges of PI address that can be accessed via CPU are: - * - * * 0x0500_0000 - 0x0FFF_FFFF: used by N64DD and SRAM on cartridge - * * 0x1000_0000 - 0x1FBF_FFFF: cartridge ROM - * * 0x1FD0_0000 - 0x1FFF_FFFF: no known PI peripherals use this - * - * The rest of the 32-bit address range is only accessible via DMA. - * - * Notice also that the range 0x2000_0000 - 0x7FFF_FFFF is theoretically accessible - * by the CPU but only via 64-bit addressing, so it requires assembly instructions - * (as the libdragon toolchain uses 32-bit pointers). No known PI peripherals use this - * range anyway. - * - * This function checks whether the specified address falls into the range accessible - * via CPU or not. - * - * @param pi_address PI address to check - * @return True if the address is memory mapped, false if it is not - */ bool io_accessible(uint32_t pi_address) { // Below 0x0500_0000, there is RDRAM and RCP registers. @@ -110,26 +63,6 @@ volatile int dma_busy(void) return __dma_busy(); } -/** - * @brief Start reading data from a peripheral through PI DMA (low-level) - * - * This function should be used when reading from a cartridge peripheral (typically - * ROM). This function just begins executing a raw DMA transfer, which is - * well-defined only for RAM addresses which are multiple of 8, ROM addresses - * which are multiple of 2, and lengths which are multiple of 2. - * - * Use #dma_wait to wait for the end of the transfer. 
- * - * See #dma_read_async for a higher level primitive which can perform almost - * arbitrary transfers. - * - * @param[out] ram_address - * Pointer to a buffer to place read data (must be 8-byte aligned) - * @param[in] pi_address - * Memory address of the peripheral to read from (must be 2-byte aligned) - * @param[in] len - * Length in bytes to read into ram_address (must be multiple of 2) - */ void dma_read_raw_async(void * ram_address, unsigned long pi_address, unsigned long len) { assert(len > 0); @@ -148,24 +81,6 @@ void dma_read_raw_async(void * ram_address, unsigned long pi_address, unsigned l enable_interrupts(); } -/** - * @brief Start writing data to a peripheral through PI DMA (low-level) - * - * This function should be used when writing to a cartridge peripheral (typically - * ROM). This function just begins executing a raw DMA transfer, which is - * well-defined only for RAM addresses which are multiple of 8, ROM addresses - * which are multiple of 2, and lengths which are multiple of 2. - * - * Use #dma_wait to wait for the end of the transfer. - * - * - * @param[out] ram_address - * Pointer to a buffer to read data from (must be 8-byte aligned) - * @param[in] pi_address - * Memory address of the peripheral to write to (must be 2-byte aligned) - * @param[in] len - * Length in bytes to write into pi_address (must be multiple of 2) - */ void dma_write_raw_async(const void * ram_address, unsigned long pi_address, unsigned long len) { assert(len > 0); @@ -230,33 +145,6 @@ static uint8_t __io_read8(void *pi_pointer) { return __io_read16(pi_pointer)>>8; } -/** - * @brief Start reading data from a peripheral through PI DMA - * - * This function must be used when reading a chunk of data from a cartridge - * peripheral (typically, ROM). It is a wrapper over #dma_read_raw_async that allows - * arbitrary aligned addresses and any length (including odd sizes). 
For - * fully-aligned addresses it quickly falls back to #dma_read_raw_async, so it can - * be used generically as "default" PI DMA transfer function. - * - * The only constraint on alignment is that the RAM and PI addresses must have - * the same 1-bit misalignment, that is they must either be even addresses or - * odd addresses. Notice that this function will assert if this constraint is - * not respected. - * - * Use #dma_wait to wait for the end of the transfer. - * - * For non performance sensitive tasks such as reading and parsing data from - * ROM at loading time, a better option is to use DragonFS, where #dfs_read - * falls back to a CPU memory copy to realign the data when required. - * - * @param[out] ram_pointer - * Pointer to a buffer in RDRAM to place read data - * @param[in] pi_address - * Memory address of the peripheral to read from - * @param[in] len - * Length in bytes to read into ram_pointer - */ void dma_read_async(void *ram_pointer, unsigned long pi_address, unsigned long len) { void *ram = UncachedAddr(ram_pointer); @@ -319,33 +207,12 @@ void dma_read_async(void *ram_pointer, unsigned long pi_address, unsigned long l enable_interrupts(); } -/** - * @brief Wait until an async DMA or I/O transfer is finished. - */ void dma_wait(void) { while (__dma_busy()) {} } -/** - * @brief Read data from a peripheral through PI DMA, waiting for completion. - * - * This function performs a blocking read. See #dma_read_async for more information. - * - * @param[out] ram_address - * Pointer to a buffer in RDRAM to place read data - * @param[in] pi_address - * ROM address to read from (must be in range (0x10000000-0x1FFFFFFF). - * @param[in] len - * Length in bytes to read into ram_address - * - * @note This function has always had an historical mistake: the pi_address is mangled - * to be forced into the ROM area (0x10000000-0x1FFFFFFF). 
This is wrong as the - * PI bus has full 32-bit address, and the same function could have been used - * to access the whole range. - * If you need to read outside the ROM area, use #dma_read_async instead. - */ void dma_read(void *ram_address, unsigned long pi_address, unsigned long len) { pi_address = (pi_address | 0x10000000) & 0x1FFFFFFF; @@ -353,24 +220,6 @@ void dma_read(void *ram_address, unsigned long pi_address, unsigned long len) dma_wait(); } -/** - * @brief Write to a peripheral - * - * This function should be used when writing to the cartridge. - * - * @param[in] ram_address - * Pointer to a buffer to read data from - * @param[in] rom_address - * Cartridge address to write to (must be in range (0x10000000-0x1FFFFFFF). - * @param[in] len - * Length in bytes to write to peripheral - * - * @note This function has always had an historical mistake: the pi_address is mangled - * to be forced into the ROM area (0x10000000-0x1FFFFFFF). This is wrong as the - * PI bus has full 32-bit address, and the same function could have been used - * to access the whole range. - * If you need to read outside the ROM area, use #dma_write_raw_async instead. - */ void dma_write(const void * ram_address, unsigned long rom_address, unsigned long len) { rom_address = (rom_address | 0x10000000) & 0x1FFFFFFF; @@ -378,19 +227,6 @@ void dma_write(const void * ram_address, unsigned long rom_address, unsigned lon dma_wait(); } -/** - * @brief Read a 32 bit integer from a peripheral using the CPU. - * - * @param[in] pi_address - * Memory address of the peripheral to read from - * - * @return The 32 bit value read from the peripheral - * - * @note This function only works if the specified PI address falls within a range - * which is memory mapped on the CPU. See #io_accessible for more information. 
- * - * @see #io_accessible - */ uint32_t io_read(uint32_t pi_address) { uint32_t retval; @@ -402,23 +238,6 @@ uint32_t io_read(uint32_t pi_address) return retval; } -/** - * @brief Write a 32 bit integer to a peripheral using the CPU. - * - * Notice that writes are performed asynchronously, so the data might have not been - * fully written to the peripheral yet when the function returns. Use #dma_wait if - * you need to wait for the transfer to be finished. - * - * @param[in] pi_address - * Memory address of the peripheral to write to - * @param[in] data - * 32 bit value to write to peripheral - * - * @note This function only works if the specified PI address falls within a range - * which is memory mapped on the CPU. See #io_accessible for more information. - * - * @see #io_accessible - */ void io_write(uint32_t pi_address, uint32_t data) { volatile uint32_t *uncached_address = (uint32_t *)(pi_address | 0xa0000000); @@ -432,5 +251,3 @@ void io_write(uint32_t pi_address, uint32_t data) enable_interrupts(); } - -/** @} */ /* dma */ diff --git a/src/dragonfs.c b/src/dragonfs.c index c945d1d66c..b2863914a2 100644 --- a/src/dragonfs.c +++ b/src/dragonfs.c @@ -14,45 +14,6 @@ #include "rompak_internal.h" #include "utils.h" -/** - * @defgroup dfs DragonFS - * @ingroup asset - * @brief DragonFS filesystem implementation and newlib hooks. - * - * DragonFS is a read only ROM filesystem for the N64. It provides an interface - * that homebrew developers can use to load resources from cartridge space that - * were not available at compile time. This can mean sprites or other game assets, - * or the filesystem can be appended at a later time if the homebrew developer wishes - * end users to be able to insert custom levels, music or other assets. It is loosely - * based off of FAT with consideration into application and limitations of the N64. - * - * The filesystem can be generated using 'mkdfs' which is included in the 'tools' - * directory of libdragon. 
Due to the read-only nature, DFS does not support empty - * files or empty directories. Attempting to create a filesystem with either of - * these using 'mkdfs' will result in an error. If a filesystem contains either empty - * files or empty directories, the result of manipulating the filesystem is undefined. - * - * DragonFS does not support writing, renaming or symlinking of files. It supports only - * file and directory types. - * - * DFS files have a maximum size of 256 MiB. Directories can have an unlimited - * number of files in them. Each token (separated by a / in the path) can be 243 characters - * maximum. Directories can be 100 levels deep at maximum. There can be 4 files open - * simultaneously. - * - * When DFS is initialized, it will register itself with newlib using 'rom:/' as a prefix. - * Files can be accessed either with standard POSIX functions (open, fopen) using the 'rom:/' - * prefix or the lower-level DFS API calls without prefix. In most cases, it is not necessary - * to use the DFS API directly, given that the standard C functions are more comprehensive. - * Files can be opened using both sets of API calls simultaneously as long as no more than - * four files are open at any one time. - * - * DragonFS does not support file compression; if you want to compress your assets, - * use the asset API (#asset_load / #asset_fopen). - * - * @{ - */ - /** * @brief Directory walking flags */ @@ -702,10 +663,6 @@ static int __dfs_findnext(char *buf, directory_entry_t **next_entry) * Buffer to place the name of the file or directory found * * @return The flags (#FLAGS_FILE, #FLAGS_DIR, #FLAGS_EOF) or a negative value on error. - * - * @note This function uses a global context. Do not attempt other filesystem - * operations (eg: opening a file) while a traversal is in progress. - * You can use #dir_findfirst and #dir_findnext to avoid this limitation. 
*/ int dfs_dir_findfirst(const char * const path, char *buf) { @@ -728,18 +685,6 @@ int dfs_dir_findnext(char *buf) } -/** - * @brief Open a file given a path - * - * Check if we have any free file handles, and if we do, try - * to open the file specified. Supports absolute and relative - * paths - * - * @param[in] path - * Path of the file to open - * - * @return A valid file handle to reference the file by or a negative error on failure. - */ int dfs_open(const char * const path) { /* Try to find file */ @@ -772,14 +717,6 @@ int dfs_open(const char * const path) return OPENFILE_TO_HANDLE(file); } -/** - * @brief Close an already open file handle. - * - * @param[in] handle - * A valid file handle as returned from #dfs_open. - * - * @return DFS_ESUCCESS on success or a negative value on error. - */ int dfs_close(uint32_t handle) { dfs_open_file_t *file = HANDLE_TO_OPENFILE(handle); @@ -795,18 +732,6 @@ int dfs_close(uint32_t handle) return DFS_ESUCCESS; } -/** - * @brief Seek to an offset in the file - * - * @param[in] handle - * A valid file handle as returned from #dfs_open. - * @param[in] offset - * A byte offset from the origin to seek from. - * @param[in] origin - * An offset to seek from. Either `SEEK_SET`, `SEEK_CUR` or `SEEK_END`. - * - * @return DFS_ESUCCESS on success or a negative value on error. - */ int dfs_seek(uint32_t handle, int offset, int origin) { dfs_open_file_t *file = HANDLE_TO_OPENFILE(handle); @@ -872,14 +797,6 @@ int dfs_seek(uint32_t handle, int offset, int origin) return DFS_ESUCCESS; } -/** - * @brief Return the current offset into a file - * - * @param[in] handle - * A valid file handle as returned from #dfs_open. - * - * @return The current byte offset into a file or a negative error on failure. 
- */ int dfs_tell(uint32_t handle) { /* The good thing is that the location is always in the file structure */ @@ -893,24 +810,6 @@ int dfs_tell(uint32_t handle) return file->loc; } -/** - * @brief Read data from a file - * - * Note that no caching is performed: if you need to read small amounts - * (eg: one byte at a time), consider using standard C API instead (fopen()) - * which performs internal buffering to avoid too much overhead. - * - * @param[out] buf - * Buffer to read into - * @param[in] size - * Size of each element to read - * @param[in] count - * Number of elements to read - * @param[in] handle - * A valid file handle as returned from #dfs_open. - * - * @return The actual number of bytes read or a negative value on failure. - */ int dfs_read(void * const buf, int size, int count, uint32_t handle) { /* This is where we do all the work */ @@ -993,14 +892,6 @@ int dfs_read(void * const buf, int size, int count, uint32_t handle) return (void*)data - buf; } -/** - * @brief Return the file size of an open file - * - * @param[in] handle - * A valid file handle as returned from #dfs_open. - * - * @return The file size in bytes or a negative value on failure. - */ int dfs_size(uint32_t handle) { dfs_open_file_t *file = HANDLE_TO_OPENFILE(handle); @@ -1014,24 +905,6 @@ int dfs_size(uint32_t handle) return file->size; } -/** - * @brief Return the physical address of a file (in ROM space) - * - * This function should be used for highly-specialized, high-performance - * use cases. Using dfs_open / dfs_read is generally acceptable - * performance-wise, and is easier to use rather than managing - * direct access to PI space. - * - * Direct access to ROM data must go through io_read or dma_read. Do not - * dereference directly as the console might hang if the PI is busy. - * - * @param[in] path - * Name of the file - * - * @return A pointer to the physical address of the file body, or 0 - * if the file was not found. 
- * - */ uint32_t dfs_rom_addr(const char *path) { /* Try to find file */ @@ -1052,14 +925,6 @@ uint32_t dfs_rom_addr(const char *path) return get_start_location(&t_node); } -/** - * @brief Return whether the end of file has been reached - * - * @param[in] handle - * A valid file handle as returned from #dfs_open. - * - * @return 1 if the end of file is reached, 0 if not, and a negative value on error. - */ int dfs_eof(uint32_t handle) { dfs_open_file_t *file = HANDLE_TO_OPENFILE(handle); @@ -1297,30 +1162,6 @@ static void __dfs_check_emulation(void) assertf(0, "Your emulator is not accurate enough to run this ROM.\nSpecifically, it doesn't support accurate PI DMA"); } -/** - * @brief Initialize the filesystem. - * - * Given a base offset where the filesystem should be found, this function will - * initialize the filesystem to read from cartridge space. This function will - * also register DragonFS with newlib so that standard POSIX/C file operations - * work with DragonFS, using the "rom:/" prefix". - * - * The function needs to know where the DFS image is located within the cartridge - * space. To simplify this, you can pass #DFS_DEFAULT_LOCATION which tells - * #dfs_init to search for the DFS image by itself, using the rompak TOC (see - * rompak_internal.h). Most users should use this option. - * - * Otherwise, if the ROM cannot be built with a rompak TOC for some reason, - * a virtual address should be passed. This is normally 0xB0000000 + the offset - * used when building your ROM + the size of the header file used (typically 0x1000). - * - * @param[in] base_fs_loc - * Virtual address in cartridge space at which to find the filesystem, or - * DFS_DEFAULT_LOCATION to automatically search for the filesystem in the - * cartridge (using the rompak). - * - * @return DFS_ESUCCESS on success or a negative error otherwise. - */ int dfs_init(uint32_t base_fs_loc) { /* Detect if we are running on emulator accurate enough to emulate DragonFS. 
*/ @@ -1356,9 +1197,6 @@ int dfs_init(uint32_t base_fs_loc) return DFS_ESUCCESS; } -/** - * @brief Convert DFS error code into an error string - */ const char *dfs_strerror(int error) { switch (error) { @@ -1371,5 +1209,3 @@ const char *dfs_strerror(int error) default: return "Unknown error"; } } - -/** @} */ diff --git a/src/eeprom.c b/src/eeprom.c index 892e0e66b1..97433344de 100644 --- a/src/eeprom.c +++ b/src/eeprom.c @@ -44,14 +44,6 @@ static uint32_t eeprom_status( void ) ); } -/** - * @brief Probe the EEPROM interface on the cartridge. - * - * Inspect the identifier half-word of the EEPROM status response to - * determine which EEPROM save type is available (if any). - * - * @return which EEPROM type was detected on the cartridge. - */ eeprom_type_t eeprom_present( void ) { switch( eeprom_status() >> 8 ) @@ -62,12 +54,6 @@ eeprom_type_t eeprom_present( void ) } } -/** - * @brief Determine how many blocks of EEPROM exist on the cartridge. - * - * @return 0 if EEPROM was not detected - * or the number of EEPROM 8-byte save blocks available. - */ size_t eeprom_total_blocks( void ) { switch ( eeprom_present() ) @@ -78,15 +64,6 @@ size_t eeprom_total_blocks( void ) } } -/** - * @brief Read a block from EEPROM. - * - * @param[in] block - * Block to read data from. Joybus accesses EEPROM in 8-byte blocks. - * - * @param[out] dest - * Destination buffer for the eight bytes read from EEPROM. - */ void eeprom_read( uint8_t block, uint8_t * dest ) { const uint64_t input[JOYBUS_BLOCK_DWORDS] = @@ -107,17 +84,6 @@ void eeprom_read( uint8_t block, uint8_t * dest ) memcpy( dest, &output[1], EEPROM_BLOCK_SIZE ); } -/** - * @brief Write a block to EEPROM. - * - * @param[in] block - * Block to write data to. Joybus accesses EEPROM in 8-byte blocks. - * - * @param[in] src - * Source buffer for the eight bytes of data to write to EEPROM. 
- * - * @return the EEPROM status byte - */ uint8_t eeprom_write( uint8_t block, const uint8_t * src ) { uint64_t input[JOYBUS_BLOCK_DWORDS] = @@ -140,19 +106,6 @@ uint8_t eeprom_write( uint8_t block, const uint8_t * src ) return output[2] >> 56; } -/** - * @brief Read a buffer of bytes from EEPROM. - * - * This is a high-level convenience helper that abstracts away the - * one-at-a-time EEPROM block access pattern. - * - * @param[out] dest - * Destination buffer to read data into - * @param[in] start - * Byte offset in EEPROM to start reading data from - * @param[in] len - * Byte length of data to read into buffer - */ void eeprom_read_bytes( uint8_t * dest, size_t start, size_t len ) { size_t bytes_left = len; @@ -184,29 +137,6 @@ void eeprom_read_bytes( uint8_t * dest, size_t start, size_t len ) } } -/** - * @brief Write a buffer of bytes to EEPROM. - * - * This is a high-level convenience helper that abstracts away the - * one-at-a-time EEPROM block access pattern. - * - * Each EEPROM block write takes approximately 15 milliseconds; - * this operation may block for a while with large buffer sizes: - * - * * 4k EEPROM: 64 blocks * 15ms = 960ms! - * * 16k EEPROM: 256 blocks * 15ms = 3840ms! - * - * You may want to pause audio before calling this. - * - * @param[in] src - * Source buffer containing data to write - * - * @param[in] start - * Byte offset in EEPROM to start writing data to - * - * @param[in] len - * Byte length of the src buffer - */ void eeprom_write_bytes( const uint8_t * src, size_t start, size_t len ) { size_t bytes_left = len; diff --git a/src/eepromfs.c b/src/eepromfs.c index 4efa14fbd9..5d7514d8b8 100644 --- a/src/eepromfs.c +++ b/src/eepromfs.c @@ -204,36 +204,6 @@ static const eepfs_file_t * eepfs_get_file(int handle) return NULL; } -/** - * @brief Initializes the EEPROM filesystem. 
- * - * Creates a lookup table of file descriptors based on the configuration - * and validates that the current EEPROM data is likely to be compatible - * with the configured file descriptors. - * - * If the configured filesystem does not fit in the available EEPROM blocks - * on the cartridge, initialization will fail. Even if your total file size - * fits in EEPROM, your filesystem may not fit due to overhead and padding. - * Note that 1 block is reserved for the filesystem signature, and all files - * must start on a block boundary. - * - * You can mitigate this by ensuring that your files are aligned to the - * 8-byte block size and minimizing wasted space with packed structs. - * - * Each file will take up a minimum of 1 block, plus the filesystem itself - * reserves the first block of EEPROM, so the entry count has a practical - * limit of the number of available EEPROM blocks minus 1: - * - * * 4k EEPROM: 63 files maximum. - * * 16k EEPROM: 255 files maximum. - * - * @param[in] entries - * An array of file paths and sizes; see #eepfs_entry_t - * @param[in] count - * The number of entries in the array - * - * @return EEPFS_ESUCCESS on success or a negative error otherwise - */ int eepfs_init(const eepfs_entry_t * entries, size_t count) { /* Check if EEPROM FS has already been initialized */ @@ -310,15 +280,6 @@ int eepfs_init(const eepfs_entry_t * entries, size_t count) return EEPFS_ESUCCESS; } -/** - * @brief De-initializes the EEPROM filesystem. - * - * This cleans up the file lookup table. - * - * You probably won't ever need to call this. - * - * @return EEPFS_ESUCCESS on success or a negative error otherwise - */ int eepfs_close(void) { /* If eepfs was not initialized, don't do anything. */ @@ -336,18 +297,6 @@ int eepfs_close(void) return EEPFS_ESUCCESS; } -/** - * @brief Reads an entire file from the EEPROM filesystem. 
- * - * @param[in] path - * Path of file in EEPROM filesystem to read from - * @param[out] dest - * Buffer to read into - * @param[in] size - * Size of the destination buffer (in bytes) - * - * @return EEPFS_ESUCCESS on success or a negative error otherwise - */ int eepfs_read(const char * path, void * dest, size_t size) { const int handle = eepfs_find_handle(path); @@ -370,21 +319,6 @@ int eepfs_read(const char * path, void * dest, size_t size) return EEPFS_ESUCCESS; } -/** - * @brief Writes an entire file to the EEPROM filesystem. - * - * Each EEPROM block write takes approximately 15 milliseconds; - * this operation may block for a while! - * - * @param[in] path - * Path of file in EEPROM filesystem to write to - * @param[in] src - * Buffer of data to be written - * @param[in] size - * Size of the source buffer (in bytes) - * - * @return EEPFS_ESUCCESS on success or a negative error otherwise - */ int eepfs_write(const char * path, const void * src, size_t size) { const int handle = eepfs_find_handle(path); @@ -407,22 +341,6 @@ int eepfs_write(const char * path, const void * src, size_t size) return EEPFS_ESUCCESS; } -/** - * @brief Erases a file in the EEPROM filesystem. - * - * Note that "erasing" a file just means writing it full of zeroes. - * All files in the filesystem must always exist at the size specified - * during #eepfs_init - * - * Each EEPROM block write takes approximately 15 milliseconds; - * this operation may block for a while! - * - * Be advised: this is a destructive operation that cannot be undone! - * - * @retval EEPFS_ESUCCESS if successful - * @retval EEPFS_ENOFILE if the path is not a valid file - * @retval EEPFS_EBADINPUT if the path is NULL - */ int eepfs_erase(const char * path) { const int handle = eepfs_find_handle(path); @@ -449,29 +367,6 @@ int eepfs_erase(const char * path) return EEPFS_ESUCCESS; } -/** - * @brief Validates the first block of EEPROM. 
- * - * There are no guarantees that the data in EEPROM actually matches - * the expected layout of the filesystem. There are many reasons why - * a mismatch can occur: EEPROM re-used from another game; a brand new - * EEPROM that has never been initialized and contains garbage data; - * the filesystem has changed between builds or version of software - * currently in development; EEPROM failing due to age or write limits. - * - * To mitigate these scenarios, it is a good idea to validate that at - * least the first block of EEPROM matches some known good value. - * - * If the signature matches, the data in EEPROM is probably what the - * filesystem expects. If not, the best move is to erase everything - * and start from zero. - * - * @see eepfs_generate_signature - * @see #eepfs_wipe - * - * @retval true if the signature in EEPROM matches the filesystem signature - * @retval false if the signature in EEPROM does not match the filesystem signature - */ bool eepfs_verify_signature(void) { /* Generate the expected signature for the filesystem */ @@ -486,23 +381,6 @@ bool eepfs_verify_signature(void) return memcmp(eeprom_buf, (uint8_t *)&signature, EEPROM_BLOCK_SIZE) == 0; } -/** - * @brief Erases all blocks in EEPROM and sets a new signature. - * - * This is useful when you want to erase all files in the filesystem. - * - * Each EEPROM block write takes approximately 15 milliseconds; - * this operation may block for a while: - * - * * 4k EEPROM: 64 blocks * 15ms = 960ms! - * * 16k EEPROM: 256 blocks * 15ms = 3840ms! - * - * You may want to pause audio in advance of calling this. - * - * Be advised: this is a destructive operation that cannot be undone! 
- * - * @see #eepfs_verify_signature - */ void eepfs_wipe(void) { /* Write the filesystem signature into the first block */ diff --git a/src/exception.c b/src/exception.c index e7a6528d2a..ef14323a79 100644 --- a/src/exception.c +++ b/src/exception.c @@ -15,20 +15,6 @@ #include #include -/** - * @defgroup exceptions Exception Handler - * @ingroup lowlevel - * @brief Handle hardware-generated exceptions. - * - * The exception handler traps exceptions generated by hardware. This could - * be an invalid instruction or invalid memory access exception or it could - * be a reset exception. In both cases, a handler registered with - * #register_exception_handler will be passed information regarding the - * exception type and relevant registers. - * - * @{ - */ - /** * @brief Syscall exception handler entry */ @@ -51,36 +37,6 @@ extern volatile reg_block_t __baseRegAddr; /** @brief Syscall exception handlers */ static syscall_handler_entry_t __syscall_handlers[MAX_SYSCALL_HANDLERS]; -/** - * @brief Register an exception handler to handle exceptions - * - * The registered handle is responsible for clearing any bits that may cause - * a re-trigger of the same exception and updating the EPC. An important - * example is the cause bits (12-17) of FCR31 from cop1. To prevent - * re-triggering the exception they should be cleared by the handler. - * - * To manipulate the registers, update the values in the exception_t struct. - * They will be restored to appropriate locations when returning from the - * handler. Setting them directly will not work as expected as they will get - * overwritten with the values pointed by the struct. - * - * There is only one exception to this, cr (cause register) which is also - * modified by the int handler before the saved values are restored thus it - * is only possible to update it through C0_WRITE_CR macro if it is needed. - * This shouldn't be necessary though as they are already handled by the - * library. 
- * - * k0 ($26), k1 ($27) are not saved/restored and will not be available in the - * handler. Theoretically we can exclude s0-s7 ($16-$23), and gp ($28) to gain - * some performance as they are already saved by GCC when necessary. The same - * is true for sp ($29) and ra ($31) but current interrupt handler manipulates - * them via allocating a new stack and doing a jal. Similarly floating point - * registers f21-f31 are callee-saved. In the future we may consider removing - * them from the save state for interrupts (but not for exceptions) - * - * @param[in] cb - * Callback function to call when exceptions happen - */ exception_handler_t register_exception_handler( exception_handler_t cb ) { exception_handler_t old = __exception_handler; @@ -234,13 +190,6 @@ static void debug_exception(exception_t* ex) { } #endif -/** - * @brief Default exception handler. - * - * This handler is installed by default for all exceptions. It initializes - * the console and dump the exception state to the screen, including the value - * of all GPR/FPR registers. It then calls abort() to abort execution. - */ void exception_default_handler(exception_t* ex) { #ifndef NDEBUG static bool backtrace_exception = false; @@ -442,29 +391,6 @@ void __onCriticalException(reg_block_t* regs) __exception_handler(&e); } -/** - * @brief Register a handler that will be called when a syscall exception - * - * This function allows to register a handler to be invoked in response to a - * syscall exception, generated by the SYSCALL opcode. The opcode allows to - * specify a 20-bit code which, in a more traditional operating system architecture, - * corresponds to the "service" to be called. - * - * When the registered handler returns, the execution will resume from the - * instruction following the syscall one. - * - * To allow for different usages of the code field, this function accepts - * a range of codes to associated with the handler. 
This allows a single handler - * to be invoked for multiple different codes, to specialize services. - * - * @note Syscall codes in the range 0x00000 - 0x0FFFF are reserved to libdragon - * itself. Use a code outside that range to avoid conflicts with future versions - * of libdragon. - * - * @param handler Handler to invoke when a syscall exception is triggered - * @param first_code First syscall code to associate with this handler (begin of range) - * @param last_code Last syscall code to associate with this handler (end of range) - */ void register_syscall_handler( syscall_handler_t handler, uint32_t first_code, uint32_t last_code ) { assertf(first_code <= 0xFFFFF, "The maximum allowed syscall code is 0xFFFFF (requested: %05lx)\n", first_code); @@ -528,6 +454,3 @@ void __onSyscallException( reg_block_t* regs ) // Skip syscall opcode to continue execution e.regs->epc += 4; } - - -/** @} */ diff --git a/src/graphics.c b/src/graphics.c index be6b7c9586..4aebb360ae 100644 --- a/src/graphics.c +++ b/src/graphics.c @@ -14,36 +14,6 @@ #include "surface.h" #include "sprite_internal.h" -/** - * @defgroup graphics 2D Graphics - * @ingroup display - * @brief Software routines for manipulating graphics in a display context. - * - * The graphics subsystem is responsible for software manipulation of a display - * context as returned from the @ref display. All of the functions use a pure - * software drawing method and are thus much slower than hardware sprite support. - * However, they are slightly more flexible and offer no hardware limitations - * in terms of sprite size. - * - * Code wishing to draw to the screen should first acquire a display context - * using #display_get. Once the display context is acquired, code may draw to - * the context using any of the graphics functions present. Wherever practical, - * two versions of graphics functions are available: a transparent variety and - * a non-transparent variety. 
Code that wishes to display sprites without - * transparency can get a slight performance boost by using the non-transparent - * variety of calls since no software alpha blending needs to occur. Once - * code has finished drawing to the display context, it can be displayed to the - * screen using #display_show. - * - * The graphics subsystem makes use of the same contexts as the @ref rdp. Thus, - * with careful coding, both hardware and software routines can be used to draw - * to the display context with no ill effects. The colors returned by - * #graphics_make_color and #graphics_convert_color are also compatible with both - * hardware and software graphics routines. - * - * @{ - */ - /** * @brief Struct that holds the current loaded font. We load the default font on * the first #graphics_draw_character call if #graphics_set_font_sprite was not called. @@ -113,29 +83,6 @@ static uint32_t f_color = 0xFFFFFFFF; */ static uint32_t b_color = 0x00000000; -/** - * @brief Return a packed 32-bit representation of an RGBA color - * - * This is exactly the same as calling `graphics_convert_color(RGBA32(r,g,b,a))`. - * Refer to #graphics_convert_color for more information. - * - * @deprecated By switching to the rdpq API, this function should not be required - * anymore. Use #RGBA32 or #RGBA16 instead. Please avoid using it in new code if possible. - * - * @param[in] r - * 8-bit red value - * @param[in] g - * 8-bit green value - * @param[in] b - * 8-bit blue value - * @param[in] a - * 8-bit alpha value. 
Note that 255 is opaque and 0 is transparent - * - * @return a 32-bit representation of the color suitable for blitting in software or hardware - * - * @see #graphics_convert_color - * - */ uint32_t graphics_make_color( int r, int g, int b, int a ) { color_t color; @@ -148,27 +95,6 @@ uint32_t graphics_make_color( int r, int g, int b, int a ) return graphics_convert_color( color ); } -/** - * @brief Convert a color structure to a 32-bit representation of an RGBA color - * - * This function is similar to #color_to_packed16 and #color_to_packed32, but - * automatically picks the version matching with the current display configuration. - * Notice that this might be wrong if you are drawing to an arbitrary surface rather - * than a framebuffer. - * - * @note In 16 bpp mode, this function will return a packed 16-bit color - * in BOTH the lower 16 bits and the upper 16 bits. In general, this is not necessary. - * However, for drawing with the old deprecated RDP API (in particular, - * rdp_set_primitive_color), this is still required. - * - * @deprecated By switching to the rdpq API, this function should not be required - * anymore. Please avoid using it in new code if possible. - * - * @param[in] color - * A color structure representing an RGBA color - * - * @return a 32-bit representation of the color suitable for blitting in software or hardware - */ uint32_t graphics_convert_color( color_t color ) { if( display_get_bitdepth() == 2 ) @@ -183,18 +109,6 @@ uint32_t graphics_convert_color( color_t color ) } } -/** - * @brief Set the current forecolor and backcolor for text operations - * - * @param[in] forecolor - * 32-bit RGBA color to use as the text color. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - * @param[in] backcolor - * 32-bit RGBA color to use as the background color for text. Use - * #graphics_convert_color or #graphics_make_color to generate this value. 
- * Note that if the color given is transparent, text can be written over - * other graphics without background colors showing. - */ void graphics_set_color( uint32_t forecolor, uint32_t backcolor ) { f_color = forecolor; @@ -228,22 +142,6 @@ static int __is_transparent( int bitdepth, uint32_t color ) return 0; } -/** - * @brief Draw a pixel to a given display context - * - * @note This function does not support transparency for speed purposes. To draw - * a transparent or translucent pixel, use #graphics_draw_pixel_trans. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The x coordinate of the pixel. - * @param[in] y - * The y coordinate of the pixel. - * @param[in] color - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - */ void graphics_draw_pixel( surface_t* disp, int x, int y, uint32_t color ) { if( disp == 0 ) { return; } @@ -259,22 +157,6 @@ void graphics_draw_pixel( surface_t* disp, int x, int y, uint32_t color ) } } -/** - * @brief Draw a pixel to a given display context with alpha support - * - * @note This function is much slower than #graphics_draw_pixel for 32-bit - * pixels due to the need to sample the current pixel to do software alpha-blending. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The x coordinate of the pixel. - * @param[in] y - * The y coordinate of the pixel. - * @param[in] color - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - */ void graphics_draw_pixel_trans( surface_t* disp, int x, int y, uint32_t color ) { if( disp == 0 ) { return; } @@ -322,26 +204,6 @@ void graphics_draw_pixel_trans( surface_t* disp, int x, int y, uint32_t color ) } } -/** - * @brief Draw a line to a given display context - * - * @note This function does not support transparency for speed purposes. 
To draw - * a transparent or translucent line, use #graphics_draw_line_trans. - * - * @param[in] disp - * The currently active display context. - * @param[in] x0 - * The x coordinate of the start of the line. - * @param[in] y0 - * The y coordinate of the start of the line. - * @param[in] x1 - * The x coordinate of the end of the line. - * @param[in] y1 - * The y coordinate of the end of the line. - * @param[in] color - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - */ void graphics_draw_line( surface_t* disp, int x0, int y0, int x1, int y1, uint32_t color ) { int dy = y1 - y0; @@ -400,26 +262,6 @@ void graphics_draw_line( surface_t* disp, int x0, int y0, int x1, int y1, uint32 } } -/** - * @brief Draw a line to a given display context with alpha support - * - * @note This function is much slower than #graphics_draw_line for 32-bit - * buffers due to the need to sample the current pixel to do software alpha-blending. - * - * @param[in] disp - * The currently active display context. - * @param[in] x0 - * The x coordinate of the start of the line. - * @param[in] y0 - * The y coordinate of the start of the line. - * @param[in] x1 - * The x coordinate of the end of the line. - * @param[in] y1 - * The y coordinate of the end of the line. - * @param[in] color - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - */ void graphics_draw_line_trans( surface_t* disp, int x0, int y0, int x1, int y1, uint32_t color ) { int dy = y1 - y0; @@ -478,26 +320,6 @@ void graphics_draw_line_trans( surface_t* disp, int x0, int y0, int x1, int y1, } } -/** - * @brief Draw a filled rectangle to a display context - * - * @note This function does not support transparency for speed purposes. To draw - * a transparent or translucent box, use #graphics_draw_box_trans. - * - * @param[in] disp - * The currently active display context. 
- * @param[in] x - * The x coordinate of the top left of the box. - * @param[in] y - * The y coordinate of the top left of the box. - * @param[in] width - * The width of the box in pixels. - * @param[in] height - * The height of the box in pixels. - * @param[in] color - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - */ void graphics_draw_box( surface_t* disp, int x, int y, int width, int height, uint32_t color ) { if( disp == 0 ) { return; } @@ -529,26 +351,6 @@ void graphics_draw_box( surface_t* disp, int x, int y, int width, int height, ui } } -/** - * @brief Draw a filled rectangle to a display context - * - * @note This function is much slower than #graphics_draw_box for 32-bit - * buffers due to the need to sample the current pixel to do software alpha-blending. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The x coordinate of the top left of the box. - * @param[in] y - * The y coordinate of the top left of the box. - * @param[in] width - * The width of the box in pixels. - * @param[in] height - * The height of the box in pixels. - * @param[in] color - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. - */ void graphics_draw_box_trans( surface_t* disp, int x, int y, int width, int height, uint32_t color ) { if( disp == 0 ) { return; } @@ -612,18 +414,6 @@ void graphics_draw_box_trans( surface_t* disp, int x, int y, int width, int heig } } -/** - * @brief Fill the entire screen with a particular color - * - * @note Since this function is designed for blanking the screen, alpha values for - * colors are ignored. - * - * @param[in] disp - * The currently active display context. - * @param[in] c - * The 32-bit RGBA color to draw to the screen. Use #graphics_convert_color - * or #graphics_make_color to generate this value. 
- */ void graphics_fill_screen( surface_t* disp, uint32_t c ) { if( disp == 0 ) { return; } @@ -636,30 +426,12 @@ void graphics_fill_screen( surface_t* disp, uint32_t c ) buffer[i] = c64; } -/** - * @brief Set the font to the default. - */ void graphics_set_default_font( void ) { sprite_t *font = (sprite_t *)(display_get_bitdepth() == 2 ? __font_data_16 : __font_data_32); graphics_set_font_sprite( font ); } -/** - * @brief Set the current font. Should be set before using any of the draw function. - * - * The sprite font should be imported using hslices/vslices according to the amount of characters it has. - * The amount of hslices vs vslices does not matter for this, but it should include the whole ASCII - * range that you will want to use, including characters from the 0 to 32 range. Normally the sprite should have - * 127 slices to cover the normal ASCII range. - * - * During rendering, the slice used will be the same number as the char (eg.: character 'A' will use slice 65). - * - * You can see an example of a sprite font (that has the default font double sized) under examples/customfont. - * - * @param[in] font - * Sprite font to be used. - */ void graphics_set_font_sprite( sprite_t *font ) { sprite_font.sprite = font; @@ -667,23 +439,6 @@ void graphics_set_font_sprite( sprite_t *font ) sprite_font.font_height = sprite_font.sprite->height / sprite_font.sprite->vslices; } -/** - * @brief Draw a character to the screen using the built-in font - * - * Draw a character from the built-in font to the screen. This function does not support alpha blending, - * only binary transparency. If the background color is fully transparent, the font is drawn with no - * background. Otherwise, the font is drawn on a fully colored background. The foreground and background - * can be set using #graphics_set_color. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The X coordinate to place the top left pixel of the character drawn. 
- * @param[in] y - * The Y coordinate to place the top left pixel of the character drawn. - * @param[in] ch - * The ASCII character to draw to the screen. - */ void graphics_draw_character( surface_t* disp, int x, int y, char ch ) { if( disp == 0 ) { return; } @@ -762,26 +517,6 @@ void graphics_draw_character( surface_t* disp, int x, int y, char ch ) } } -/** - * @brief Draw a null terminated string to a display context - * - * Draw a string to the screen, following a few simple rules. Standard ASCII is supported, as well - * as \\r, \\n, space and tab. \\r and \\n will both cause the next character to be rendered one line - * lower and at the x coordinate specified in the parameters. The tab character inserts five spaces. - * - * This function does not support alpha blending, only binary transparency. If the background color is - * fully transparent, the font is drawn with no background. Otherwise, the font is drawn on a fully - * colored background. The foreground and background can be set using #graphics_set_color. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The X coordinate to place the top left pixel of the character drawn. - * @param[in] y - * The Y coordinate to place the top left pixel of the character drawn. - * @param[in] msg - * The ASCII null terminated string to draw to the screen. - */ void graphics_draw_text( surface_t* disp, int x, int y, const char * const msg ) { if( disp == 0 ) { return; } @@ -816,68 +551,12 @@ void graphics_draw_text( surface_t* disp, int x, int y, const char * const msg ) } } -/** - * @brief Draw a sprite to a display context - * - * Given a sprite structure, this function will draw a sprite to the display context - * with clipping support. - * - * @note This function does not support alpha blending for speed purposes. For - * alpha blending support, please see #graphics_draw_sprite_trans - * - * @param[in] disp - * The currently active display context. 
- * @param[in] x - * The X coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped horizontally. - * @param[in] y - * The Y coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped vertically. - * @param[in] sprite - * Pointer to a sprite structure to display to the screen. - */ void graphics_draw_sprite( surface_t* disp, int x, int y, sprite_t *sprite ) { /* Simply a wrapper to call the original functionality */ graphics_draw_sprite_stride( disp, x, y, sprite, -1 ); } -/** - * @brief Draw a sprite from a spritemap to a display context - * - * Given a sprite structure, this function will draw a sprite out of a larger spritemap - * to the display context with clipping support. This function is useful for software - * tilemapping. If a sprite was generated as a spritemap (it has more than one horizontal - * or vertical slice), this function can display a slice of the sprite as a standalone sprite. - * - * Given a sprite with 3 horizontal slices and 2 vertical slices, the offsets would be as follows: - * - *
- * *---*---*---*
- * | 0 | 1 | 2 |
- * *---*---*---*
- * | 3 | 4 | 5 |
- * *---*---*---*
- * 
- * - * @note This function does not support alpha blending for speed purposes. For - * alpha blending support, please see #graphics_draw_sprite_trans_stride - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The X coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped horizontally. - * @param[in] y - * The Y coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped vertically. - * @param[in] sprite - * Pointer to a sprite structure to display to the screen. - * @param[in] offset - * Offset of the sprite to display out of the spritemap. The offset is counted - * starting from 0. The top left sprite in the map is 0, the next one to the right - * is 1, and so on. - */ void graphics_draw_sprite_stride( surface_t* disp, int x, int y, sprite_t *sprite, int offset ) { /* Sanity checking */ @@ -987,69 +666,12 @@ void graphics_draw_sprite_stride( surface_t* disp, int x, int y, sprite_t *sprit } } -/** - * @brief Draw a sprite to a display context with alpha transparency - * - * Given a sprite structure, this function will draw a sprite to the display context - * with clipping support. - * - * @note This function supports alpha blending and is much slower for 32-bit sprites. - * If you do not need alpha blending support, please see #graphics_draw_sprite. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The X coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped horizontally. - * @param[in] y - * The Y coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped vertically. - * @param[in] sprite - * Pointer to a sprite structure to display to the screen. 
- */ void graphics_draw_sprite_trans( surface_t* disp, int x, int y, sprite_t *sprite ) { /* Simply a wrapper to call the original functionality */ graphics_draw_sprite_trans_stride( disp, x, y, sprite, -1 ); } -/** - * @brief Draw a sprite from a spritemap to a display context - * - * Given a sprite structure, this function will draw a sprite out of a larger spritemap - * to the display context with clipping support. This function is useful for software - * tilemapping. If a sprite was generated as a spritemap (it has more than one horizontal - * or vertical slice), this function can display a slice of the sprite as a standalone sprite. - * - * Given a sprite with 3 horizontal slices and 2 vertical slices, the offsets would be as follows: - * - *
- * *---*---*---*
- * | 0 | 1 | 2 |
- * *---*---*---*
- * | 3 | 4 | 5 |
- * *---*---*---*
- * 
- * - * @note This function supports alpha blending and is much slower for 32-bit sprites. - * If you do not need alpha blending support, please see #graphics_draw_sprite_stride. - * - * @param[in] disp - * The currently active display context. - * @param[in] x - * The X coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped horizontally. - * @param[in] y - * The Y coordinate to place the top left pixel of the sprite. This can - * be negative if the sprite is clipped vertically. - * @param[in] sprite - * Pointer to a sprite structure to display to the screen. - * @param[in] offset - * Offset of the sprite to display out of the spritemap. The offset is counted - * starting from 0. The top left sprite in the map is 0, the next one to the right - * is 1, and so on. - */ - void graphics_draw_sprite_trans_stride( surface_t* disp, int x, int y, sprite_t *sprite, int offset ) { /* Sanity checking */ @@ -1196,5 +818,3 @@ extern inline uint16_t color_to_packed16(color_t c); extern inline uint32_t color_to_packed32(color_t c); extern inline color_t color_from_packed16(uint16_t c); extern inline color_t color_from_packed32(uint32_t c); - -/** @} */ /* graphics */ diff --git a/src/interrupt.c b/src/interrupt.c index e1ad16f224..7783c601f2 100644 --- a/src/interrupt.c +++ b/src/interrupt.c @@ -7,40 +7,6 @@ #include "libdragon.h" #include "regsinternal.h" -/** - * @defgroup interrupt Interrupt Controller - * @ingroup lowlevel - * @brief N64 interrupt registering and servicing routines. - * - * The N64 interrupt controller provides a software interface to - * register for interrupts from the various systems in the N64. - * Most interrupts on the N64 coordinate through the MIPS interface - * (MI) to allow interrupts to be handled at one spot. A notable - * exception is the timer interrupt which is generated by the MIPS - * r4300 itself and not the N64 hardware. 
- * - * The interrupt controller is automatically initialized before - * main is called. By default, all interrupts are enabled and any - * registered callback can be called when an interrupt occurs. - * Each of the N64-generated interrupts is maskable using the various - * set accessors. - * - * Interrupts can be enabled or disabled as a whole on the N64 using - * #enable_interrupts and #disable_interrupts. It is assumed that - * once the interrupt system is activated, these will always be called - * in pairs. Calling #enable_interrupts without first calling - * #disable_interrupts is considered a violation of this assumption - * and should be avoided. Calling #disable_interrupts when interrupts - * are already disabled will have no effect interrupts-wise - * (but should be paired with a #enable_interrupts regardless), - * and in that case the paired #enable_interrupts will not enable - * interrupts either. - * In this manner, it is safe to nest calls to disable and enable - * interrupts. 
- * - * @{ - */ - /** @brief Bit to set to clear the PI interrupt */ #define PI_CLEAR_INTERRUPT 0x02 /** @brief Bit to set to clear the SI interrupt */ @@ -374,238 +340,87 @@ void __RESET_handler( void ) } } -/** - * @brief Register an AI callback - * - * @param[in] callback - * Function to call when an AI interrupt occurs - */ void register_AI_handler( void (*callback)() ) { __register_callback(&AI_callback,callback); } -/** - * @brief Unregister an AI callback - * - * @param[in] callback - * Function that should no longer be called on AI interrupts - */ void unregister_AI_handler( void (*callback)() ) { __unregister_callback(&AI_callback,callback); } -/** - * @brief Register a VI callback - * - * @param[in] callback - * Function to call when a VI interrupt occurs - */ void register_VI_handler( void (*callback)() ) { __register_callback(&VI_callback,callback); } -/** - * @brief Unregister a VI callback - * - * @param[in] callback - * Function that should no longer be called on VI interrupts - */ void unregister_VI_handler( void (*callback)() ) { __unregister_callback(&VI_callback,callback); } -/** - * @brief Register a PI callback - * - * @param[in] callback - * Function to call when a PI interrupt occurs - */ void register_PI_handler( void (*callback)() ) { __register_callback(&PI_callback,callback); } -/** - * @brief Unegister a PI callback - * - * @param[in] callback - * Function that should no longer be called on PI interrupts - */ void unregister_PI_handler( void (*callback)() ) { __unregister_callback(&PI_callback,callback); } -/** - * @brief Register a DP callback - * - * @param[in] callback - * Function to call when a DP interrupt occurs - */ void register_DP_handler( void (*callback)() ) { __register_callback(&DP_callback,callback); } -/** - * @brief Unregister a DP callback - * - * @param[in] callback - * Function that should no longer be called on DP interrupts - */ void unregister_DP_handler( void (*callback)() ) { 
__unregister_callback(&DP_callback,callback); } -/** - * @brief Register a SI callback - * - * @param[in] callback - * Function to call when a SI interrupt occurs - */ void register_SI_handler( void (*callback)() ) { __register_callback(&SI_callback,callback); } -/** - * @brief Unegister a SI callback - * - * @param[in] callback - * Function that should no longer be called on SI interrupts - */ void unregister_SI_handler( void (*callback)() ) { __unregister_callback(&SI_callback,callback); } -/** - * @brief Register a SP callback - * - * @param[in] callback - * Function to call when a SP interrupt occurs - */ void register_SP_handler( void (*callback)() ) { __register_callback(&SP_callback,callback); } -/** - * @brief Unegister a SP callback - * - * @param[in] callback - * Function that should no longer be called on SP interrupts - */ void unregister_SP_handler( void (*callback)() ) { __unregister_callback(&SP_callback,callback); } -/** - * @brief Register a timer callback - * - * The callback will be used when the timer interrupt is triggered by the CPU. - * This happens when the COP0 COUNT register reaches the same value of the - * COP0 COMPARE register. - * - * This function is useful only if you want to do your own low level programming - * of the internal CPU timer and handle the interrupt yourself. In this case, - * also remember to activate the timer interrupt using #set_TI_interrupt. - * - * @note If you use the timer library (#timer_init and #new_timer), you do not - * need to call this function, as timer interrupt are already handled by the timer - * library. - * - * @param[in] callback - * Function to call when a timer interrupt occurs - */ void register_TI_handler( void (*callback)() ) { __register_callback(&TI_callback,callback); } -/** - * @brief Unregister a timer callback - * - * @note If you use the timer library (#timer_init and #new_timer), you do not - * need to call this function, as timer interrupt are already handled by the timer - * library. 
- * - * @param[in] callback - * Function that should no longer be called on timer interrupts - */ void unregister_TI_handler( void (*callback)() ) { __unregister_callback(&TI_callback,callback); } -/** - * @brief Register a CART interrupt callback. - * - * The callback will be called when a CART interrupt is triggered. CART interrupts - * are interrupts triggered by devices attached to the PI bus (aka CART bus), - * for instance the 64DD, or the modem cassette. - * - * CART interrupts are disabled by default in libdragon. Use #set_CART_interrupt - * to enable/disable them. - * - * Notice that there is no generic way to acknowledge those interrupts, so if - * you activate CART interrupts, make also sure to register an handler that - * acknowledge them, otherwise the interrupt will deadlock the console. - * - * @param[in] callback - * Function that should no longer be called on CART interrupts - */ void register_CART_handler( void (*callback)() ) { __register_callback(&CART_callback,callback); } -/** - * @brief Unregister a CART interrupt callback - * - * @param[in] callback - * Function that should no longer be called on CART interrupts - */ void unregister_CART_handler( void (*callback)() ) { __unregister_callback(&CART_callback,callback); } -/** - * @brief Register a handler that will be called when the user - * presses the RESET button. - * - * The N64 sends an interrupt when the RESET button is pressed, - * and then actually resets the console after about ~500ms (but less - * on some models, see #RESET_TIME_LENGTH). - * - * Registering a handler can be used to perform a clean reset. - * Technically, at the hardware level, it is important that the RCP - * is completely idle when the reset happens, or it might freeze - * and require a power-cycle to unfreeze. This means that any - * I/O, audio, video activity must cease before #RESET_TIME_LENGTH - * has elapsed. 
- * - * This entry point can be used by the game code to basically - * halts itself and stops issuing commands. Libdragon itself will - * register handlers to halt internal modules so to provide a basic - * good reset experience. - * - * Handlers can use #exception_reset_time to read how much has passed - * since the RESET button was pressed. - * - * @param callback Callback to invoke when the reset button is pressed. - * - * @note Reset handlers are called under interrupt. - * - */ void register_RESET_handler( void (*callback)() ) { for (int i=0;i -/** - * @defgroup cpak Controller Pak Filesystem Routines - * @ingroup controller - * @brief Managed Controller Pak interface. - * - * The Controller Pak system is a subsystem of the @ref controller. Before attempting to - * read from or write to a Controller Pak, be sure you have initialized the Joypad subsystem - * with #joypad_init and verified that you have a Controller Pak in the correct controller - * using #joypad_get_accessory_type. - * - * To read and write to the Controller Pak in an organized way compatible with official software, - * first check that the Controller Pak is valid using #validate_mempak. If the Controller Pak is - * invalid, it will need to be formatted using #format_mempak. Once the Controller Pak is - * considered valid, existing notes can be enumerated using #get_mempak_entry. To - * read the data associated with a note, use #read_mempak_entry_data. To write a - * new note to the Controller Pak, use #write_mempak_entry_data. Note that there is no append - * functionality so if a note is being updated, ensure you have deleted the old note - * first using #delete_mempak_entry. Code should be careful to check how many blocks - * are free before writing using #get_mempak_free_space. - * - * @{ - */ - /** * @name Inode values * @{ @@ -46,23 +23,6 @@ #define BLOCK_VALID_LAST 0x7F /** @} */ -/** - * @brief Read a sector from a Controller Pak - * - * This will read a sector from a Controller Pak. 
Sectors on Controller Paks are always 256 bytes - * in size. - * - * @param[in] controller - * The controller (0-3) to read a sector from - * @param[in] sector - * The sector (0-127) to read from - * @param[out] sector_data - * Buffer to place 256 read bytes of data - * - * @retval 0 if reading was successful - * @retval -1 if the sector was out of bounds or sector_data was null - * @retval -2 if there was an error reading part of a sector - */ int read_mempak_sector( int controller, int sector, uint8_t *sector_data ) { if( sector < 0 || sector >= 128 ) { return -1; } @@ -81,23 +41,6 @@ int read_mempak_sector( int controller, int sector, uint8_t *sector_data ) return 0; } -/** - * @brief Write a sector to a Controller Pak - * - * This will write a sector to a Controller Pak. Sectors on Controller Paks are always 256 bytes - * in size. - * - * @param[in] controller - * The controller (0-3) to write a sector to - * @param[in] sector - * The sector (0-127) to write to - * @param[out] sector_data - * Buffer containing 256 bytes of data to write to sector - * - * @retval 0 if writing was successful - * @retval -1 if the sector was out of bounds or sector_data was null - * @retval -2 if there was an error writing part of a sector - */ int write_mempak_sector( int controller, int sector, uint8_t *sector_data ) { if( sector < 0 || sector >= 128 ) { return -1; } @@ -728,19 +671,6 @@ static int __get_valid_toc( int controller ) } } -/** - * @brief Return whether a Controller Pak is valid - * - * This function will return whether the Controller Pak in a particular controller - * is formatted and valid. 
- * - * @param[in] controller - * The controller (0-3) to validate - * - * @retval 0 if the Controller Pak is valid and ready to be used - * @retval -2 if the Controller Pak is not present or couldn't be read - * @retval -3 if the Controller Pak is bad or unformatted - */ int validate_mempak( int controller ) { int toc = __get_valid_toc( controller ); @@ -757,23 +687,6 @@ int validate_mempak( int controller ) } } -/** - * @brief Read an entry on a Controller Pak - * - * Given an entry index (0-15), return the entry as found on the Controller Pak. If - * the entry is blank or invalid, the valid flag is cleared. - * - * @param[in] controller - * The controller (0-3) from which the entry should be read - * @param[in] entry - * The entry index (0-15) to read - * @param[out] entry_data - * Structure containing information on the entry - * - * @retval 0 if the entry was read successfully - * @retval -1 if the entry is out of bounds or entry_data is null - * @retval -2 if the Controller Pak is bad or not present - */ int get_mempak_entry( int controller, int entry, entry_structure_t *entry_data ) { uint8_t data[MEMPAK_BLOCK_SIZE]; @@ -828,17 +741,6 @@ int get_mempak_entry( int controller, int entry, entry_structure_t *entry_data ) } } -/** - * @brief Return the number of free blocks on a Controller Pak - * - * Note that a block is identical in size to a sector. To calculate the number of - * bytes free, multiply the return of this function by #MEMPAK_BLOCK_SIZE. - * - * @param[in] controller - * The controller (0-3) to read the free space from - * - * @return The number of blocks free on the memory card or a negative number on failure - */ int get_mempak_free_space( int controller ) { uint8_t data[MEMPAK_BLOCK_SIZE]; @@ -861,18 +763,6 @@ int get_mempak_free_space( int controller ) return __get_free_space( data ); } -/** - * @brief Format a Controller Pak - * - * Formats a Controller Pak. 
This should only be done to totally wipe and re-initialize - * the filesystem in case of a blank or corrupt Controller Pak after a repair has failed. - * - * @param[in] controller - * The Controller (0-3) that the Controller Pak is inserted. - * - * @retval 0 if the Controller Pak was formatted successfully. - * @retval -2 if the Controller Pak was not present or couldn't be formatted. - */ int format_mempak( int controller ) { /* set the size to 1280 as the notes table (768-1280) will need to be initialized. */ @@ -949,28 +839,6 @@ int format_mempak( int controller ) return 0; } -/** - * @brief Read the data associated with an entry on a Controller Pak - * - * Given a valid Controller Pak entry fetched by get_mempak_entry, retrieves the contents - * of the entry. The calling function must ensure that enough room is available in - * the passed in buffer for the entire entry. The entry structure itself contains - * the number of blocks used to store the data which can be multiplied by - * #MEMPAK_BLOCK_SIZE to calculate the size of the buffer needed. - * - * @param[in] controller - * The controller (0-3) to read the entry data from - * @param[in] entry - * The entry structure associated with the data to be read. 
An entry - * structure can be fetched based on index using #get_mempak_entry - * @param[out] data - * The data associated with an entry - * - * @retval 0 if the entry was successfully read - * @retval -1 if input parameters were out of bounds or the entry was corrupted somehow - * @retval -2 if the Controller Pak was not present or bad - * @retval -3 if the data couldn't be read - */ int read_mempak_entry_data( int controller, entry_structure_t *entry, uint8_t *data ) { int toc; @@ -1012,28 +880,6 @@ int read_mempak_entry_data( int controller, entry_structure_t *entry, uint8_t *d return 0; } -/** - * @brief Write associated data to a Controller Pak entry - * - * Given a Controller Pak entry structure with a valid region, name and block count, writes the - * entry and associated data to the Controller Pak. This function will not overwrite any existing - * user data. To update an existing entry, use #delete_mempak_entry followed by - * #write_mempak_entry_data with the same entry structure. 
- * - * @param[in] controller - * The controller (0-3) to write the entry and data to - * @param[in] entry - * The entry structure containing a region, name and block count - * @param[in] data - * The associated data to write to to the created entry - * - * @retval 0 if the entry was created and written successfully - * @retval -1 if the parameters were invalid or the note has no length - * @retval -2 if the Controller Pak wasn't present or was bad - * @retval -3 if there was an error writing to the Controller Pak - * @retval -4 if there wasn't enough space to store the note - * @retval -5 if there is no room in the TOC to add a new entry - */ int write_mempak_entry_data( int controller, entry_structure_t *entry, uint8_t *data ) { uint8_t sector[MEMPAK_BLOCK_SIZE]; @@ -1174,21 +1020,6 @@ int write_mempak_entry_data( int controller, entry_structure_t *entry, uint8_t * return 0; } -/** - * @brief Delete a Controller Pak entry and associated data - * - * Given a valid Controller Pak entry fetched by #get_mempak_entry, removes the entry and frees - * all associated blocks. - * - * @param[in] controller - * The controller (0-3) to delete the note from - * @param[in] entry - * The entry structure that is to be deleted from the Controller Pak - * - * @retval 0 if the entry was deleted successfully - * @retval -1 if the entry was invalid - * @retval -2 if the Controller Pak was bad or not present - */ int delete_mempak_entry( int controller, entry_structure_t *entry ) { entry_structure_t tmp_entry; @@ -1295,5 +1126,3 @@ int delete_mempak_entry( int controller, entry_structure_t *entry ) return 0; } - -/** @} */ /* controller */ diff --git a/src/n64sys.c b/src/n64sys.c index 3b73af3538..9cf12ffa7f 100644 --- a/src/n64sys.c +++ b/src/n64sys.c @@ -16,24 +16,6 @@ #include "rdp.h" #include "utils.h" -/** - * @defgroup n64sys N64 System Interface - * @ingroup lowlevel - * @brief N64 bootup and cache interfaces. 
- * - * The N64 system interface provides a way for code to interact with - * the memory setup on the system. This includes cache operations to - * invalidate or flush regions and the ability to set the boot CIC. - * The @ref system use the knowledge of the boot CIC to properly determine - * if the expansion pak is present, giving 4 MiB of additional memory. Aside - * from this, the MIPS r4300 uses a manual cache management strategy, where - * SW that requires passing buffers to and from hardware components using - * DMA controllers needs to ensure that cache and RDRAM are in sync. A - * set of operations to invalidate and/or write back cache is provided for - * both instruction cache and data cache. - * @{ - */ - int __boot_memsize; ///< Memory size as detected by IPL3 int __boot_tvtype; ///< TV type as detected by IPL3 int __boot_resettype; ///< Reset type as detected by IPL3 @@ -62,16 +44,6 @@ static uint64_t ticks64_base; } \ }) -/** - * @brief Force a data cache writeback over a memory region - * - * Use this to force cached memory to be written to RDRAM. - * - * @param[in] addr - * Pointer to memory in question - * @param[in] length - * Length in bytes of the data pointed at by addr - */ void data_cache_hit_writeback(volatile const void * addr, unsigned long length) { cache_op(0x19, 16); @@ -83,139 +55,48 @@ void __data_cache_hit_invalidate(volatile void * addr, unsigned long length) cache_op(0x11, 16); } -/** - * @brief Force a data cache writeback invalidate over a memory region - * - * Use this to force cached memory to be written to RDRAM - * and then invalidate the corresponding cache lines. 
- * - * @param[in] addr - * Pointer to memory in question - * @param[in] length - * Length in bytes of the data pointed at by addr - */ void data_cache_hit_writeback_invalidate(volatile void * addr, unsigned long length) { cache_op(0x15, 16); } -/** - * @brief Force a data cache index writeback invalidate over a memory region - * - * @param[in] addr - * Pointer to memory in question - * @param[in] length - * Length in bytes of the data pointed at by addr - */ void data_cache_index_writeback_invalidate(volatile void * addr, unsigned long length) { cache_op(0x01, 16); } -/** - * @brief Force a data cache writeback invalidate over whole memory - * - * Also see #data_cache_hit_writeback_invalidate - * - */ void data_cache_writeback_invalidate_all(void) { // TODO: do an index op instead for better performance data_cache_hit_writeback_invalidate(KSEG0_START_ADDR, get_memory_size()); } -/** - * @brief Force an instruction cache writeback over a memory region - * - * Use this to force cached memory to be written to RDRAM. - * - * @param[in] addr - * Pointer to memory in question - * @param[in] length - * Length in bytes of the data pointed at by addr - */ void inst_cache_hit_writeback(volatile const void * addr, unsigned long length) { cache_op(0x18, 32); } -/** - * @brief Force an instruction cache invalidate over a memory region - * - * Use this to force the N64 to update cache from RDRAM. 
- * - * @param[in] addr - * Pointer to memory in question - * @param[in] length - * Length in bytes of the data pointed at by addr - */ void inst_cache_hit_invalidate(volatile void * addr, unsigned long length) { cache_op(0x10, 32); } -/** - * @brief Force an instruction cache index invalidate over a memory region - * - * @param[in] addr - * Pointer to memory in question - * @param[in] length - * Length in bytes of the data pointed at by addr - */ void inst_cache_index_invalidate(volatile void * addr, unsigned long length) { cache_op(0x00, 32); } -/** - * @brief Force an instruction cache invalidate over whole memory - * - * Also see #inst_cache_hit_invalidate - * - */ void inst_cache_invalidate_all(void) { // TODO: do an index op instead for better performance inst_cache_hit_invalidate(KSEG0_START_ADDR, get_memory_size()); } -/** - * @brief Allocate a buffer that will be accessed as uncached memory. - * - * This function allocates a memory buffer that can be safely read and written - * through uncached memory accesses only. It makes sure that that the buffer - * does not share any cacheline with other buffers in the heap, and returns - * a pointer in the uncached segment (0xA0000000). - * - * The buffer contents are uninitialized. - * - * To free the buffer, use #free_uncached. - * - * @param[in] size The size of the buffer to allocate - * - * @return a pointer to the start of the buffer (in the uncached segment) - * - * @see #free_uncached - */ void *malloc_uncached(size_t size) { return malloc_uncached_aligned(16, size); } -/** - * @brief Allocate a buffer that will be accessed as uncached memory, specifying alignment - * - * This function is similar to #malloc_uncached, but allows to force a higher - * alignment to the buffer (just like memalign does). See #malloc_uncached - * for reference. 
- * - * @param[in] align The alignment of the buffer in bytes (eg: 64) - * @param[in] size The size of the buffer to allocate - * - * @return a pointer to the start of the buffer (in the uncached segment) - * - * @see #malloc_uncached - */ void *malloc_uncached_aligned(int align, size_t size) { // Since we will be accessing the buffer as uncached memory, we absolutely @@ -238,53 +119,22 @@ void *malloc_uncached_aligned(int align, size_t size) return UncachedAddr(mem); } -/** - * @brief Free an uncached memory buffer - * - * This function frees a memory buffer previously allocated via #malloc_uncached. - * - * @param[in] buf The buffer to free - * - * @see #malloc_uncached - */ void free_uncached(void *buf) { free(CachedAddr(buf)); } -/** - * @brief Get amount of available memory. - * - * @return amount of total available memory in bytes. - */ int get_memory_size() { return __boot_memsize; } -/** - * @brief Is expansion pak in use. - * - * Checks whether the maximum available memory has been expanded to 8 MiB - * - * @return true if expansion pak detected, false otherwise. - * - * @note On iQue, this function returns true only if the game has been assigned - * exactly 8 MiB of RAM. - */ bool is_memory_expanded() { return get_memory_size() >= 0x7C0000; } -/** - * @brief Is system NTSC/PAL/MPAL - * - * Checks enum hard-coded in PIF BootROM to indicate the tv type of the system. 
- * - * @return enum value indicating PAL, NTSC or MPAL - */ tv_type_t get_tv_type() { return __boot_tvtype; @@ -396,8 +246,6 @@ __attribute__((constructor)) void __init_cop1(void) C1_WRITE_FCR31(fcr31); } -/** @} */ - /* Inline instantiations */ extern inline uint8_t mem_read8(uint64_t vaddr); extern inline uint16_t mem_read16(uint64_t vaddr); diff --git a/src/rtc.c b/src/rtc.c index a3463c7b7b..29b511c9e0 100644 --- a/src/rtc.c +++ b/src/rtc.c @@ -9,92 +9,6 @@ #include "libdragon.h" #include "system.h" -/** - * @defgroup rtc Real-Time Clock Subsystem - * @ingroup peripherals - * @brief Real-time clock interface. - * @author Christopher Bonhage - * - * The Joybus real-time clock is a cartridge peripheral that uses a battery - * to power a clock that tracks the date, time, and day of the week. The - * real-time clock keeps running even when the N64 is powered-off. The - * Joybus RTC is accessed through the serial interface (SI) similar to EEPROM - * and controllers. The Joybus RTC was only ever available on one official - * cartridge that was only available in Japan: Dōbutsu no Mori (Animal Forest). - * Many emulators and flash carts include support for the Animal Forest RTC, - * which makes it possible to include real-time clock functionality in homebrew! - * There is also a real-time clock included in the N64DD hardware, which uses - * a different interface and is not currently supported by libdragon. - * - * To check if the real-time clock is available, call #rtc_init. - * To read the current time from the real-time clock, call #rtc_get. - * Once the RTC subsystem is initialized, you can also use ISO C Time functions - * to get the current time, for example: `time(NULL)` will return the number of - * seconds elapsed since the UNIX epoch (January 1, 1970 at 00:00:00). - * To check if the real-time clock supports writes, call #rtc_is_writable. - * To write a new time to the real-time clock, call #rtc_set. 
- * - * This subsystem handles decoding and encoding the date/time from its internal - * format into a struct called #rtc_time_t, which contains integer values for - * year, month, day-of-month, day-of-week, hour, minute, and second. - * - * The Joybus RTC contains 3 "blocks" (or zones) which contain 8 bytes of data: - * Block 0 contains a half-word control register and opaque calibration data. - * Block 1 is unused and unsupported. See notes below. - * Block 2 contains the current date/time as packed binary-coded decimal. - * - * Animal Forest did not use block 1 at all, so most emulators do not bother to - * implement it. Theoretically, block 1 could be used as 8-bytes of SRAM-backed - * storage, but this is not supported by libdragon's Real-Time Clock Subsystem. - * If you need storage, consider using a standard cartridge save type or saving - * to a Controller Pak. - * - * (As of July 2021) Joybus RTC does not work in combination with any EEPROM - * save type on EverDrive64 3.0 or X7. To have the best compatibility and player - * experience, it is not recommended to use the EEPROM + RTC ROM configuration. - * This is a bug in the EverDrive64 firmware and not a system limitation imposed - * by the Joybus protocol or Serial Interface. - * - * Unfortunately, since only one game ever used Joybus RTC (and that game was - * later re-released on the GameCube in English), real-time clock support in - * emulators and flash carts can be incomplete, inaccurate, or non-existent. - * Many emulators do not actually implement the Joybus RTC write command and - * always respond with the host device's current local time. Some emulators - * and flash carts support writing to RTC but will not persist the date/time - * after resetting or powering-off. You can run the `rtctest` example ROM on - * your preferred emulator or flash cart to what RTC support is available. 
- * - * The only reliable way to check if writes are actually supported is to write - * a time to the RTC and read the time back out. Many emulators that do - * support RTC reads will silently ignore RTC writes. You should detect - * whether writes are supported using #rtc_is_writable so that you can - * conditionally show the option to change the time if it's supported. If the - * RTC supports writes, it is safe to call #rtc_set to set the date and time. - * - * Due to the inaccurate and inconsistent behavior of RTC reproductions that - * currently exist, this subsystem trades-off complete accuracy with the actual - * Animal Forest RTC in favor of broader compatibility with the various quirks - * and bugs that exist in real-world scenarios like emulators and flash carts. - * - * Some notable examples of RTC support in the ecosystem (as of July 2021): - * - * 64drive hw2 fully implements Joybus RTC including writes, but requires - * delays after setting the time (see #JOYBUS_RTC_WRITE_FINISHED_DELAY). - * - * EverDrive64 3.0 and X7 partially support Joybus RTC, with caveats: The RTC - * must be explicitly enabled in the OS or with a ROM header configuration; - * RTC will not be detected if the EEPROM save type is used; RTC writes are - * not supported through the SI, so changing the time must be done in the OS. - * - * UltraPIF fully implements an emulated Joybus RTC that can be accessed even - * when the cartridge does not include the real-time clock circuitry. - * - * Special thanks to marshallh and jago85 for their hard work and research - * reverse-engineering and documenting the inner-workings of the Joybus RTC. - * - * @{ - */ - /** * @brief Joybus real-time clock identifier. * @@ -543,25 +457,6 @@ static time_t newlib_time_hook( void ) return mktime( &time ); } -/** - * @brief High-level convenience helper to initialize the RTC subsystem. 
- * - * The RTC Subsystem depends on the libdragon Timer Subsystem, so make sure - * to call #timer_init before calling #rtc_init! - * - * Some flash carts require the RTC to be explicitly enabled before loading - * the ROM file. Some emulators and flash carts do not support RTC at all. - * - * This function will detect if the RTC is available and if so, will - * prepare the RTC so that the current time can be read from it. - * - * This operation may take up to 50 milliseconds to complete. - * - * This will also hook the RTC into the newlib gettimeofday function, so - * you will be able to use the ISO C time functions if RTC is available. - * - * @return whether the RTC is present and supported by the RTC Subsystem. - */ bool rtc_init( void ) { /* libdragon currently only supports Joybus RTC! */ @@ -584,18 +479,6 @@ bool rtc_init( void ) return true; } -/** - * @brief Close the RTC Subsystem, disabling system hooks. - * - * Unhooks the RTC from the newlib gettimeofday function. - * This will cause subsequent calls to gettimeofday to error with ENOSYS. - * - * The only reason you should ever need to call this is if you need to - * stop the Timer Subsystem, which the RTC Subsystem depends on. If you - * do need to do this, make sure to call #rtc_close BEFORE #timer_close - * and then call #rtc_init again after you call #timer_init to restart - * the Timer Subsystem! - */ void rtc_close( void ) { /* Disable newlib `gettimeofday` integration */ @@ -604,31 +487,6 @@ void rtc_close( void ) rtc_get_cache_ticks = 0; } -/** - * @brief Calculate sane values for arbitrary time inputs. - * - * If your time inputs are already sane, nothing should change. - * This function will clamp date/time values within the expected ranges, - * including the correct day-of-month based on year/month. It will also - * recalculate the day-of-week based on the clamped year/month/day. 
- * - * This is useful to call while the player is adjusting the time after each - * input to ensure that the date being set always makes sense before they - * actually confirm and commit the updated date/time. The rtctest example - * demonstrates a user-interface for setting the time with live validation. - * - * Internally, RTC cannot represent dates before 1990-01-01, although some - * RTC implementations (like UltraPIF) only support dates after 2000-01-01. - * - * For highest compatibility, it is not recommended to set the date past - * 2038-01-19 03:14:07 UTC, which is the UNIX timestamp Epochalypse. - * - * Special thanks to networkfusion for providing the algorithm to - * calculate day-of-week from an arbitrary date. - * - * @param[in,out] rtc_time - * Pointer to the RTC time data structure - */ void rtc_normalize_time( rtc_time_t * rtc_time ) { /* Clamp date/time values that have static limits */ @@ -665,30 +523,6 @@ void rtc_normalize_time( rtc_time_t * rtc_time ) ) % 7; } -/** - * @brief Read the current date/time from the real-time clock. - * - * If the RTC is not detected or supported, this function will - * not modify the destination rtc_time parameter. - * - * Your code should call this once per frame to update the #rtc_time_t - * data structure. The RTC Subsystem maintains a cache of the - * most-recent RTC time that was read and will only perform an - * actual RTC read command if the cache is invalidated. The - * destination rtc_time parameter will be updated regardless of - * the cache validity. - * - * Cache will invalidate every #RTC_GET_CACHE_INVALIDATE_TICKS. - * Calling #rtc_set will also invalidate the cache. - * - * If an actual RTC read command is needed, this function can take - * a few milliseconds to complete. 
- * - * @param[out] rtc_time - * Destination pointer for the RTC time data structure - * - * @return whether the rtc_time destination pointer data was modified - */ bool rtc_get( rtc_time_t * rtc_time ) { /* libdragon currently only supports getting the time for Joybus RTC! */ @@ -714,22 +548,6 @@ bool rtc_get( rtc_time_t * rtc_time ) return true; } -/** - * @brief High-level convenience helper to set the RTC date/time. - * - * Prepares the RTC for writing, sets the new time, and resumes the clock. - * - * This function will take approximately 570 milliseconds to complete. - * - * Unfortunately, the best way to ensure that writes to the RTC have - * actually finished is by waiting for a fixed duration. Emulators may not - * accurately reflect this, but this delay is necessary on real hardware. - * - * @param[in] write_time - * Source pointer for the RTC time data structure - * - * @return false if the RTC does not support being set - */ bool rtc_set( rtc_time_t * write_time ) { /* libdragon currently only supports setting the time for Joybus RTC! */ @@ -759,23 +577,6 @@ bool rtc_set( rtc_time_t * write_time ) return true; } -/** - * @brief Determine whether the RTC supports writing the time. - * - * Some emulators and flash carts do not support writing to the RTC, so - * this function makes an attempt to detect silent write failures and will - * return `false` if it is unable to change the time on the RTC. - * - * This function is useful if your program wants to conditionally offer the - * ability to set the time based on hardware/emulator support. - * - * Unfortunately this operation may introduce a slight drift in the clock, - * but it is the only way to determine if the RTC supports the write command. - * - * This operation will take approximately 1 second to complete. 
- * - * @return whether RTC writes appear to be supported - */ bool rtc_is_writable( void ) { rtc_time_t restore_time; @@ -802,5 +603,3 @@ bool rtc_is_writable( void ) return verified; } - -/** @} */ /* rtc */ diff --git a/src/system.c b/src/system.c index a51971ac69..5d29e8f7e7 100644 --- a/src/system.c +++ b/src/system.c @@ -19,38 +19,6 @@ #include "system.h" #include "n64sys.h" -/** - * @defgroup system newlib Interface Hooks - * @brief System hooks to provide low level threading and filesystem functionality to newlib. - * - * newlib provides all of the standard C libraries for homebrew development. - * In addition to standard C libraries, newlib provides some additional bridging - * functionality to allow POSIX function calls to be tied into libdragon. - * Currently this is used only for filesystems. The newlib interface hooks here - * are mostly stubs that allow homebrew applications to compile. - * - * The sbrk function is responsible for allowing newlib to find the next chunk - * of free space for use with malloc calls. The size of the available heap is - * computed using the memory size computed by the boot code (IPL3), and available - * via #get_memory_size(), which is normally either 4 MiB or 8 MiB if the expansion - * pak is available. - * - * libdragon has defined a custom callback structure for filesystems to use. - * Providing relevant hooks for calls that your filesystem supports and passing - * the resulting structure to #attach_filesystem will hook your filesystem into - * newlib. Calls to POSIX file operations will be passed on to your filesystem - * code if the file prefix matches, allowing code to make use of your filesystyem - * without being rewritten. - * - * For example, your filesystem provides libdragon an interface to access a - * homebrew SD card interface. You register a filesystem with "sd:/" as the prefix - * and then attempt to open "sd://directory/file.txt". 
The open callback for your - * filesystem will be passed the file "/directory/file.txt". The file handle returned - * will be passed into all subsequent calls to your filesystem until the file is - * closed. - * @{ - */ - /** * @name STDIN/STDOUT/STDERR definitions from unistd.h * @@ -101,8 +69,10 @@ char *__env[1] = { 0 }; void (*__assert_func_ptr)(const char *file, int line, const char *func, const char *failedexpr) = 0; /* Externs from libdragon */ +/// @cond extern void enable_interrupts(); extern void disable_interrupts(); +/// @endcond /** * @brief Filesystem mapping structure @@ -332,30 +302,6 @@ static inline uint32_t __randn( uint32_t *state, int n ) return ((uint64_t)__rand( state ) * n) >> 32; } -/** - * @brief Register a filesystem with newlib - * - * This function will take a prefix in the form of 'prefix:/' and a pointer - * to a filesystem structure of relevant callbacks and register it with newlib. - * Any standard open/fopen calls with the registered prefix will be passed - * to this filesystem. Userspace code does not need to know the underlying - * filesystem, only the prefix that it has been registered under. - * - * The filesystem pointer passed in to this function should not go out of scope - * for the lifetime of the filesystem. - * - * @param[in] prefix - * Prefix of the filesystem - * @param[in] filesystem - * Structure of callbacks for various functions in the filesystem. - * If the registered filesystem doesn't support an operation, it - * should leave the callback null. 
- * - * @retval -1 if the parameters are invalid - * @retval -2 if the prefix is already in use - * @retval -3 if there are no more slots for filesystems - * @retval 0 if the filesystem was registered successfully - */ int attach_filesystem( const char * const prefix, filesystem_t *filesystem ) { /* Sanity checking */ @@ -417,19 +363,6 @@ int attach_filesystem( const char * const prefix, filesystem_t *filesystem ) return 0; } -/** - * @brief Unregister a filesystem from newlib - * - * @note This function will make sure all files are closed before unregistering - * the filesystem. - * - * @param[in] prefix - * The prefix that was used to register the filesystem - * - * @retval -1 if the parameters were invalid - * @retval -2 if the filesystem couldn't be found - * @retval 0 if the filesystem was successfully unregistered - */ int detach_filesystem( const char * const prefix ) { /* Sanity checking */ @@ -1452,20 +1385,6 @@ int getentropy(uint8_t *buf, size_t buflen) return 0; } -/** - * @brief Find the first file in a directory - * - * This function should be called to start enumerating a directory or whenever - * a directory enumeration should be restarted. - * - * @param[in] path - * Path to the directory structure - * @param[out] dir - * Directory entry structure to populate with first entry - * - * @return 0 on successful lookup, -1 if the directory existed and is empty, - * or a different negative value on error (in which case, errno will be set). - */ int dir_findfirst( const char * const path, dir_t *dir ) { filesystem_t *fs = __get_fs_pointer_by_name( path ); @@ -1492,21 +1411,6 @@ int dir_findfirst( const char * const path, dir_t *dir ) return fs->findfirst( (char *)path + __strlen( filesystems[mapping].prefix ) - 1, dir ); } -/** - * @brief Find the next file in a directory - * - * After finding the first file in a directory using #dir_findfirst, call this to retrieve - * the rest of the directory entries. 
Call this repeatedly until a negative error is returned - * signifying that there are no more directory entries in the directory. - * - * @param[in] path - * Path to the directory structure - * @param[out] dir - * Directory entry structure to populate with next entry - * - * @return 0 on successful lookup, -1 if there are no more files in the directory, - * or a different negative value on error (in which case, errno will be set). - */ int dir_findnext( const char * const path, dir_t *dir ) { filesystem_t *fs = __get_fs_pointer_by_name( path ); @@ -1557,14 +1461,6 @@ int mkdir( const char * path, mode_t mode ) return fs->mkdir( (char *)path + __strlen( filesystems[mapping].prefix ) - 1, mode ); } -/** - * @brief Hook into stdio for STDIN, STDOUT and STDERR callbacks - * - * @param[in] stdio_calls - * Pointer to structure containing callbacks for stdio functions - * - * @return 0 on successful hook or a negative value on failure. - */ int hook_stdio_calls( stdio_t *stdio_calls ) { if( stdio_calls == NULL ) @@ -1585,14 +1481,6 @@ int hook_stdio_calls( stdio_t *stdio_calls ) return 0; } -/** - * @brief Unhook from stdio - * - * @param[in] stdio_calls - * Pointer to structure containing callbacks for stdio functions - * - * @return 0 on successful hook or a negative value on failure. - */ int unhook_stdio_calls( stdio_t *stdio_calls ) { /* Just wipe out internal variable */ @@ -1607,14 +1495,6 @@ int unhook_stdio_calls( stdio_t *stdio_calls ) return 0; } -/** - * @brief Hook into gettimeofday with a current time callback. - * - * @param[in] time_fn - * Pointer to callback for the current time function - * - * @return 0 if successful or a negative value on failure. - */ int hook_time_call( time_t (*time_fn)( void ) ) { if( time_fn == NULL ) @@ -1627,14 +1507,6 @@ int hook_time_call( time_t (*time_fn)( void ) ) return 0; } -/** - * @brief Unhook from gettimeofday current time callback. 
- * - * @param[in] time_fn - * Pointer to callback for the current time function - * - * @return 0 if successful or a negative value on failure. - */ int unhook_time_call( time_t (*time_fn)( void ) ) { if( time_hook == time_fn ) @@ -1672,5 +1544,3 @@ void __assert_func(const char *file, int line, const char *func, const char *fai __assert_func_ptr(file, line, func, failedexpr); abort(); } - -/** @} */ diff --git a/src/timer.c b/src/timer.c index 3ae06b0888..8c23f77356 100644 --- a/src/timer.c +++ b/src/timer.c @@ -10,35 +10,6 @@ #include "regsinternal.h" #include "utils.h" -/** - * @defgroup timer Timer Subsystem - * @ingroup libdragon - * @brief Interface to the timer module in the MIPS r4300 processor. - * - * The timer subsystem allows code to receive a callback after a specified - * number of ticks or microseconds. It interfaces with the MIPS - * coprocessor 0 to handle the timer interrupt and provide useful timing - * services. - * - * Before attempting to use the timer subsystem, code should call #timer_init. - * After the timer subsystem has been initialized, a new one-shot or - * continuous timer can be created with #new_timer. To remove an expired - * one-shot timer or a recurring timer, use #delete_timer. To temporarily - * stop a timer, use #stop_timer. To restart a stopped timer or an expired - * one-shot timer, use #start_timer. Once code no longer needs the timer - * subsystem, a call to #timer_close will free all continuous timers and shut - * down the timer subsystem. Note that timers removed with #stop_timer or - * expired one-short timers will not be removed automatically and are the - * responsibility of the calling code to be freed, regardless of a call to - * #timer_close. - * - * Because the MIPS internal counter wraps around after ~90 seconds (see - * TICKS_READ), it's not possible to schedule a timer more than 90 seconds - * in the future. - * - * @{ - */ - /** @brief Refcount of #timer_init vs #timer_close calls. 
*/ static int timer_init_refcount = 0; @@ -198,21 +169,6 @@ static void timer_poll(void) timer_update_compare(TI_timers, TICKS_READ()); } -/** - * @brief Initialize the timer subsystem - * - * This function will reset the COP0 ticks counter to 0. Even if you - * later access the hardware counter directly (via TICKS_READ()), it should not - * be a problem if you call timer_init() early in the application main. - * - * Do not modify the COP0 ticks counter after calling this function. Doing so - * will impede functionality of the timer module. - * - * The timer subsystem tracks the number of times #timer_init is called - * and will only initialize the subsystem on the first call. This reference - * count also applies to #timer_close, which will only close the subsystem - * if it is called the same number of times as #timer_init. - */ void timer_init(void) { // Just increment the refcount if already initialized. @@ -227,21 +183,6 @@ void timer_init(void) enable_interrupts(); } -/** - * @brief Create a new timer and add to list - * - * If you need to associate some data with the timer, consider using - * #new_timer_context to include a pointer in the callback. - * - * @param[in] ticks - * Number of ticks before the timer should fire - * @param[in] flags - * Timer flags. See #TF_ONE_SHOT, #TF_CONTINUOUS and #TF_DISABLED - * @param[in] callback - * Callback function to call when the timer expires - * - * @return A pointer to the timer structure created - */ timer_link_t *new_timer(int ticks, int flags, timer_callback1_t callback) { assertf(timer_init_refcount > 0, "timer module not initialized"); @@ -270,22 +211,6 @@ timer_link_t *new_timer(int ticks, int flags, timer_callback1_t callback) return timer; } -/** - * @brief Create a new timer with context and add to list - * - * If you don't need the context, consider using #new_timer instead. - * - * @param[in] ticks - * Number of ticks before the timer should fire - * @param[in] flags - * Timer flags. 
See #TF_ONE_SHOT, #TF_CONTINUOUS and #TF_DISABLED - * @param[in] callback - * Callback function to call when the timer expires - * @param[in] ctx - * Opaque pointer to pass as an argument to callback - * - * @return A pointer to the timer structure created - */ timer_link_t *new_timer_context(int ticks, int flags, timer_callback2_t callback, void *ctx) { assertf(timer_init_refcount > 0, "timer module not initialized"); @@ -314,21 +239,6 @@ timer_link_t *new_timer_context(int ticks, int flags, timer_callback2_t callback return timer; } -/** - * @brief Start a timer (not currently in the list) - * - * If you need to associate some data with the timer, consider using - * #start_timer_context to include a pointer in the callback. - * - * @param[in] timer - * Pointer to timer structure to reinsert and start - * @param[in] ticks - * Number of ticks before the timer should fire - * @param[in] flags - * Timer flags. See #TF_ONE_SHOT, #TF_CONTINUOUS, and #TF_DISABLED - * @param[in] callback - * Callback function to call when the timer expires - */ void start_timer(timer_link_t *timer, int ticks, int flags, timer_callback1_t callback) { assertf(timer_init_refcount > 0, "timer module not initialized"); @@ -355,22 +265,6 @@ void start_timer(timer_link_t *timer, int ticks, int flags, timer_callback1_t ca } } -/** - * @brief Start a timer (not currently in the list) with context - * - * If you don't need the context, consider using #start_timer instead. - * - * @param[in] timer - * Pointer to timer structure to reinsert and start - * @param[in] ticks - * Number of ticks before the timer should fire - * @param[in] flags - * Timer flags. 
See #TF_ONE_SHOT, #TF_CONTINUOUS, and #TF_DISABLED - * @param[in] callback - * Callback function to call when the timer expires - * @param[in] ctx - * Opaque pointer to pass as an argument to callback - */ void start_timer_context(timer_link_t *timer, int ticks, int flags, timer_callback2_t callback, void *ctx) { assertf(timer_init_refcount > 0, "timer module not initialized"); @@ -397,12 +291,6 @@ void start_timer_context(timer_link_t *timer, int ticks, int flags, timer_callba } } -/** - * @brief Reset a timer and add to list - * - * @param[in] timer - * Pointer to timer structure to reinsert and start - */ void restart_timer(timer_link_t *timer) { if (timer) @@ -422,18 +310,6 @@ void restart_timer(timer_link_t *timer) } } -/** - * @brief Stop a timer and remove it from the list - * - * @note This function does not free a timer structure, use #delete_timer - * to do this. - * - * @note It is safe to call this function from a timer callback, including - * to stop a timer from its own callback. - * - * @param[in] timer - * Timer structure to stop and remove - */ void stop_timer(timer_link_t *timer) { timer_link_t *head; @@ -466,14 +342,6 @@ void stop_timer(timer_link_t *timer) } } -/** - * @brief Remove a timer from the list and delete it - * - * @note It is not safe to call this function from a timer callback. - - * @param[in] timer - * Timer structure to stop, remove and free - */ void delete_timer(timer_link_t *timer) { assertf(timer_init_refcount > 0, "timer module not initialized"); @@ -484,17 +352,6 @@ void delete_timer(timer_link_t *timer) } } -/** - * @brief Free and close the timer subsystem - * - * This function will ensure all recurring timers are deleted from the list - * before closing. One-shot timers that have expired will need to be - * manually deleted with #delete_timer. - * - * The timer subsystem tracks the number of times #timer_init is called - * and will only close the subsystem if #timer_close is called the same - * number of times. 
- */ void timer_close(void) { assertf(timer_init_refcount > 0, "timer module not initialized"); @@ -530,16 +387,8 @@ void timer_close(void) enable_interrupts(); } -/** - * @brief Return total ticks since timer was initialized, as a 64-bit counter. - * - * @return Then number of ticks since the timer was initialized - * - */ long long timer_ticks(void) { assertf(timer_init_refcount > 0, "timer module not initialized"); return get_ticks(); } - -/** @} */ diff --git a/src/tpak.c b/src/tpak.c index cc6f069743..8e9cbd7c03 100755 --- a/src/tpak.c +++ b/src/tpak.c @@ -8,34 +8,6 @@ #include "controller.h" #include -/** - * @defgroup transferpak Transfer Pak interface - * @ingroup controller - * @brief Transfer Pak interface - * - * The Transfer Pak interface allows access to Game Boy and Game Boy Color - * cartridges connected through the accessory port of each controller. - * - * Before accessing a Transfer Pak, first call #tpak_init to boot up the - * accessory and ensure that it is in working order. For advanced use-cases, - * #tpak_set_power and #tpak_set_access can also be called directly if you - * need to put the Transfer Pak into a certain mode. You can verify that the - * Transfer Pak is in the correct mode by inspecting the #tpak_get_status flags. - * - * Whenever the Transfer Pak is not in use, it is recommended to power it off - * by calling @ref tpak_set_power "`tpak_set_power(controller, false)`". - * - * You can read the connected Game Boy cartridge's ROM header by calling - * #tpak_get_cartridge_header and validating the result with #tpak_check_header. - * If the ROM header checksum does not match, it is likely that the cartridge - * connection is poor. - * - * You can use #tpak_read and #tpak_write to access the Game Boy cartridge. - * Note that these functions do not account for cartridge bank switching. 
- * For more information about Game Boy cartridge bank switching, refer to the - * GBDev Pan Docs at https://gbdev.io/pandocs/ - */ - /** * @anchor TPAK_POWER * @name Transfer Pak power control values @@ -69,19 +41,6 @@ /** @brief Transfer Pak cartridge bank size (16 KiB) */ #define TPAK_BANK_SIZE 0x4000 -/** - * @brief Set Transfer Pak or Game Boy cartridge status/control value. - * - * This is an internal helper to set a Transfer Pak status or control setting. - * This function is not suitable for setting individual bytes in Save RAM! - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[in] address - * Address of the setting. Should be between 0x8000 and 0xBFE0 - * @param[in] value - * A byte of data to fill the write buffer with. - */ int tpak_set_value(int controller, uint16_t address, uint8_t value) { uint8_t block[TPAK_BLOCK_SIZE]; @@ -89,16 +48,6 @@ int tpak_set_value(int controller, uint16_t address, uint8_t value) return joybus_accessory_write(controller, address, block); } -/** - * @brief Prepare a Transfer Pak for read/write commands. - * - * Powers on the Transfer Pak and enables access to the Game Boy cartridge. - * Also performs status checks to confirm the Transfer Pak can be accessed reliably. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @return 0 if successful or @ref TPAK_ERROR otherwise. - */ int tpak_init(int controller) { int result = 0; @@ -119,60 +68,23 @@ int tpak_init(int controller) return 0; } -/** - * @brief Set the access mode flag for a Transfer Pak. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[in] access_state - * Whether to allow access to the Game Boy cartridge. - * @return 0 if successful or @ref TPAK_ERROR otherwise. - */ int tpak_set_access(int controller, bool access_state) { uint8_t value = access_state ? 
1 : 0; return tpak_set_value(controller, TPAK_ADDRESS_STATUS, value); } -/** - * @brief Set the power enabled flag for a Transfer Pak. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[in] power_state - * True to power the Transfer Pak and cartridge on, false to turn it off. - * @return 0 if successful or @ref TPAK_ERROR otherwise. - */ int tpak_set_power(int controller, bool power_state) { uint8_t value = power_state ? TPAK_POWER_ON : TPAK_POWER_OFF; return tpak_set_value(controller, TPAK_ADDRESS_POWER, value); } -/** - * @brief Set the cartridge data address memory bank for a Transfer Pak. - * - * Change the bank of address space that is available for #tpak_read and - * #tpak_write between Transfer Pak addresses 0xC000 and 0xFFFF. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[in] bank - * The bank (0-3) to switch to. - * @return 0 if successful or @ref TPAK_ERROR otherwise. - */ int tpak_set_bank(int controller, int bank) { return tpak_set_value(controller, TPAK_ADDRESS_BANK, bank); } -/** - * @brief Get the status flags for a Transfer Pak. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @return The status byte with @ref TPAK_STATUS flags - */ uint8_t tpak_get_status(int controller) { uint8_t block[TPAK_BLOCK_SIZE]; @@ -181,15 +93,6 @@ uint8_t tpak_get_status(int controller) return block[0]; } -/** - * @brief Read the Game Boy cartridge ROM header from a Transfer Pak. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[out] header - * Pointer to destination Game Boy cartridge ROM header data structure. - * @return 0 if successful or @ref TPAK_ERROR otherwise. - */ int tpak_get_cartridge_header(int controller, struct gameboy_cartridge_header* header) { // We're interested in 0x0000 - 0x3FFF of gb space. 
@@ -200,23 +103,6 @@ int tpak_get_cartridge_header(int controller, struct gameboy_cartridge_header* h return tpak_read(controller, address, (uint8_t*) header, sizeof(*header)); } -/** - * @brief Write data from a buffer to a Game Boy cartridge via Transfer Pak. - * - * Save RAM is located between gameboy addresses 0xA000 and 0xBFFF, which is in the Transfer Pak's bank 2. - * This function does not account for cartridge bank switching, so to switch between MBC1 RAM banks, for example, - * you'll need to switch to Tpak bank 1, and write to address 0xE000, which translates to address 0x6000 on the gameboy. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[in] address - * address in Game Boy cartridge space to write to. - * @param[in] data - * buffer containing the data to write. - * @param[in] size - * length of the buffer. - * @return 0 if successful or @ref TPAK_ERROR otherwise. - */ int tpak_write(int controller, uint16_t address, uint8_t* data, uint16_t size) { if (controller < 0 || controller > 3 || size % TPAK_BLOCK_SIZE || address % TPAK_BLOCK_SIZE) @@ -254,19 +140,6 @@ int tpak_write(int controller, uint16_t address, uint8_t* data, uint16_t size) return 0; } -/** - * @brief Read data from a Game Boy cartridge to a buffer via Transfer Pak. - * - * @param[in] controller - * The controller (0-3) with Transfer Pak connected. - * @param[in] address - * address in Game Boy cartridge space to read from. - * @param[in] buffer - * buffer to copy cartridge data into. - * @param[in] size - * length of the data to be read. - * @return 0 if successful or @ref TPAK_ERROR otherwise. 
- */ int tpak_read(int controller, uint16_t address, uint8_t* buffer, uint16_t size) { if (controller < 0 || controller > 3 || size % TPAK_BLOCK_SIZE || address % TPAK_BLOCK_SIZE) @@ -304,13 +177,6 @@ int tpak_read(int controller, uint16_t address, uint8_t* buffer, uint16_t size) return 0; } -/** - * @brief Verify a Game Boy cartridge ROM header checksum. - * - * Confirms that the Transfer Pak is connected and working properly. - * - * @param[in] header The Game Boy ROM header to check. - */ bool tpak_check_header(struct gameboy_cartridge_header* header) { uint8_t sum = 0; From dc3b17f9ccf6058d3a516685bd31eaf44d399b86 Mon Sep 17 00:00:00 2001 From: Giovanni Bajo Date: Tue, 11 Jun 2024 22:06:31 +0200 Subject: [PATCH 48/48] mempak: fix doxygen --- include/mempak.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/mempak.h b/include/mempak.h index 4a6384a76b..0eb7c8c1f5 100644 --- a/include/mempak.h +++ b/include/mempak.h @@ -7,7 +7,7 @@ #define __LIBDRAGON_MEMPAK_H /** - * @defgroup cpak Controller Pak Filesystem Routines + * @defgroup controllerpak Controller Pak Filesystem Routines * @ingroup controller * @brief Managed Controller Pak interface. *