diff --git a/target/fpga/Makefile b/target/fpga/Makefile index b15e540ff..b5addbb0f 100644 --- a/target/fpga/Makefile +++ b/target/fpga/Makefile @@ -7,20 +7,18 @@ MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) MKFILE_DIR := $(dir $(MKFILE_PATH)) ROOT := ${MKFILE_DIR}../../../.. -CVA6_SDK ?= ${ROOT}/../cva6-sdk +CVA6_SDK ?= DEBUG ?= 0 EXT_JTAG ?= 0 -VCU ?= 01 -FPGA_ID := 091847100576A -HW_SERVER := bordcomputer:3231 - -# Select VCU128-02 -ifeq ($(VCU),02) - FPGA_ID := 091847100638A - HW_SERVER := bordcomputer:3232 -endif +FPGA_PATH := $(XILINX_FPGA_PATH) +HW_SERVER := $(XILINX_HOST):$(XILINX_PORT) +BENDER ?= bender +VIVADO ?= vitis-2020.2 vivado +# Do not proceed with implem (CI) +XILINX_SYNTHESIS_ONLY ?= 0 VIVADO ?= vivado +VIVADO_ARGS := XILINX_SYNTHESIS_ONLY=$(XILINX_SYNTHESIS_ONLY) MKIMAGE ?= $(CURDIR)/br2_external/install/bin/mkimage NPROC ?= $(shell nproc) @@ -31,33 +29,36 @@ LINUX_UIMAGE ?= ${CVA6_SDK}/uImage DTB = bootrom/occamy.dtb +BENDER_TARGETS += -t cv64a6_imafdc_sv39 -t occamy +ifeq ($(EXT_JTAG), 0) + BENDER_TARGETS += -t bscane +endif + default: all all: occamy_vcu128 -include $(ROOT)/util/Makefrag - vivado_ips/occamy_xilinx: - ${MAKE} -C vivado_ips occamy_xilinx + ${MAKE} -C vivado_ips occamy_xilinx DEBUG=$(DEBUG) EXT_JTAG=$(EXT_JTAG) bootrom/bootrom-spl.coe: ${MAKE} -C bootrom occamy_vcu128: vivado_ips/occamy_xilinx bootrom/bootrom-spl.coe define_defines_includes_no_simset.tcl - ${VIVADO} -mode batch -source occamy_vcu128.tcl -tclargs $(DEBUG) $(EXT_JTAG) $(NPROC) ${MKFILE_DIR}/bootrom/bootrom-spl.coe + $(VIVADO_ARGS) ${VIVADO} -mode gui -source occamy_vcu128.tcl -tclargs $(DEBUG) $(EXT_JTAG) $(NPROC) ${MKFILE_DIR}/bootrom/bootrom-spl.coe define_defines_includes_no_simset.tcl: $(BENDER_FILES) - ${BENDER} script vivado -t cv64a6_imafdc_sv39 --only-defines --only-includes --no-simset > $@ + ${BENDER} script vivado $(BENDER_TARGETS) --only-defines --only-includes --no-simset > $@ program: ${VIVADO} -mode batch -source occamy_vcu128_program.tcl 
-tclargs ${VCU} flash: ${FILE} - ${VIVADO} -mode batch -source occamy_vcu128_flash.tcl -tclargs ${HW_SERVER} ${FPGA_ID} flash.mcs ${OFFSET} ${FILE} + ${VIVADO} -mode batch -source occamy_vcu128_flash.tcl -tclargs ${HW_SERVER} ${FPGA_PATH} flash.mcs ${OFFSET} ${FILE} rm flash.mcs # Flash only uboot (made for TFTP bootmode), overwrite the bootrom, and reset the board flashrun: ${UBOOT_ITB} - ${VIVADO} -mode batch -source occamy_vcu128_flashrun.tcl -tclargs ${HW_SERVER} ${FPGA_ID} flash.mcs 6000000 ${UBOOT_ITB} + ${VIVADO} -mode batch -source occamy_vcu128_flashrun.tcl -tclargs ${HW_SERVER} ${FPGA_PATH} flash.mcs 6000000 ${UBOOT_ITB} rm flash.mcs flash-u-boot: diff --git a/target/fpga/bootrom/Makefile b/target/fpga/bootrom/Makefile index ca7d382b5..ff8cbe736 100644 --- a/target/fpga/bootrom/Makefile +++ b/target/fpga/bootrom/Makefile @@ -6,9 +6,7 @@ MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) MKFILE_DIR := $(dir $(MKFILE_PATH)) -ROOT := ${MKFILE_DIR}../../../../.. - -include $(ROOT)/util/Makefrag +ROOT := ${MKFILE_DIR}../../.. 
CVA6_SDK ?= $(ROOT)/../cva6-sdk UBOOT_SPL_BIN ?= $(CVA6_SDK)/u-boot/spl/u-boot-spl.bin @@ -28,7 +26,7 @@ CFLAGS = -Os -g -Werror -ffreestanding -fno-strict-aliasing CFLAGS += -static -nostartfiles -nostartfiles CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls -fno-stack-protector CFLAGS += -mno-save-restore -mstrict-align -CFLAGS += -mabi=lp64d -march=rv64imafd +CFLAGS += -mabi=lp64d -march=rv64imafd_zifencei CFLAGS += -mcmodel=medany GIT_SHA := $(shell git describe --match=NeVeRmAtCh --always --abbrev=10 --dirty) @@ -40,8 +38,8 @@ all: bootrom-spl.coe bootrom.tcl bootrom-spl.tcl $(CC) $(CFLAGS) -DGIT_SHA=\"$(GIT_SHA)\" $(INCLUDES) -c $< -o $@ @echo "CC <= $<" -%.dtb: %.dts - dtc -I dts $< -O dtb -o $@ +%.dtb: %.dts $(wildcard *.dtsi) + dtc -@ -I dts $< -O dtb -o $@ bootrom.elf bootrom.dump bootrom.bin: bootrom.S $(OBJS_C) bootrom.ld occamy.dtb $(CC) $(CFLAGS) $(INCLUDES) -Tbootrom.ld $< $(OBJS_C) -o bootrom.elf @@ -67,4 +65,5 @@ clean: %.tcl: %.bin @echo "TCL <= $<" @$(call BINRAY_SIZE_CHECK,$<,1000000) - @$(BIN2JTAG) -c32 -b 0 -d hw_axi_1 $< > $@ + python3 $(ROOT)/util/bin2jtag.py -c32 -b 0 -d hw_axi_1 $< > $@ + diff --git a/target/fpga/bootrom/mac_address.dtsi b/target/fpga/bootrom/mac_address.dtsi new file mode 100644 index 000000000..8ad365a59 --- /dev/null +++ b/target/fpga/bootrom/mac_address.dtsi @@ -0,0 +1,2 @@ +local-mac-address = [ 00 00 00 00 00 00 ]; +mac-address = [ 00 00 00 00 00 00 ]; diff --git a/target/fpga/bootrom/occamy.dts b/target/fpga/bootrom/occamy.dts index 5ecfd3305..0e8e785a2 100644 --- a/target/fpga/bootrom/occamy.dts +++ b/target/fpga/bootrom/occamy.dts @@ -25,7 +25,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; - snitch_mem: buffer@c0000000 { + snitch_mem: l3_mem@c0000000 { reg = <0x0 0xc0000000 0x0 0x10000000>; }; }; @@ -63,7 +63,7 @@ soc: soc { #address-cells = <2>; #size-cells = <2>; - compatible = "simple-bus"; + compatible = "eth,occamy-soc", "simple-bus"; ranges; debug@0 { compatible = "riscv,debug-013"; @@ 
-191,9 +191,8 @@ clock-names = "s_axi_lite_clk", "axis_clk"; // interrupt and mac_irq interrupts-extended = <&PLIC0 1 &PLIC0 6>; - // local-mac-address = [ 00 0A 35 04 E1 60 ]; // hero-vcu128-01 - local-mac-address = [ 00 0A 35 04 E1 52 ]; // hero-vcu128-02 - mac-address = [ 00 0A 35 04 E1 52 ]; + /include/ "mac_address.dtsi" + /include/ "remote_boot.dtsi" device_type = "network"; axistream-connected = <&eth_dma0>; axistream-control-connected = <&eth_dma0>; @@ -238,6 +237,12 @@ reg-names = "quadrant-control"; reg = <0x0 0x0b000000 0x0 0x10000>; }; + // We do not use the spm-narrow (contains OpenSBI code) + spm_wide: spm-wide@71000000 { + compatible = "eth,occamy-spm-wide"; + reg-names = "spm-wide"; + reg = <0x0 0x71000000 0x0 0x100000>; + }; // Instantiate a snitch cluster snitch-cluster@10000000 { compatible = "eth,snitch-cluster"; diff --git a/target/fpga/bootrom/occamy_pcie.dts b/target/fpga/bootrom/occamy_pcie.dts new file mode 100644 index 000000000..6873bf14d --- /dev/null +++ b/target/fpga/bootrom/occamy_pcie.dts @@ -0,0 +1,151 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + + +// TODO(niwis) auto generate +/dts-v1/; +/plugin/; +&{/dev@0,0} { + axi-bus { + #address-cells = <1>; + #size-cells = <2>; + compatible = "simple-bus"; + ranges; + // Create a reserved memory region for Snitch program memory + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + snitch_mem: buffer@c0000000 { + reg = <0x0 0xc0000000 0x0 0x10000000>; + }; + }; + cpus { + #address-cells = <1>; + #size-cells = <0>; + timebase-frequency = <12500000>; + CPU0: cpu@0 { + device_type = "cpu"; + status = "okay"; + compatible = "eth,ariane", "riscv"; + clock-frequency = <25000000>; + riscv,isa = "rv64fimafd"; + mmu-type = "riscv,sv39"; + tlb-split; + reg = <0>; + // represents the destination of the mcause bits + // ariane has 3 interrupt inputs: + // - software (ipi_i[0], IRQ_M_SOFT) + // - timer (time_irq_i[0], IRQ_M_TIMER) + // - external (irq_i[1:0], {IRQ_S_EXT, IRQ_M_EXT}) + CPU0_intc: interrupt-controller { + #interrupt-cells = <1>; + #address-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; + }; + sysclk: virt_25mhz { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <25000000>; + }; + soc: soc { + #address-cells = <2>; + #size-cells = <2>; + compatible = "eth,occamy-soc", "simple-bus"; + ranges; + debug@0 { + compatible = "riscv,debug-013"; + // interrupts-extended = <&CPU0_intc 65535>; + reg-names = "control"; + reg = <0x0 0x0 0x0 0x1000>; + }; + serial@2002000 { + compatible = "ns16550a"; + reg = <0x0 0x2002000 0x0 0x1000>; + clock-frequency = <25000000>; + current-speed = <115200>; + interrupt-parent = <&PLIC0>; + interrupts = <36>; + reg-offset = <0>; + reg-shift = <2>; // regs are spaced on 32 bit boundary + reg-io-width = <4>; // only 32-bit access are supported + // fifo-size = <64>; + }; + timer@2006000 { + compatible = "pulp,apb_timer"; + interrupt-parent = <&PLIC0>; + interrupts = <0x00000068 0x00000069 0x00000070 0x00000071>; + reg = 
<0x00000000 0x2006000 0x00000000 0x00001000>; + reg-names = "control"; + }; + clint0: clint@4000000 { + clock-frequency = <12500000>; + compatible = "riscv,clint0"; + // clint generates software and timer interrupts to the core. Attach them + // to the CPU + // bits in mip and exception code in mcause: + // - IRQ_M_SOFT = 3: Machine software interrupt + // - IRQ_M_TIMER = 7: Machine timer interrupt + interrupts-extended = <&CPU0_intc 3 &CPU0_intc 7>; + reg-names = "clint"; + reg = <0x0 0x4000000 0x0 0x100000>; + }; + PLIC0: interrupt-controller@c000000 { + compatible = "riscv,plic0"; + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + // PLIC generates external interrupts to the core, M and S mode + // - IRQ_M_EXT = 11: Machine external interrupt + // - IRQ_S_EXT = 9: Supervisor external interrupt + interrupts-extended = <&CPU0_intc 11 &CPU0_intc 9>; + riscv,max-priority = <6>; + riscv,ndev = <72>; + reg = <0x0 0xc000000 0x0 0x4000000>; + }; + soc_ctl0: soc-control@2000000 { + compatible = "eth,occamy-soc-control"; + reg-names = "soc-control"; + reg = <0x0 0x02000000 0x0 0x1000>; + }; + quadrant_ctrl0: quadrant-control@b000000 { + compatible = "eth,occamy-quadrant-control"; + reg-names = "quadrant-control"; + reg = <0x0 0x0b000000 0x0 0x10000>; + }; + // We do not use the spm-narrow (contains OpenSBI code) + spm_wide: spm-wide@71000000 { + compatible = "eth,occamy-spm-wide"; + reg-names = "spm-wide"; + reg = <0x0 0x71000000 0x0 0x100000>; + }; + pcie_axi_bar_mem: pcie-axi-bar-mem@20000000 { + compatible = "eth,pcie-axi-bar-mem"; + reg = <0x0 0x20000000 0x0 0x40000000>; + }; + // Instantiate a snitch cluster + snitch-cluster@10000000 { + compatible = "eth,snitch-cluster"; + // TCDM and Peripheral spaces + reg = <0x0 0x10000000 0x0 0x40000>; + // points to a memory region reserved for use by the cluster + memory-region = <&snitch_mem>; + // cluster specific properties + eth,compute-cores = <8>; + eth,dm-cores = <1>; + eth,quadrant-idx = <0>; + 
eth,cluster-idx = <0>; // Used to calculate offsets in clint, soc-ctrl etc.. + // A handle to the soc-control register where isolates etc are located + eth,soc-ctl = <&soc_ctl0>; + // Handle to the associated quadrant controller + eth,quadrant-ctrl = <&quadrant_ctrl0>; + // handle to the clint where IPI interrupts are attached + eth,clint = <&clint0>; + }; + }; + }; +}; diff --git a/target/fpga/bootrom/remote_boot.dtsi b/target/fpga/bootrom/remote_boot.dtsi new file mode 100644 index 000000000..e69de29bb diff --git a/target/fpga/bootrom/src/main.c b/target/fpga/bootrom/src/main.c index 92da79812..987d07035 100644 --- a/target/fpga/bootrom/src/main.c +++ b/target/fpga/bootrom/src/main.c @@ -79,7 +79,7 @@ int main() { // Copy the DBT at (SPM+4) print_uart("\r\nCopying DTB at "); - print_uart_addr(SPL_DEST + 1); + print_uart_addr(SPL_DEST + 4); for (int i = 0; i < totalsize; i++) *(uint8_t *)(SPL_DEST + 4 + i) = *(((uint8_t *)__dtb_start) + i); diff --git a/target/fpga/occamy_vcu128.tcl b/target/fpga/occamy_vcu128.tcl index ebbb16f5b..d411cae6a 100644 --- a/target/fpga/occamy_vcu128.tcl +++ b/target/fpga/occamy_vcu128.tcl @@ -99,6 +99,12 @@ if {[get_property PROGRESS [get_run $run]] != "100%"} { puts "Skipping 100% complete run: $run" } +if {[info exists ::env(XILINX_SYNTHESIS_ONLY)] && $::env(XILINX_SYNTHESIS_ONLY)==1} { + puts "XILINX_SYNTHESIS_ONLY is set, stopping now..." + exit +} + + # Create ILA. Attach all signals that were previously marked debug. # For occamy-internal signals: Add "(* mark_debug = "true" *)" before signal definition in HDL code. # For blockdesign-level signals: Use "set_property HDL_ATTRIBUTE.DEBUG $DEBUG [get_bd_nets ...]" in occamy_vcu128_bd.tcl diff --git a/target/fpga/occamy_vcu128_bd.tcl b/target/fpga/occamy_vcu128_bd.tcl index 9774386c1..77046e896 100644 --- a/target/fpga/occamy_vcu128_bd.tcl +++ b/target/fpga/occamy_vcu128_bd.tcl @@ -1,6 +1,3 @@ -# Copyright 2022 ETH Zurich and University of Bologna. 
-# Licensed under the Apache License, Version 2.0, see LICENSE for details. -# SPDX-License-Identifier: Apache-2.0 ################################################################ # This is a generated script based on design: occamy_vcu128 @@ -666,8 +663,8 @@ proc create_root_design { parentCell } { # Create instance: smc_pcie, and set properties set smc_pcie [ create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smc_pcie ] set_property -dict [ list \ - CONFIG.NUM_CLKS {1} \ - CONFIG.NUM_MI {5} \ + CONFIG.NUM_CLKS {2} \ + CONFIG.NUM_MI {7} \ CONFIG.NUM_SI {1} \ ] $smc_pcie @@ -675,7 +672,7 @@ proc create_root_design { parentCell } { set smc_spcie [ create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smc_spcie ] set_property -dict [ list \ CONFIG.NUM_CLKS {2} \ - CONFIG.NUM_SI {5} \ + CONFIG.NUM_SI {4} \ ] $smc_spcie # Create instance: util_ds_buf, and set properties @@ -713,11 +710,18 @@ proc create_root_design { parentCell } { CONFIG.axist_bypass_scale {Gigabytes} \ CONFIG.axist_bypass_size {4} \ CONFIG.axisten_freq {125} \ - CONFIG.functional_mode {DMA} \ + CONFIG.bar_indicator {BAR_1:0} \ + CONFIG.functional_mode {AXI_Bridge} \ + CONFIG.pf0_bar0_64bit {true} \ + CONFIG.pf0_bar0_prefetchable {true} \ + CONFIG.pf0_bar0_scale {Gigabytes} \ + CONFIG.pf0_bar0_size {4} \ CONFIG.pf0_device_id {9014} \ + CONFIG.pf0_msix_cap_pba_bir {BAR_1:0} \ + CONFIG.pf0_msix_cap_table_bir {BAR_1:0} \ CONFIG.pl_link_cap_max_link_width {X4} \ CONFIG.xdma_axi_intf_mm {AXI_Memory_Mapped} \ - CONFIG.xdma_axilite_slave {false} \ + CONFIG.xdma_axilite_slave {true} \ ] $xdma_0 # Create instance: xlslice_0, and set properties @@ -772,8 +776,9 @@ proc create_root_design { parentCell } { connect_bd_intf_net -intf_net smc_hbm_6_M00_AXI [get_bd_intf_pins hbm_0/SAXI_24] [get_bd_intf_pins smc_hbm_6/M00_AXI] connect_bd_intf_net -intf_net smc_hbm_7_M00_AXI [get_bd_intf_pins hbm_0/SAXI_28] [get_bd_intf_pins smc_hbm_7/M00_AXI] connect_bd_intf_net -intf_net smc_pcie_M05_AXI 
[get_bd_intf_pins axi_iic_0/S_AXI] [get_bd_intf_pins smc_pcie/M00_AXI] - connect_bd_intf_net -intf_net xdma_0_M_AXI [get_bd_intf_pins smc_spcie/S03_AXI] [get_bd_intf_pins xdma_0/M_AXI] - connect_bd_intf_net -intf_net xdma_0_M_AXI_BYPASS [get_bd_intf_pins smc_spcie/S04_AXI] [get_bd_intf_pins xdma_0/M_AXI_BYPASS] + connect_bd_intf_net -intf_net smc_pcie_M05_AXI1 [get_bd_intf_pins smc_pcie/M05_AXI] [get_bd_intf_pins xdma_0/S_AXI_B] + connect_bd_intf_net -intf_net smc_pcie_M06_AXI [get_bd_intf_pins smc_pcie/M06_AXI] [get_bd_intf_pins xdma_0/S_AXI_LITE] + connect_bd_intf_net -intf_net xdma_0_M_AXI_B [get_bd_intf_pins smc_spcie/S03_AXI] [get_bd_intf_pins xdma_0/M_AXI_B] connect_bd_intf_net -intf_net xdma_0_pcie_mgt [get_bd_intf_ports pci_express_x4] [get_bd_intf_pins xdma_0/pcie_mgt] # Create port connections @@ -819,7 +824,7 @@ proc create_root_design { parentCell } { connect_bd_net -net util_ds_buf_IBUF_DS_ODIV2 [get_bd_pins util_ds_buf/IBUF_DS_ODIV2] [get_bd_pins xdma_0/sys_clk] connect_bd_net -net util_ds_buf_IBUF_OUT [get_bd_pins util_ds_buf/IBUF_OUT] [get_bd_pins xdma_0/sys_clk_gt] connect_bd_net -net util_reduced_logic_0_Res [get_bd_pins psr_100/ext_reset_in] [get_bd_pins psr_25/ext_reset_in] [get_bd_pins psr_hbm/ext_reset_in] [get_bd_pins rst_or/Res] - connect_bd_net -net xdma_0_axi_aclk [get_bd_pins smc_spcie/aclk1] [get_bd_pins xdma_0/axi_aclk] + connect_bd_net -net xdma_0_axi_aclk [get_bd_pins smc_pcie/aclk1] [get_bd_pins smc_spcie/aclk1] [get_bd_pins xdma_0/axi_aclk] connect_bd_net -net xlconcat_0_dout [get_bd_pins concat_rst/dout] [get_bd_pins rst_or/Op1] connect_bd_net -net xlconcat_1_dout [get_bd_pins concat_irq/dout] [get_bd_pins occamy/ext_irq_i] connect_bd_net -net xlconcat_2_dout [get_bd_pins concat_rst_core/dout] [get_bd_pins rst_or_core/Op1] @@ -875,8 +880,9 @@ proc create_root_design { parentCell } { assign_bd_address -offset 0x0011F0000000 -range 0x10000000 -target_address_space [get_bd_addr_spaces occamy/m_axi_hbm_7] [get_bd_addr_segs 
hbm_0/SAXI_28/HBM_MEM31] -force assign_bd_address -offset 0x4CC00000 -range 0x00400000 -target_address_space [get_bd_addr_spaces occamy/m_axi_pcie] [get_bd_addr_segs hbm_0/SAPB_0/Reg] -force assign_bd_address -offset 0x4C800000 -range 0x00400000 -target_address_space [get_bd_addr_spaces occamy/m_axi_pcie] [get_bd_addr_segs hbm_0/SAPB_1/Reg] -force - assign_bd_address -offset 0x00000000 -range 0x0001000000000000 -target_address_space [get_bd_addr_spaces xdma_0/M_AXI] [get_bd_addr_segs occamy/s_axi_pcie/reg0] -force - assign_bd_address -offset 0x00000000 -range 0x0001000000000000 -target_address_space [get_bd_addr_spaces xdma_0/M_AXI_BYPASS] [get_bd_addr_segs occamy/s_axi_pcie/reg0] -force + assign_bd_address -offset 0x20000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces occamy/m_axi_pcie] [get_bd_addr_segs xdma_0/S_AXI_B/BAR0] -force + assign_bd_address -offset 0x4E000000 -range 0x01000000 -target_address_space [get_bd_addr_spaces occamy/m_axi_pcie] [get_bd_addr_segs xdma_0/S_AXI_LITE/CTL0] -force + assign_bd_address -offset 0x00000000 -range 0x0001000000000000 -target_address_space [get_bd_addr_spaces xdma_0/M_AXI_B] [get_bd_addr_segs occamy/s_axi_pcie/reg0] -force # Exclude Address Segments exclude_bd_addr_seg -offset 0xC0000000 -range 0x10000000 -target_address_space [get_bd_addr_spaces occamy/m_axi_hbm_0] [get_bd_addr_segs hbm_0/SAXI_00/HBM_MEM04] @@ -1164,7 +1170,6 @@ proc create_root_design { parentCell } { # Restore current instance current_bd_instance $oldCurInst - validate_bd_design save_bd_design } # End of create_root_design() @@ -1177,3 +1182,5 @@ proc create_root_design { parentCell } { create_root_design "" +common::send_gid_msg -ssname BD::TCL -id 2053 -severity "WARNING" "This Tcl script was generated from a block design that has not been validated. It is possible that design <$design_name> may result in errors during validation." 
+ diff --git a/target/fpga/occamy_vcu128_flash.tcl b/target/fpga/occamy_vcu128_flash.tcl index 22c3e5964..c3efb6fab 100644 --- a/target/fpga/occamy_vcu128_flash.tcl +++ b/target/fpga/occamy_vcu128_flash.tcl @@ -8,17 +8,17 @@ # Programs the SPI Flash of the VCU128 board with with two partitions # # HW_SERVER host:port URL to the server where the FPGA board is connected to -# FPGA_ID Serial of the FPGA to target +# FPGA_PATH Hardware target path of the FPGA, as matched by get_hw_targets # MCS Output flash configuration file # OFFSET0 Address offset of partition 0 # FILE0 File to program to partition 0 # Parse arguments if {$argc < 5} { - error "usage: occamy_vcu_138_flash.tcl HW_SERVER FPGA_ID MCS OFFSET0 FILE0" + error "usage: occamy_vcu_138_flash.tcl HW_SERVER FPGA_PATH MCS OFFSET0 FILE0" } set HW_SERVER [lindex $argv 0] -set FPGA_ID [lindex $argv 1] +set FPGA_PATH [lindex $argv 1] set MCS [lindex $argv 2] set OFFSET0 [lindex $argv 3] set FILE0 [lindex $argv 4] @@ -38,8 +38,8 @@ write_cfgmem -force -format mcs -size 256 -interface SPIx4 \ # Open and connect HW manager open_hw_manager connect_hw_server -url ${HW_SERVER} -allow_non_jtag -current_hw_target [get_hw_targets */xilinx_tcf/Xilinx/${FPGA_ID}] -set_property PARAM.FREQUENCY 15000000 [get_hw_targets */xilinx_tcf/Xilinx/${FPGA_ID}] +current_hw_target [get_hw_targets ${FPGA_PATH}] +set_property PARAM.FREQUENCY 15000000 [get_hw_targets ${FPGA_PATH}] open_hw_target current_hw_device [get_hw_devices xcvu37p_0] diff --git a/target/fpga/occamy_vcu128_flashrun.tcl b/target/fpga/occamy_vcu128_flashrun.tcl index 9f9d59508..e14e33724 100644 --- a/target/fpga/occamy_vcu128_flashrun.tcl +++ b/target/fpga/occamy_vcu128_flashrun.tcl @@ -9,7 +9,7 @@ # Afterwards programs the real bitstream and runs the bootrom # # HW_SERVER host:port URL to the server where the FPGA board is connected to -# FPGA_ID Serial of the FPGA to target +# FPGA_PATH Hardware target path of the FPGA, as matched by get_hw_targets # MCS Output flash configuration file # OFFSET0 Address offset of partition 0 # FILE0 File 
to program to partition 0 @@ -18,10 +18,10 @@ source occamy_vcu128_procs.tcl # Parse arguments if {$argc < 5} { - error "usage: occamy_vcu_138_flash.tcl HW_SERVER FPGA_ID MCS OFFSET0 FILE0" + error "usage: occamy_vcu_138_flash.tcl HW_SERVER FPGA_PATH MCS OFFSET0 FILE0" } set HW_SERVER [lindex $argv 0] -set FPGA_ID [lindex $argv 1] +set FPGA_PATH [lindex $argv 1] set MCS [lindex $argv 2] set OFFSET0 [lindex $argv 3] set FILE0 [lindex $argv 4] @@ -37,8 +37,9 @@ write_cfgmem -force -format mcs -size 256 -interface SPIx4 \ # Open and connect HW manager open_hw_manager connect_hw_server -url ${HW_SERVER} -allow_non_jtag -current_hw_target [get_hw_targets */xilinx_tcf/Xilinx/${FPGA_ID}] -set_property PARAM.FREQUENCY 15000000 [get_hw_targets */xilinx_tcf/Xilinx/${FPGA_ID}] + +current_hw_target [get_hw_targets *${FPGA_PATH}] +set_property PARAM.FREQUENCY 10000000 [get_hw_targets *${FPGA_PATH}] open_hw_target current_hw_device [get_hw_devices xcvu37p_0] @@ -66,11 +67,11 @@ program_hw_cfgmem -hw_cfgmem $hw_cfgmem # Program BIT global occ_hw_server -global occ_target_serial +global occ_target_path global occ_hw_device global occ_bit_stem set occ_hw_server $HW_SERVER -set occ_target_serial $FPGA_ID +set occ_target_path $FPGA_PATH set occ_hw_device xcvu37p_0 set occ_bit_stem occamy_vcu128/occamy_vcu128.runs/impl_1/occamy_vcu128_wrapper diff --git a/target/fpga/occamy_vcu128_impl.xdc b/target/fpga/occamy_vcu128_impl.xdc index 509ea040d..a9b970de3 100644 --- a/target/fpga/occamy_vcu128_impl.xdc +++ b/target/fpga/occamy_vcu128_impl.xdc @@ -15,6 +15,11 @@ set_property IOSTANDARD LVCMOS18 [get_ports uart_rx_i_0] set_property PACKAGE_PIN BN26 [get_ports uart_tx_o_0] set_property IOSTANDARD LVCMOS18 [get_ports uart_tx_o_0] +# Assume no glitchless mux needs to be clock capable (causes pb on resets) +set all_in_mux [get_nets -of [ get_pins -filter { DIRECTION == IN } -of [get_cells -hier -filter { ORIG_REF_NAME == tc_clk_mux2 || REF_NAME == tc_clk_mux2 }]]] +set_property 
CLOCK_DEDICATED_ROUTE FALSE $all_in_mux +set_property CLOCK_BUFFER_TYPE NONE $all_in_mux + # CPU_RESET pushbutton switch set_false_path -from [get_port reset] -to [all_registers] set_property PACKAGE_PIN BM29 [get_ports reset] diff --git a/target/fpga/occamy_vcu128_procs.tcl b/target/fpga/occamy_vcu128_procs.tcl index 7a3a90c9f..12c9ede1a 100644 --- a/target/fpga/occamy_vcu128_procs.tcl +++ b/target/fpga/occamy_vcu128_procs.tcl @@ -24,14 +24,25 @@ proc target_02 {} { set occ_bit_stem occamy_vcu128/occamy_vcu128.runs/impl_1/occamy_vcu128_wrapper } +proc target_03 {} { + global occ_hw_server + global occ_target_serial + global occ_hw_device + global occ_bit_stem + set occ_hw_server bordcomputer:3233 + set occ_target_serial 12309159258A + set occ_hw_device xcvu37p_0 + set occ_bit_stem occamy_vcu128/occamy_vcu128.runs/impl_1/occamy_vcu128_wrapper +} + proc occ_connect { } { global occ_hw_server - global occ_target_serial + global occ_target_path global occ_hw_device open_hw_manager connect_hw_server -url ${occ_hw_server} -allow_non_jtag - current_hw_target [get_hw_targets */xilinx_tcf/Xilinx/${occ_target_serial}] - set_property PARAM.FREQUENCY 15000000 [get_hw_targets */xilinx_tcf/Xilinx/${occ_target_serial}] + current_hw_target [get_hw_targets ${occ_target_path}] + set_property PARAM.FREQUENCY 15000000 [get_hw_targets ${occ_target_path}] open_hw_target current_hw_device [get_hw_devices ${occ_hw_device}] refresh_hw_device -update_hw_probes false [lindex [get_hw_devices ${occ_hw_device}] 0] @@ -144,4 +155,4 @@ proc occ_flash_spi { mcs_file flash_offset flash_file } { # Program SPI flash puts "Programing SPI flash" program_hw_cfgmem -hw_cfgmem $hw_cfgmem -} \ No newline at end of file +} diff --git a/target/fpga/vivado_ips/Makefile b/target/fpga/vivado_ips/Makefile index 96f4ce7b1..212389e4e 100644 --- a/target/fpga/vivado_ips/Makefile +++ b/target/fpga/vivado_ips/Makefile @@ -9,18 +9,15 @@ MKFILE_DIR := $(dir $(MKFILE_PATH)) ROOT := ${MKFILE_DIR}../../../../.. 
DEBUG ?= 0 -VIVADO ?= vivado -BENDER ?= bender +BENDER ?= bender +VIVADO ?= vitis-2020.2 vivado -BENDER_TARGETS += -t cv64a6_imafdc_sv39 +BENDER_TARGETS += -t cv64a6_imafdc_sv39 -t occamy ifeq ($(EXT_JTAG), 0) BENDER_TARGETS += -t bscane endif -include $(ROOT)/util/Makefrag - occamy_xilinx: define-sources.tcl - $(MAKE) -C ../../ update-source ${VIVADO} -mode batch -source occamy_xilinx.tcl -tclargs $(DEBUG) $(EXT_JTAG) define-sources.tcl: diff --git a/target/fpga/vivado_ips/occamy_xilinx.tcl b/target/fpga/vivado_ips/occamy_xilinx.tcl index 36029d9c7..c09b23c05 100644 --- a/target/fpga/vivado_ips/occamy_xilinx.tcl +++ b/target/fpga/vivado_ips/occamy_xilinx.tcl @@ -56,4 +56,4 @@ ipx::create_xgui_files [ipx::current_core] ipx::update_checksums [ipx::current_core] ipx::save_core [ipx::current_core] ipx::check_integrity [ipx::current_core] -ipx::save_core [ipx::current_core] \ No newline at end of file +ipx::save_core [ipx::current_core] diff --git a/target/sim/Makefile b/target/sim/Makefile index 80b1f3c03..44b8c2190 100644 --- a/target/sim/Makefile +++ b/target/sim/Makefile @@ -433,7 +433,9 @@ PLATFORM_HEADERS += $(PLATFORM_HEADERS_DIR)/snitch_quad_peripheral.h PLATFORM_HEADERS += $(PLATFORM_HEADERS_DIR)/snitch_hbm_xbar_peripheral.h PLATFORM_HEADERS += $(PLATFORM_HEADERS_DIR)/idma.h -.PHONY: sw clean-headers clean-sw +.PHONY: sw all-headers clean-headers clean-sw + +all-headers: $(PLATFORM_HEADERS) sw: $(PLATFORM_HEADERS) $(MAKE) -C sw/ all diff --git a/target/sim/sw/device/Makefile b/target/sim/sw/device/Makefile index 1bab68e71..913514b75 100644 --- a/target/sim/sw/device/Makefile +++ b/target/sim/sw/device/Makefile @@ -7,6 +7,7 @@ # Add user applications to APPS variable APPS = blas/axpy APPS += blas/gemm +APPS += libomptarget_device TARGET ?= all diff --git a/target/sim/sw/device/apps/common.mk b/target/sim/sw/device/apps/common.mk index eff90e921..f6b5d7333 100644 --- a/target/sim/sw/device/apps/common.mk +++ b/target/sim/sw/device/apps/common.mk @@ -32,7 +32,9 
@@ BUILDDIR = $(abspath build) # Dependencies INCDIRS += $(RUNTIME_DIR)/src INCDIRS += $(SNRT_DIR)/api +INCDIRS += $(SNRT_DIR)/api/omp INCDIRS += $(SNRT_DIR)/src +INCDIRS += $(SNRT_DIR)/src/omp INCDIRS += $(SNRT_DIR)/vendor/riscv-opcodes INCDIRS += $(SW_DIR)/shared/platform/generated INCDIRS += $(SW_DIR)/shared/platform @@ -99,6 +101,9 @@ $(BUILDDIR): $(DEP): $(SRCS) | $(BUILDDIR) $(RISCV_CC) $(RISCV_CFLAGS) -MM -MT '$(ELF)' $< > $@ +$(BUILDDIR)/%.o: $(SRC_DIR)/%.c | $(BUILDDIR) + $(RISCV_CC) $(RISCV_CFLAGS) -c $< -o $@ + $(ELF): $(DEP) $(LD_SRCS) | $(BUILDDIR) $(RISCV_CC) $(RISCV_CFLAGS) $(RISCV_LDFLAGS) $(SRCS) -o $@ diff --git a/target/sim/sw/device/apps/libomptarget_device/Makefile b/target/sim/sw/device/apps/libomptarget_device/Makefile new file mode 100644 index 000000000..171b4f9a1 --- /dev/null +++ b/target/sim/sw/device/apps/libomptarget_device/Makefile @@ -0,0 +1,32 @@ +# Copyright 2023 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Cyril Koenig + +# Usage of absolute paths is required to externally include this Makefile +MK_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST)))) +SRC_DIR := $(realpath $(MK_DIR)/src) + +APP ?= omptarget_device +SRCS ?= $(SRC_DIR)/main.c $(SRC_DIR)/sw_mailbox.c +INCDIRS += $(SRC_DIR) + +.PHONY: clean +clean: + +include ../common.mk + +OBJS := $(subst $(SRC_DIR), $(BUILDDIR), $(SRCS:.c=.o)) +LIB := $(BUILDDIR)/libomptarget_device.a + +$(BUILDDIR)/origin.ld: | $(BUILDDIR) + echo "L3_ORIGIN = 0xC0000000;" > $(BUILDDIR)/origin.ld + +# We first extract objects from libnnruntime and then link them with our objects +$(BUILDDIR)/libomptarget_device.a: $(OBJS) | $(BUILDDIR) + cd $(BUILDDIR) && $(RISCV_AR) -x $(SNRT_LIB_DIR)/lib$(SNRT_LIB_NAME).a + $(RISCV_AR) $(RISCV_ARFLAGS) $@ $(BUILDDIR)/*.o + +# For this target, only build the library +all: $(LIB) diff --git a/target/sim/sw/device/apps/libomptarget_device/link.ld b/target/sim/sw/device/apps/libomptarget_device/link.ld new file mode 100644 index 000000000..0f54b2016 --- /dev/null +++ b/target/sim/sw/device/apps/libomptarget_device/link.ld @@ -0,0 +1,130 @@ +/* Copyright 2020 ETH Zurich and University of Bologna. */ +/* Solderpad Hardware License, Version 0.51, see LICENSE for details. */ +/* SPDX-License-Identifier: SHL-0.51 */ + +OUTPUT_ARCH( "riscv" ) +ENTRY(_start) + +/* Memory section should be provided in a separate, platform-specific */ +/* file. It should define at least the L1 and L3 memory blocks. */ +MEMORY +{ + L3 : ORIGIN = 0xC0000000, LENGTH = 0x800000 +} + +SECTIONS +{ + + /* Program code goes into L3 */ + .text : + { + . = ALIGN(4); + *(.init) + *(.text.init) + *(.text.startup) + *(.text) + *(.text*) + *(.text) + . = ALIGN(4); + _etext = .; + } >L3 + + /* By default, constant data goes into L3, right after code section */ + .rodata : + { + . = ALIGN(4); + *(.rodata) + *(.rodata*) + . 
= ALIGN(4); + } >L3 + + /* HTIF section for FESVR */ + .htif : { } >L3 + + /* Thread Local Storage sections */ + .tdata : + { + __tdata_start = .; + *(.tdata .tdata.* .gnu.linkonce.td.*) + __tdata_end = .; + } >L3 + .tbss : + { + __tbss_start = .; + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + __tbss_end = .; + } >L3 + + /* Cluster Local Storage sections */ + .cdata : + { + __cdata_start = .; + *(.cdata .cdata.*) + __cdata_end = .; + } >L3 + .cbss : + { + __cbss_start = .; + *(.cbss .cbss.*) + __cbss_end = .; + } >L3 + + /* used by the startup to initialize data */ + _sidata = LOADADDR(.data); + + /* small data section that can be addressed through the global pointer */ + .sdata : + { + __SDATA_BEGIN__ = .; + __global_pointer$ = . + 0x7f0; + *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*) + *(.sdata .sdata.* .gnu.linkonce.s.*) + } >L3 + + /* Initialized data sections goes into L3 */ + .data : + { + __DATA_BEGIN__ = .; + *(.data .data.* .gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } >L3 + _edata = .; PROVIDE (edata = .); + + /* small bss section */ + . = .; + __bss_start = .; + .sbss : + { + *(.dynsbss) + *(.sbss .sbss.* .gnu.linkonce.sb.*) + *(.scommon) + } >L3 + + /* Uninitialized data section */ + .bss : + { + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. */ + . = ALIGN(. != 0 ? 32 / 8 : 1); + } >L3 + . = ALIGN(32 / 8); + . = SEGMENT_START("ldata-segment", .); + . 
= ALIGN(32 / 8);
+  __BSS_END__ = .;
+  __bss_end = .;
+  _end = .; PROVIDE (end = .);
+
+  /* Uninitialized data section in L3 */
+  .dram :
+  {
+    *(.dram)
+    _edram = .;
+  } >L3
+
+  __uart = 0x2002000;
+}
diff --git a/target/sim/sw/device/apps/libomptarget_device/src/debug.h b/target/sim/sw/device/apps/libomptarget_device/src/debug.h
new file mode 100644
index 000000000..dd439f4c7
--- /dev/null
+++ b/target/sim/sw/device/apps/libomptarget_device/src/debug.h
@@ -0,0 +1,77 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+// uint32_t is used below, so <stdint.h> is required.
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Runtime log verbosity: a message is printed only if its LOG_* level is <= this value.
+extern uint32_t snrt_log_level;
+
+// Set the runtime log verbosity to one of the LOG_* levels below.
+static inline void snrt_debug_set_loglevel(uint32_t lvl) { snrt_log_level = lvl; }
+
+// Log levels, from least verbose (LOG_ERROR) to most verbose (LOG_TRACE).
+#define LOG_ERROR 0
+#define LOG_WARN 1
+#define LOG_INFO 2
+#define LOG_DEBUG 3
+#define LOG_TRACE 4
+
+#if defined(DEBUG)
+
+// Logging macros. Each one compares its level against snrt_log_level and, when enabled,
+// prints an ANSI-colored "[snrt(<cluster idx>,<core idx>):<level>:<function>] " prefix
+// followed by the caller's printf-style message, through snrt_printf().
+// NOTE(review): snrt_printf(), snrt_cluster_idx() and snrt_cluster_core_idx() are not
+// declared in this header; the including file has to bring them into scope.
+#define snrt_error(fmt, ...) \
+    ({ \
+        if (LOG_ERROR <= snrt_log_level) \
+            snrt_printf("[\033[31msnrt(%d,%d):error:%s\033[0m] " fmt, snrt_cluster_idx(), \
+                        snrt_cluster_core_idx(), __func__, ##__VA_ARGS__); \
+    })
+#define snrt_warn(fmt, ...) \
+    ({ \
+        if (LOG_WARN <= snrt_log_level) \
+            snrt_printf("[\033[91msnrt(%d,%d):warn:%s\033[0m] " fmt, snrt_cluster_idx(), \
+                        snrt_cluster_core_idx(), __func__, ##__VA_ARGS__); \
+    })
+#define snrt_info(fmt, ...) \
+    ({ \
+        if (LOG_INFO <= snrt_log_level) \
+            snrt_printf("[\033[33msnrt(%d,%d):info:%s\033[0m] " fmt, snrt_cluster_idx(), \
+                        snrt_cluster_core_idx(), __func__, ##__VA_ARGS__); \
+    })
+#define snrt_debug(fmt, ...) \
+    ({ \
+        if (LOG_DEBUG <= snrt_log_level) \
+            snrt_printf("[\033[35msnrt(%d,%d):debug:%s\033[0m] " fmt, snrt_cluster_idx(), \
+                        snrt_cluster_core_idx(), __func__, ##__VA_ARGS__); \
+    })
+#define snrt_trace(fmt, ...) \
+    ({ \
+        if (LOG_TRACE <= snrt_log_level) \
+            snrt_printf("[\033[96msnrt(%d,%d):trace:%s\033[0m] " fmt, snrt_cluster_idx(), \
+                        snrt_cluster_core_idx(), __func__, ##__VA_ARGS__); \
+    })
+
+#else // #if defined(DEBUG)
+
+// Without DEBUG every logging macro expands to an empty statement.
+#define snrt_error(x...) \
+    do { \
+    } while (0)
+#define snrt_warn(x...) \
+    do { \
+    } while (0)
+#define snrt_info(x...) \
+    do { \
+    } while (0)
+#define snrt_debug(x...) \
+    do { \
+    } while (0)
+#define snrt_trace(x...) \
+    do { \
+    } while (0)
+
+#endif // defined(DEBUG)
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/target/sim/sw/device/apps/libomptarget_device/src/main.c b/target/sim/sw/device/apps/libomptarget_device/src/main.c
new file mode 100644
index 000000000..17ff5754b
--- /dev/null
+++ b/target/sim/sw/device/apps/libomptarget_device/src/main.c
@@ -0,0 +1,317 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+#include
+
+#include "sw_mailbox.h"
+#include "snrt.h"
+
+volatile uint32_t dma_wait_cycles = 0;
+
+//================================================================================
+// MACROS AND SETTINGS
+//================================================================================
+
+// set to >0 for debugging
+#define DEBUG_LEVEL_OFFLOAD_MANAGER 1
+
+const uint32_t active_pe = 8;
+
+/* MAILBOX SIGNALING */
+#define MBOX_DEVICE_READY (0x01U)
+#define MBOX_DEVICE_START (0x02U)
+#define MBOX_DEVICE_BUSY (0x03U)
+#define MBOX_DEVICE_DONE (0x04U)
+#define MBOX_DEVICE_STOP (0x0FU)
+#define MBOX_DEVICE_LOGLVL (0x10U)
+#define MBOX_HOST_READY (0x1000U)
+#define MBOX_HOST_DONE (0x3000U)
+
+#define TO_RUNTIME (0x10000000U) // bypass PULP driver
+#define RAB_UPDATE (0x20000000U) // handled by PULP driver
+#define RAB_SWITCH (0x30000000U) // handled by PULP driver
+
+//================================================================================
+// TYPES
+//================================================================================
+
+// Reduced form of the gomp_team_t descriptor, describing one RAB-miss-handler task.
+// NOTE(review): omp_argc is declared `void *`, while omp_task_f takes a `uint32_t argc`
+// and the commented-out initializer in gomp_offload_manager() assigns it 1 -- confirm
+// the intended type.
+typedef struct offload_rab_miss_handler_desc_s {
+    void (*omp_task_f)(void *arg, uint32_t argc);
+    void *omp_args;
+    void *omp_argc;
+    int barrier_id;
+} offload_rab_miss_handler_desc_t;
+
+typedef uint32_t virt_addr_t;
+typedef uint32_t virt_pfn_t;
+
+// This struct represents a miss in the RAB Miss Hardware FIFO.
+typedef struct rab_miss_t {
+    virt_addr_t virt_addr;
+    int core_id;
+    int cluster_id;
+    int intra_cluster_id;
+    uint8_t is_prefetch;
+} rab_miss_t;
+
+//================================================================================
+// Data
+//================================================================================
+// Lock word taken by snrt_printf() around each vprintf() call.
+static volatile uint32_t g_printf_mutex = 0;
+
+// Base of the SoC scratch words; main() reads entry 2 as the address of the L3 layout.
+static volatile uint32_t *soc_scratch = (uint32_t *)(0x02000014);
+// Local copy of the L3 layout, filled in by main().
+struct l3_layout l3l;
+
+// NOTE(review): weak default for the runtime's stack-size setting; the unit of `12` is not
+// visible in this file (presumably log2 of the size in bytes) -- confirm against the crt0.
+const uint32_t snrt_stack_size __attribute__((weak, section(".rodata"))) = 12;
+
+// The boot data generated along with the system RTL.
+// See `hw/system/snitch_cluster/test/tb_lib.hh` for details.
+struct snrt_cluster_bootdata {
+    uint32_t boot_addr;
+    uint32_t core_count;
+    uint32_t hartid_base;
+    uint32_t tcdm_start;
+    uint32_t tcdm_size;
+    uint32_t tcdm_offset;
+    uint64_t global_mem_start;
+    uint64_t global_mem_end;
+    uint32_t cluster_count;
+    uint32_t s1_quadrant_count;
+    uint32_t clint_base;
+};
+
+/**
+ * @brief Called by each hart before the pre-main barrier in snrt crt0
+ *
+ * Cluster core 0 clears the software interrupt of its own hart and then sets the
+ * cluster interrupt for every other core of the cluster (bit 0 is masked out).
+ * Every other core clears its own cluster interrupt bit.
+ */
+void _snrt_hier_wakeup(void) {
+    const uint32_t core_id = snrt_cluster_core_idx();
+
+    // master core wakes other cluster cores through cluster local clint
+    if (core_id == 0) {
+        // clear the interrupt from cva6
+        snrt_int_sw_clear(snrt_hartid());
+        // wake remaining cluster cores
+        const unsigned cluster_core_num = snrt_cluster_core_num();
+        snrt_int_cluster_set(~0x1 & ((1 << cluster_core_num) - 1));
+    } else {
+        // clear my interrupt
+        snrt_int_cluster_clr(1 << core_id);
+    }
+}
+
+//================================================================================
+// TODO: Symbols to declare somewhere else on a merge
+//================================================================================
+/**
+ * @brief A printf wrapper that holds g_printf_mutex for the duration of the
+ * vprintf() call
+ *
+ * @param format printf-style format string, followed by its arguments
+ */
+void snrt_printf(const char *format, ...) {
+    va_list args;
+
+    snrt_mutex_acquire(&g_printf_mutex);
+
+    va_start(args, format);
+    vprintf(format, args);
+    va_end(args);
+
+    snrt_mutex_release(&g_printf_mutex);
+}
+
+//================================================================================
+// HERO Functions
+//================================================================================
+
+// Placeholder RAB miss handler: ignores both arguments and only reports "unimplemented".
+// The previous implementation is kept below as commented-out reference code.
+// NOTE(review): this calls snrt_err(), while the debug.h next to this file defines
+// snrt_error(); confirm that snrt_err() is provided by one of the included headers.
+static void offload_rab_misses_handler(void *arg, uint32_t argc) {
+    (void)arg;
+    (void)argc;
+    snrt_err("unimplemented!\r\n");
+    // static void offload_rab_misses_handler(uint32_t *status) {
+    // uint32_t *status = (uint32_t)arg;
+    // if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+    // snrt_trace("offload_rab_misses_handler: synch @%p (0x%x)\n", status,
+    // *(volatile unsigned int *)status);
+    // do {
+    // handle_rab_misses();
+    // } while (*((volatile uint32_t *)status) != 0xdeadbeefU);
+    // if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+    // snrt_trace("offload_rab_misses_handler: synch @%p (0x%x)\n", status,
+    // *(volatile unsigned int *)status);
+}
+
+/**
+ * @brief Offload loop: waits for host commands on the mailbox and runs the offloaded
+ * function
+ *
+ * Each iteration reads one command word. MBOX_DEVICE_STOP or an unknown command leaves
+ * the loop; MBOX_DEVICE_LOGLVL consumes one more word and continues; MBOX_DEVICE_START
+ * goes on to read the function pointer, the argument pointer and the number of RAB miss
+ * handlers, calls the function, and answers with MBOX_DEVICE_DONE, the elapsed mcycle
+ * count and dma_wait_cycles.
+ *
+ * @return always 0
+ */
+static int gomp_offload_manager() {
+    // NOTE(review): only referenced by the commented-out perf-counter code below.
+    const uint32_t core_id = snrt_cluster_core_idx();
+
+    // Init the manager (handshake btw host and accelerator is here)
+    // gomp_init_offload_manager();
+
+    // FIXME For the moment we are not using the cmd sent as trigger.
+    // It should be used to perform the deactivation of the accelerator,
+    // as well as other operations, like local data allocation or movement.
+    // FIXME Note that the offload at the moment uses the mailbox several times.
+    // We should compact the offload descriptor and just send a pointer to
+    // that descriptor.
+    uint32_t cmd = (uint32_t)NULL;
+    uint32_t data;
+
+    // Offloaded function pointer and arguments
+    void (*offloadFn)(uint64_t) = NULL;
+    uint64_t offloadArgs = 0x0;
+    unsigned nbOffloadRabMissHandlers = 0x0;
+    uint32_t offload_rab_miss_sync = 0x0U;
+    // offload_rab_miss_handler_desc_t rab_miss_handler = {.omp_task_f = offload_rab_misses_handler,
+    // .omp_args = (void *)&offload_rab_miss_sync,
+    // .omp_argc = 1,
+    // .barrier_id = -1};
+
+    int cycles = 0;
+    // NOTE(review): issue_fpu, dma_busy and rab_miss are only referenced by commented-out code.
+    uint32_t issue_fpu, dma_busy;
+    rab_miss_t rab_miss;
+    // reset_vmm();
+
+    while (1) {
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("Waiting for command...\n");
+
+        // (1) Wait for the offload trigger cmd == MBOX_DEVICE_START
+        mailbox_read((unsigned int *)&cmd, 1);
+        // NOTE(review): this sample is overwritten in step (5) before it is ever used.
+        cycles = read_csr(mcycle);
+        if (MBOX_DEVICE_STOP == cmd) {
+            //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+            // snrt_trace("Got MBOX_DEVICE_STOP from host, stopping execution now.\n");
+            break;
+        } else if (MBOX_DEVICE_LOGLVL == cmd) {
+            //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+            // snrt_trace("Got command 0x%x, setting log level.\n", cmd);
+            // The log level word is consumed but not applied (the call below is disabled).
+            mailbox_read((unsigned int *)&data, 1);
+            //snrt_debug_set_loglevel(data);
+            continue;
+        } else if (MBOX_DEVICE_START != cmd) {
+            //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+            // snrt_trace("Got unexpected command 0x%x, stopping execution now.\n", cmd);
+            break;
+        }
+
+        // (2) The host sends through the mailbox the pointer to the function that should be
+        // executed on the accelerator.
+        mailbox_read((unsigned int *)&offloadFn, 1);
+
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("tgt_fn @ 0x%x\n", (unsigned int)offloadFn);
+
+        // (3) The host sends through the mailbox the pointer to the arguments that should
+        // be used.
+        // NOTE(review): a single mailbox word is stored into the 64-bit offloadArgs;
+        // presumably the word is 32 bits wide and the upper half stays zero -- confirm.
+        mailbox_read((unsigned int *)&offloadArgs, 1);
+
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("tgt_vars @ 0x%x\n", (unsigned int)offloadArgs);
+
+        // (3b) The host sends through the mailbox the number of rab misses handlers threads
+        mailbox_read((unsigned int *)&nbOffloadRabMissHandlers, 1);
+
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("nbOffloadRabMissHandlers %d/%d\n", nbOffloadRabMissHandlers, active_pe);
+
+        // (3c) Spawning nbOffloadRabMissHandlers
+        // The request is clamped to active_pe - 1; the mask marks the highest-numbered
+        // cores and is only consumed by the commented-out eu_dispatch_* calls below.
+        unsigned mhCoreMask = 0;
+        nbOffloadRabMissHandlers =
+            nbOffloadRabMissHandlers < active_pe - 1 ? nbOffloadRabMissHandlers : active_pe - 1;
+        if (nbOffloadRabMissHandlers) {
+            offload_rab_miss_sync = 0x0U;
+            for (int pid = active_pe - 1, i = nbOffloadRabMissHandlers; i > 0; i--, pid--) {
+                //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+                // snrt_trace("enabling RAB miss handler on %d\n", pid);
+                mhCoreMask |= (1 << pid);
+            }
+        }
+        // The remaining cores are what the OpenMP runtime may use for this offload.
+        omp_getData()->maxThreads = active_pe - nbOffloadRabMissHandlers;
+        omp_getData()->numThreads = active_pe - nbOffloadRabMissHandlers;
+        // eu_dispatch_team_config(mhCoreMask);
+        // eu_dispatch_push((unsigned int)&offload_rab_misses_handler);
+        // eu_dispatch_push((unsigned int)&offload_rab_miss_sync);
+        // eu_dispatch_team_config(omp_getData()->coreMask);
+
+        // (4) Ensure access to offloadArgs. It might be in SVM.
+        if (offloadArgs != 0x0) {
+            // FIXME
+            // pulp_tryread((unsigned int *)offloadArgs);
+        }
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("begin offloading\n");
+        // reset_timer();
+        // start_timer();
+
+        //for (unsigned i = 0; i < 16; i += 2) {
+        // snrt_trace(" %2d: 0x%08x = ... ; %2d: 0x%08x = ...\n", i, ((uint32_t *)offloadArgs)[i],
+        // /* *((uint32_t *)(((uint32_t *)offloadArgs)[i])) ,*/ i + 1,
+        // ((uint32_t *)offloadArgs)[i + 1] /*, *((uint32_t *)(((uint32_t *)offloadArgs)[i + 1]))*/ );
+        //}
+
+        // (5) Execute the offloaded function.
+        // snrt_reset_perf_counter(SNRT_PERF_CNT0);
+        // snrt_reset_perf_counter(SNRT_PERF_CNT1);
+        // snrt_start_perf_counter(SNRT_PERF_CNT0, SNRT_PERF_CNT_ISSUE_FPU, core_id);
+        // snrt_start_perf_counter(SNRT_PERF_CNT1, SNRT_PERF_CNT_DMA_BUSY, core_id);
+        // Start of the measured interval.
+        cycles = read_csr(mcycle);
+        dma_wait_cycles = 0;
+
+        offloadFn(offloadArgs);
+        // snrt_stop_perf_counter(SNRT_PERF_CNT0);
+        // snrt_stop_perf_counter(SNRT_PERF_CNT1);
+        // issue_fpu = snrt_get_perf_counter(SNRT_PERF_CNT0);
+        // dma_busy = snrt_get_perf_counter(SNRT_PERF_CNT1);
+
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("end offloading\n");
+
+        // (6) Report EOC and profiling
+        //snrt_info("cycles: %d\r\n", cycles);
+
+        // The measured interval ends after the DONE word has been written, so it
+        // includes the time spent in that mailbox_write().
+        mailbox_write(MBOX_DEVICE_DONE);
+        cycles = read_csr(mcycle) - cycles;
+        mailbox_write(cycles);
+        mailbox_write(dma_wait_cycles);
+
+        //if (DEBUG_LEVEL_OFFLOAD_MANAGER > 0)
+        // snrt_trace("Kernel execution time [Snitch cycles] = %d\n", cycles);
+
+        if (nbOffloadRabMissHandlers) {
+            // 0xdeadbeef is the stop value the commented-out handler loop polls for.
+            offload_rab_miss_sync = 0xdeadbeefU;
+            // gomp_atomic_add_thread_pool_idle_cores(nbOffloadRabMissHandlers);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * @brief Device entry point: sets up the mailbox pointers, bootstraps the OpenMP
+ * runtime, runs the offload manager and finally reports exit code 0 to the host
+ */
+int main(int argc, char *argv[]) {
+    (void)argc;
+    (void)argv;
+    unsigned core_idx = snrt_cluster_core_idx();
+    // NOTE(review): core_num is not used in this function.
+    unsigned core_num = snrt_cluster_core_num();
+
+    /**
+     * One core initializes the global data structures
+     */
+    if (snrt_is_dm_core()) {
+        // read memory layout from scratch2
+        memcpy(&l3l, (void *)soc_scratch[2], sizeof(struct l3_layout));
+        g_a2h_rb = (struct ring_buf *)l3l.a2h_rb;
+        g_a2h_mbox = (struct ring_buf *)l3l.a2h_mbox;
+        g_h2a_mbox = (struct ring_buf *)l3l.h2a_mbox;
+    }
+
+    // All cores wait here until the ring buffer pointers above have been set.
+    snrt_cluster_hw_barrier();
+
+    __snrt_omp_bootstrap(core_idx);
+
+    gomp_offload_manager();
+
+    //snrt_trace("bye\n");
+    // exit
+    __snrt_omp_destroy(core_idx);
+    snrt_hero_exit(0);
+    return 0;
+}
diff --git a/target/sim/sw/device/apps/libomptarget_device/src/sw_mailbox.c b/target/sim/sw/device/apps/libomptarget_device/src/sw_mailbox.c
new file mode 100644
index 000000000..96051f2a9
--- /dev/null
+++ b/target/sim/sw/device/apps/libomptarget_device/src/sw_mailbox.c
@@ -0,0 +1,93 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#include "sw_mailbox.h"
+#include "snrt.h" // snrt_mcycle
+
+/***********************************************************************************
+ * MACROS
+ ***********************************************************************************/
+
+#define SYS_exit 60
+#define SYS_write 64
+#define SYS_read 63
+#define SYS_wake 1235
+#define SYS_cycle 1236
+
+/***********************************************************************************
+ * DATA
+ ***********************************************************************************/
+// Ring buffers shared with the host. They are not initialized here; the application
+// has to point them at valid descriptors before any function below is called.
+volatile struct ring_buf *g_a2h_rb;
+volatile struct ring_buf *g_a2h_mbox;
+volatile struct ring_buf *g_h2a_mbox;
+
+/***********************************************************************************
+ * FUNCTIONS
+ ***********************************************************************************/
+
+/**
+ * @brief Post a syscall request to the host through the g_a2h_rb ring buffer
+ *
+ * The call number and the five arguments are packed into a six-word record. While the
+ * ring buffer is full the put is retried after a 10-cycle pause.
+ *
+ * @param which syscall number (one of the SYS_* values)
+ * @return number of retries that were needed; this is not a result from the host
+ */
+int syscall(uint64_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2,
+            uint64_t arg3, uint64_t arg4) {
+  uint64_t magic_mem[6];
+  int ret;
+  uint32_t retries = 0;
+
+  volatile struct ring_buf *rb = g_a2h_rb;
+
+  magic_mem[0] = which;
+  magic_mem[1] = arg0;
+  magic_mem[2] = arg1;
+  magic_mem[3] = arg2;
+  magic_mem[4] = arg3;
+  magic_mem[5] = arg4;
+
+  do {
+    ret = rb_device_put(rb, (void *)magic_mem);
+    if (ret) {
+      ++retries;
+      csleep(10);
+    }
+  } while (ret != 0);
+  return retries;
+}
+
+#define MBOX_DEVICE_PRINT (0x05U)
+
+/**
+ * @brief Send one character to the host as two mailbox words: MBOX_DEVICE_PRINT,
+ * then (after a 1000-cycle pause) the character itself
+ */
+void snrt_putchar(char c) {
+  //*(volatile uint32_t *)0x2002000 = c;
+  mailbox_write(MBOX_DEVICE_PRINT);
+  csleep(1000);
+  mailbox_write(c);
+}
+
+/**
+ * @brief Post a SYS_exit request carrying `code` to the host
+ */
+void snrt_hero_exit(int code) { syscall(SYS_exit, code, 0, 0, 0, 0); }
+
+/***********************************************************************************
+ * MAILBOX
+ ***********************************************************************************/
+
+/**
+ * @brief Try once to pop a word from the host-to-accelerator mailbox
+ * @return 1 if a word was stored in `buffer`, 0 if the mailbox was empty
+ */
+int mailbox_try_read(uint32_t *buffer) {
+  return rb_device_get(g_h2a_mbox, buffer) == 0 ? 1 : 0;
+}
+/**
+ * @brief Block until `n_words` words have been popped from the host-to-accelerator
+ * mailbox
+ *
+ * Words are stored in arrival order: the first word received goes to buffer[0].
+ * (The loop used to index with the decremented counter, which filled the buffer
+ * back-to-front; single-word reads are unaffected by the change.) While the mailbox
+ * is empty the read is retried after a 10-cycle pause. Not marked `inline`, to match
+ * the plain external declaration in sw_mailbox.h and the sibling mailbox functions.
+ *
+ * @param buffer destination for the received words
+ * @param n_words number of words to read; 0 returns immediately
+ * @return always 0
+ */
+int mailbox_read(uint32_t *buffer, size_t n_words) {
+  for (size_t i = 0; i < n_words; i++) {
+    while (rb_device_get(g_h2a_mbox, &buffer[i]) != 0) {
+      csleep(10);
+    }
+  }
+  return 0;
+}
+/**
+ * @brief Block until `word` has been pushed to the accelerator-to-host mailbox,
+ * retrying after a 10-cycle pause while it is full
+ * @return always 0
+ */
+int mailbox_write(uint32_t word) {
+  int ret;
+  do {
+    ret = rb_device_put(g_a2h_mbox, &word);
+    if (ret) {
+      csleep(10);
+    }
+  } while (ret);
+  return ret;
+}
diff --git a/target/sim/sw/device/apps/libomptarget_device/src/sw_mailbox.h b/target/sim/sw/device/apps/libomptarget_device/src/sw_mailbox.h
new file mode 100644
index 000000000..eef409566
--- /dev/null
+++ b/target/sim/sw/device/apps/libomptarget_device/src/sw_mailbox.h
@@ -0,0 +1,203 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include
+#include
+#include
+#include
+#include "snrt.h"
+
+/***********************************************************************************
+ * MACROS
+ ***********************************************************************************/
+
+#define MBOX_DEVICE_READY (0x01U)
+#define MBOX_DEVICE_START (0x02U)
+#define MBOX_DEVICE_BUSY (0x03U)
+#define MBOX_DEVICE_DONE (0x04U)
+#define MBOX_DEVICE_STOP (0x0FU)
+#define MBOX_DEVICE_LOGLVL (0x10U)
+#define MBOX_HOST_READY (0x1000U)
+#define MBOX_HOST_DONE (0x3000U)
+
+#define SYS_exit 60
+#define SYS_write 64
+#define SYS_read 63
+#define SYS_wake 1235
+#define SYS_cycle 1236
+
+/***********************************************************************************
+ * TYPES
+ ***********************************************************************************/
+
+/**
+ * @brief Ring buffer for simple communication from accelerator to host.
+ * @tail: Points to the element in `data` which is read next
+ * @head: Points to the element in `data` which is written next
+ * @size: Number of elements in `data`. Head and tail pointer wrap at `size`
+ * @element_size: Size of each element in bytes
+ * @data_p: points to the base of the data buffer in physical address
+ * @data_v: points to the base of the data buffer in virtual address space
+ */
+struct ring_buf {
+  uint32_t head;
+  uint32_t size;
+  uint32_t tail;
+  uint32_t element_size;
+  uint64_t data_v;
+  uint64_t data_p;
+};
+
+
+/***********************************************************************************
+ * DATA
+ ***********************************************************************************/
+extern volatile struct ring_buf *g_a2h_rb;
+extern volatile struct ring_buf *g_a2h_mbox;
+extern volatile struct ring_buf *g_h2a_mbox;
+
+/***********************************************************************************
+ * INLINES
+ ***********************************************************************************/
+
+/**
+ * @brief Print a ring buffer descriptor: its raw bytes, then the address and value
+ * of the head, size, tail, data_p and data_v fields
+ *
+ * Pointers are printed with %p and the 64-bit fields are cast to unsigned long long
+ * for %llx, so the format specifiers match the argument types on both 32-bit and
+ * 64-bit targets.
+ *
+ * @param rbuf descriptor to dump
+ */
+static inline void dump_mbox(struct ring_buf *rbuf) {
+  const uint8_t *addr = (const uint8_t *)rbuf;
+  printf("---DUMPING NOW---\n\r");
+  printf("mbox (%p)\n\r", (void *)rbuf);
+  // Raw bytes of the descriptor, eight per row, each row prefixed with its address.
+  for (size_t i = 0; i < sizeof(struct ring_buf); i++) {
+    if (i % 8 == 0)
+      printf("\n\r(%p) : ", (const void *)addr);
+    printf("%x-", *(addr++));
+  }
+  printf("\n\r");
+  printf("head : %p = %u\n\r", (void *)&rbuf->head, (unsigned)rbuf->head);
+  printf("size : %p = %u\n\r", (void *)&rbuf->size, (unsigned)rbuf->size);
+  printf("tail : %p = %u\n\r", (void *)&rbuf->tail, (unsigned)rbuf->tail);
+  printf("data_p : %p = %llx\n\r", (void *)&rbuf->data_p,
+         (unsigned long long)rbuf->data_p);
+  printf("data_v : %p = %llx\n\r", (void *)&rbuf->data_v,
+         (unsigned long long)rbuf->data_v);
+  //printf("tail %u, data_v %" PRIu64 ", element_size %u, size %u, data_p %" PRIu64 ", head %u\n\r", rbuf->tail, rbuf->data_v, rbuf->element_size, rbuf->size, rbuf->data_p, rbuf->head);
+  printf("---DUMPING ENDS---\n\r");
+}
+
+/**
+ * @brief Copy data from `el` in the next free slot in the ring-buffer on the
+ * physical addresses
+ *
+ * One slot always stays unused: the buffer counts as full when advancing the head
+ * would make it equal to the tail.
+ * NOTE(review): the payload is written through a non-volatile pointer before the
+ * volatile head update; whether a barrier is needed towards the host is not visible
+ * here.
+ *
+ * @param rb pointer to the ring buffer struct
+ * @param el pointer to the data to be copied into the ring buffer
+ * @return int 0 on success, -1 if the buffer is full
+ */
+static inline int rb_device_put(volatile struct ring_buf *rb, void *el) {
+  uint32_t next_head = (rb->head + 1) % rb->size;
+  // caught the tail, can't put data
+  if (next_head == rb->tail)
+    return -1;
+  // data_p is a 64-bit integer; go through uintptr_t so that the conversion to a
+  // pointer is well-defined when pointers are narrower than 64 bits
+  for (uint32_t i = 0; i < rb->element_size; i++)
+    *((uint8_t *)(uintptr_t)rb->data_p + rb->element_size * rb->head + i) =
+        *((uint8_t *)el + i);
+  rb->head = next_head;
+  return 0;
+}
+/**
+ * @brief Pop element from ring buffer on virtual addresses
+ *
+ * @param rb pointer to ring buffer struct
+ * @param el pointer to where element is copied to
+ * @return 0 on success, -1 if no element could be popped
+ */
+static inline int rb_host_get(volatile struct ring_buf *rb, void *el) {
+  // caught the head, can't get data
+  if (rb->tail == rb->head)
+    return -1;
+  for (uint32_t i = 0; i < rb->element_size; i++)
+    *((uint8_t *)el + i) =
+        *((uint8_t *)(uintptr_t)rb->data_v + rb->element_size * rb->tail + i);
+  rb->tail = (rb->tail + 1) % rb->size;
+  return 0;
+}
+
+/**
+ * @brief Copy data from `el` in the next free slot in the ring-buffer on the
+ * virtual addresses
+ *
+ * @param rb pointer to the ring buffer struct
+ * @param el pointer to the data to be copied into the ring buffer
+ * @return int 0 on success, -1 if the buffer is full
+ */
+static inline int rb_host_put(volatile struct ring_buf *rb, void *el) {
+  uint32_t next_head = (rb->head + 1) % rb->size;
+  // caught the tail, can't put data
+  if (next_head == rb->tail)
+    return -1;
+  for (uint32_t i = 0; i < rb->element_size; i++)
+    *((uint8_t *)(uintptr_t)rb->data_v + rb->element_size * rb->head + i) =
+        *((uint8_t *)el + i);
+  rb->head = next_head;
+  return 0;
+}
+/**
+ * @brief Pop element from ring buffer on physical addresses
+ *
+ * @param rb pointer to ring buffer struct
+ * @param el pointer to where element is copied to; must have room for
+ * `rb->element_size` bytes
+ * @return 0 on success, -1 if no element could be popped
+ */
+static inline int rb_device_get(volatile struct ring_buf *rb, void *el) {
+  // caught the head, can't get data
+  if (rb->tail == rb->head)
+    return -1;
+  for (uint32_t i = 0; i < rb->element_size; i++)
+    *((uint8_t *)el + i) =
+        *((uint8_t *)(uintptr_t)rb->data_p + rb->element_size * rb->tail + i);
+  rb->tail = (rb->tail + 1) % rb->size;
+  return 0;
+}
+/**
+ * @brief Init the ring buffer. See `struct ring_buf` for details
+ *
+ * Resets head and tail and stores `size` and `element_size` (both narrowed to the
+ * 32-bit fields). The data_p / data_v base addresses are not touched here.
+ */
+static inline void rb_init(volatile struct ring_buf *rb, uint64_t size,
+                           uint64_t element_size) {
+  rb->tail = 0;
+  rb->head = 0;
+  rb->size = size;
+  rb->element_size = element_size;
+}
+
+/**
+ * @brief Holds physical addresses of the shared L3
+ * @a2h_rb: accelerator to host ring buffer
+ * @a2h_mbox: accelerator to host mailbox
+ * @h2a_mbox: host to accelerator mailbox
+ * @heap: base of heap memory
+ */
+struct l3_layout {
+  uint32_t a2h_rb;
+  uint32_t a2h_mbox;
+  uint32_t h2a_mbox;
+  uint32_t heap;
+};
+
+/***********************************************************************************
+ * PUBLICS
+ ***********************************************************************************/
+int syscall(uint64_t which, uint64_t arg0, uint64_t arg1, uint64_t arg2,
+            uint64_t arg3, uint64_t arg4);
+
+/**
+ * @brief Busy-wait until at least `cycles` ticks of snrt_mcycle() have elapsed
+ */
+static inline void csleep(uint32_t cycles) {
+  uint32_t start = snrt_mcycle();
+  while ((snrt_mcycle() - start) < cycles) {}
+}
+
+void snrt_hero_exit(int code);
+/**
+ * @brief Blocking mailbox read access
+ */
+int mailbox_read(uint32_t *buffer, size_t n_words);
+/**
+ * @brief Non-Blocking mailbox read access. 
Return 1 on success, 0 on fail + */ +int mailbox_try_read(uint32_t *buffer); +/** + * @brief Blocking mailbox write access + */ +int mailbox_write(uint32_t word); diff --git a/target/sim/sw/device/runtime/Makefile b/target/sim/sw/device/runtime/Makefile index fab277dfe..df59d075d 100644 --- a/target/sim/sw/device/runtime/Makefile +++ b/target/sim/sw/device/runtime/Makefile @@ -19,7 +19,9 @@ SNRT_DIR = $(SNITCH_ROOT)/sw/snRuntime # Dependencies INCDIRS += $(SNRT_DIR)/src +INCDIRS += $(SNRT_DIR)/src/omp INCDIRS += $(SNRT_DIR)/api +INCDIRS += $(SNRT_DIR)/api/omp INCDIRS += $(SNRT_DIR)/vendor/riscv-opcodes INCDIRS += $(SW_DIR)/shared/platform INCDIRS += $(SW_DIR)/shared/platform/generated diff --git a/target/sim/sw/device/runtime/src/occamy_start.c b/target/sim/sw/device/runtime/src/occamy_start.c index e16e9812e..bb49676f2 100644 --- a/target/sim/sw/device/runtime/src/occamy_start.c +++ b/target/sim/sw/device/runtime/src/occamy_start.c @@ -13,6 +13,9 @@ #define SNRT_CRT0_EXIT #define SNRT_CRT0_ALTERNATE_EXIT +static inline void snrt_exit(int exit_code) { +} + static inline void snrt_crt0_callback3() { _snrt_cluster_hw_barrier = cluster_hw_barrier_addr(snrt_cluster_idx()); } diff --git a/target/sim/sw/device/runtime/src/putchar.c b/target/sim/sw/device/runtime/src/putchar.c index 84f345d0e..3ef169e56 100644 --- a/target/sim/sw/device/runtime/src/putchar.c +++ b/target/sim/sw/device/runtime/src/putchar.c @@ -3,4 +3,4 @@ // SPDX-License-Identifier: Apache-2.0 // Provide an implementation for putchar. 
-void _putchar(char character) {} +void __attribute__((weak)) _putchar(char character) {} diff --git a/target/sim/sw/device/runtime/src/snrt.c b/target/sim/sw/device/runtime/src/snrt.c index 12003018e..cc02c074d 100644 --- a/target/sim/sw/device/runtime/src/snrt.c +++ b/target/sim/sw/device/runtime/src/snrt.c @@ -7,7 +7,11 @@ #include "alloc.c" #include "cls.c" #include "cluster_interrupts.c" +#include "dm.c" #include "dma.c" +#include "eu.c" +#include "kmp.c" +#include "omp.c" #include "global_interrupts.c" #include "occamy_device.c" #include "occamy_memory.c" diff --git a/target/sim/sw/device/runtime/src/snrt.h b/target/sim/sw/device/runtime/src/snrt.h index 57686fe94..0dd5f7f59 100644 --- a/target/sim/sw/device/runtime/src/snrt.h +++ b/target/sim/sw/device/runtime/src/snrt.h @@ -18,19 +18,26 @@ #include "cls_decls.h" #include "cluster_interrupt_decls.h" #include "global_interrupt_decls.h" +#include "riscv_decls.h" #include "memory_decls.h" #include "sync_decls.h" #include "team_decls.h" +#include "start_decls.h" // Implementation #include "alloc.h" #include "cls.h" #include "cluster_interrupts.h" +#include "dm.h" #include "dma.h" #include "dump.h" #include "global_interrupts.h" #include "occamy_device.h" #include "occamy_memory.h" +#include "eu.h" +#include "kmp.h" +#include "omp.h" +#include "perf_cnt.h" #include "printf.h" #include "riscv.h" #include "ssr.h" diff --git a/target/sim/sw/device/toolchain.mk b/target/sim/sw/device/toolchain.mk index 9e83aad99..7fa4cc69a 100644 --- a/target/sim/sw/device/toolchain.mk +++ b/target/sim/sw/device/toolchain.mk @@ -6,4 +6,6 @@ BENDER ?= bender SNITCH_ROOT = $(shell $(BENDER) path snitch_cluster) -include $(SNITCH_ROOT)/target/snitch_cluster/sw/toolchain.mk \ No newline at end of file +RISCV_CFLAGS += --sysroot=$(HERO_INSTALL)/rv32imafd-ilp32d/riscv32-unknown-elf -target riscv32-unknown-elf +RISCV_LDFLAGS += -L$(HERO_INSTALL)/lib/clang/15.0.0/rv32imafdvzfh-ilp32d/lib/ +include 
$(SNITCH_ROOT)/target/snitch_cluster/sw/toolchain.mk
diff --git a/util/bin2jtag.py b/util/bin2jtag.py
new file mode 100755
index 000000000..50938f5f5
--- /dev/null
+++ b/util/bin2jtag.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 ETH Zurich and University of Bologna.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+# Generate a tcl script for writing a binary to a memory location in the FPGA
+#
+# Usage:
+# bin2jtag.py -d hw_axi_1 -b 1000 bootrom.bin > mem.tcl
+#
+# In vivado then `source mem.tcl` to execute
+#
+# Requires bin2coe
+# - https://github.com/anishathalye/bin2coe/blob/master/src/bin2coe
+
+from argparse import ArgumentParser
+from io import BytesIO
+from signal import signal, SIGPIPE, SIG_DFL
+import sys
+
+import bin2coe.convert
+
+# Restore the default SIGPIPE action so that piping the output into e.g. `head`
+# ends the script instead of raising BrokenPipeError.
+signal(SIGPIPE, SIG_DFL)
+
+
+def main():
+    """Parse the command line, read the binary and print the tcl script to stdout."""
+    parser = ArgumentParser()
+    parser.add_argument('-d', '--device', type=str, default='hw_axi_1', help='what HW axi to use')
+    parser.add_argument('-b', '--base', type=str, default='0', help='memory base address in hex')
+    parser.add_argument('-c', '--chunk-size', type=int, default=32, help='number of words per burst transaction')
+    parser.add_argument('binary', metavar='BIN', type=str, nargs=1, help='bin input')
+    options = parser.parse_args()
+
+    width = 32
+    radix = 16
+    fd_o = sys.stdout
+
+    with open(options.binary[0], 'rb') as f:
+        data = f.read()
+
+    # Writes jtag commands to fd_o
+    convert(fd_o, data, width, radix, int(options.base, 16), options.device, True, options.chunk_size)
+
+
+def convert(output, data, width, radix, address, dev, rb, chunk_size):
+    """Write a tcl script that stores `data` in memory through a hardware AXI master.
+
+    `data` is rendered by bin2coe as one text word per line. The words are emitted
+    in chunks of at most `chunk_size`; each chunk becomes one AXI write transaction
+    and, when `rb` is true, one read-back transaction whose result is compared with
+    the data that was written.
+
+    Args:
+        output: text stream the script is written to.
+        data: bytes to store.
+        width: word width in bits, passed on to bin2coe; also sets the address step.
+        radix: radix of the emitted words, passed on to bin2coe.
+        address: byte address of the first word.
+        dev: name handed to `get_hw_axis`.
+        rb: if true, emit a read-back check per chunk and a final error count.
+        chunk_size: maximum number of words per transaction, at least 1.
+
+    Raises:
+        ValueError: if `chunk_size` is smaller than 1 (the loop could not advance).
+    """
+    if chunk_size < 1:
+        raise ValueError(f"chunk_size must be at least 1, got {chunk_size}")
+    # Address step per word; 4 bytes for the 32-bit words main() asks for
+    bytes_per_word = width // 8
+
+    # License
+    output.write("# Copyright 2020 ETH Zurich and University of Bologna.\n")
+    output.write("# Solderpad Hardware License, Version 0.51, see LICENSE for details.\n")
+    output.write("# SPDX-License-Identifier: SHL-0.51\n")
+
+    # Pre tcl script
+    output.write("set errs 0\n")
+
+    # Templates for one data write
+    t = f"[get_hw_axis {dev}]"
+    tpl = "create_hw_axi_txn -cache 0 -force {n} {t} -address {a} -len {l} -type write -data {d}"
+    tpl_rb = "create_hw_axi_txn -cache 0 -force {n} {t} -address {a} -len {l} -type read"
+    tpl_run = "run_hw_axi {txn}"
+    tx_name = "txn"
+
+    # Get coe format from bin2coe
+    temp = BytesIO()
+    bin2coe.convert.convert(output=temp, data=data, width=width, depth=0, fill=0,
+                            radix=radix, little_endian=True, mem=True)
+
+    # Split the coe format into string words
+    word_list = [w for w in temp.getvalue().decode("utf-8").split("\n") if w != ""]
+
+    # Loop over the string words, one chunk per transaction
+    for i in range(0, len(word_list), chunk_size):
+        # The slice is clamped at the end of the list, so the last chunk may be shorter.
+        # The words are reversed: the first word of the chunk is last in the data string.
+        words = word_list[i:i + chunk_size][::-1]
+        # Write axi write
+        out = tpl.format(n=tx_name, t=t, a=f"{address:08x}", d="_".join(words), l=len(words)) + '\n'
+        output.write(out)
+        output.write(tpl_run.format(txn=tx_name) + '\n')
+        # Write axi readback
+        if rb:
+            out = tpl_rb.format(n="wb", t=t, a=f"{address:08x}", l=len(words)) + '\n'
+            output.write(out)
+            output.write("run_hw_axi wb\n")
+            output.write("set resp [get_property DATA [get_hw_axi_txns wb]]\n")
+            s = f"set exp {''.join(words)}\n"
+            s += "if {$exp ne $resp} { puts Error; incr errs }\n"
+            output.write(s)
+        # Get to next chunk
+        address += len(words) * bytes_per_word
+
+    if rb:
+        output.write("puts \"Errors: $errs\"\n")
+
+
+if __name__ == "__main__":
+    main()