From 42cbf47d6275a20a21fe905a322e47c9186baa34 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 8 Aug 2024 13:15:24 +0200 Subject: [PATCH 001/102] [Deps] add pyxsi fork and its deps --- docker/Dockerfile.finn | 8 +++++++- fetch-repos.sh | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn index 5126ed3ff4..df144f3a0d 100644 --- a/docker/Dockerfile.finn +++ b/docker/Dockerfile.finn @@ -65,7 +65,13 @@ RUN apt-get update && \ python-is-python3 \ python3-pip \ python3-setuptools-scm \ - python3-venv + python3-venv \ + pybind11-dev \ + libfmt-dev \ + libboost-dev \ + libjansson-dev \ + libgetdata-dev \ + libtinfo5 RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config RUN locale-gen "en_US.UTF-8" diff --git a/fetch-repos.sh b/fetch-repos.sh index 2033973f2a..de137a6002 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,6 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" +PYXSI_COMMIT="b73764171bcde9612945dc694c3b95180ecc7f35" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" @@ -51,6 +52,7 @@ AVNET_BDF_URL="https://github.com/Avnet/bdf.git" XIL_BDF_URL="https://github.com/Xilinx/XilinxBoardStore.git" RFSOC4x2_BDF_URL="https://github.com/RealDigitalOrg/RFSoC4x2-BSP.git" KV260_BDF_URL="https://github.com/Xilinx/XilinxBoardStore.git" +PYXSI_URL="https://github.com/maltanar/pyxsi.git" QONNX_DIR="qonnx" FINN_EXP_DIR="finn-experimental" @@ -63,6 +65,7 @@ AVNET_BDF_DIR="avnet-bdf" XIL_BDF_DIR="xil-bdf" RFSOC4x2_BDF_DIR="rfsoc4x2-bdf" KV260_SOM_BDF_DIR="kv260-som-bdf" +PYXSI_DIR="pyxsi" # absolute path to this script, e.g. 
/home/user/bin/foo.sh SCRIPT=$(readlink -f "$0") @@ -126,6 +129,7 @@ fetch_repo $AVNET_BDF_URL $AVNET_BDF_COMMIT $AVNET_BDF_DIR fetch_repo $XIL_BDF_URL $XIL_BDF_COMMIT $XIL_BDF_DIR fetch_repo $RFSOC4x2_BDF_URL $RFSOC4x2_BDF_COMMIT $RFSOC4x2_BDF_DIR fetch_repo $KV260_BDF_URL $KV260_BDF_COMMIT $KV260_SOM_BDF_DIR +fetch_repo $PYXSI_URL $PYXSI_COMMIT $PYXSI_DIR # Can skip downloading of board files entirely if desired if [ "$FINN_SKIP_BOARD_FILES" = "1" ]; then From b6dba8a12ff5a4f4f0834f1b31e2ccb23ed235cb Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 8 Aug 2024 13:15:47 +0200 Subject: [PATCH 002/102] [Util] add util to fetch Vivado path --- src/finn/util/basic.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 91c191962f..cdc51cdca7 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -125,6 +125,19 @@ def get_finn_root(): ) +def get_vivado_root(): + "Return the root directory that Vivado is installed into." + + try: + return os.environ["XILINX_VIVADO"] + except KeyError: + raise Exception( + """Environment variable XILINX_VIVADO must be set + correctly. Please ensure you have launched the Docker contaier correctly. 
+ """ + ) + + def pyverilate_get_liveness_threshold_cycles(): """Return the number of no-output cycles rtlsim will wait before assuming the simulation is not finishing and throwing an exception.""" From 1468dbe7c8cdf825e21b899004de1df5124e73ba Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 8 Aug 2024 13:16:14 +0200 Subject: [PATCH 003/102] [Sim] add a first draft of pyxsi-based simulation, untested --- src/finn/util/pyxsi.py | 324 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 src/finn/util/pyxsi.py diff --git a/src/finn/util/pyxsi.py b/src/finn/util/pyxsi.py new file mode 100644 index 0000000000..db7446c945 --- /dev/null +++ b/src/finn/util/pyxsi.py @@ -0,0 +1,324 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import errno +import os +import os.path +import pyxsi +import shutil + +from finn.util.basic import get_finn_root, get_vivado_root, launch_process_helper +from finn.util.pyverilator import make_single_source_file + + +def compile_sim_obj(top_module_name, source_list, sim_out_dir): + pyxsi_path = get_finn_root() + "/deps/pyxsi" + xsimincl_path = get_vivado_root() + "/data/xsim/include" + # first, copy the pyxsi.so (XSI python bindings) into our sim dir + # TODO skip building the .o/.so files here if they already exist + cmd_build_pybind = [ + "g++", + "-Wall", + "-Werror", + "-g", + "-fPIC", + "-std=c++20", + "-I/usr/include/python3.10", + "-I" + xsimincl_path, + "-Isrc", + "-c", + "-o", + "pybind.o", + "src/pybind.cpp", + ] + launch_process_helper(cmd_build_pybind, cwd=pyxsi_path) + + cmd_build_xsiloader = [ + "g++", + "-Wall", + "-Werror", + "-g", + "-fPIC", + "-std=c++20", + "-I/usr/include/python3.10", + "-I" + xsimincl_path, + "-Isrc", + "-c", + "-o", + "xsi_loader.o", + "src/xsi_loader.cpp", + ] + launch_process_helper(cmd_build_xsiloader, cwd=pyxsi_path) + + cmd_build_pyxsi_so = [ + "g++", + "-Wall", + "-Werror", + "-g", + "-fPIC", + "-std=c++20", + "-I/usr/include/python3.10", + "-I" + xsimincl_path, + "-Isrc", + "-shared", + "-o", + "pyxsi.so", + "pybind.o", + "xsi_loader.o", + "-lfmt", + "-ldl", + ] + launch_process_helper(cmd_build_pyxsi_so, cwd=pyxsi_path) + + shutil.copy(pyxsi_path + "/pyxsi.so", sim_out_dir) + # create a 
single source file from the list of sources + # TODO this should be no longer necessary for pyxsi since we have .prj files + single_source_file = sim_out_dir + "/all_sources.v" + make_single_source_file(source_list, single_source_file) + # create a .prj file with the single source file + with open(sim_out_dir + "/rtlsim.prj", "w") as f: + f.write("verilog work all_sources.v\n") + + # now call xelab to generate the .so for the design to be simulated + # TODO make debug controllable to allow faster sim when desired + cmd_xelab = [ + "xelab", + "work." + top_module_name, + "-prj", + "rtlsim.prj", + "-debug", + "all", + "-dll", + "-s", + top_module_name, + ] + launch_process_helper(cmd_xelab, cwd=sim_out_dir) + out_so_relative_path = "xsim.dir/%s/xsimk.so" % top_module_name + out_so_full_path = sim_out_dir + "/" + out_so_relative_path + + if not os.path.isfile(out_so_full_path): + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), out_so_full_path) + + return (sim_out_dir, out_so_relative_path) + + +def load_sim_obj(sim_out_dir, out_so_relative_path): + oldcwd = os.getcwd() + os.chdir(sim_out_dir) + sim = pyxsi.XSI(out_so_relative_path) + os.chdir(oldcwd) + return sim + + +def _find_signal(sim, signal_name): + signal_list = [sim.get_port_name(i) for i in range(sim.get_port_count())] + # handle both mixed caps and lowercase signal names + if signal_name in signal_list: + return signal_name + elif signal_name.lower() in signal_list: + return signal_name.lower() + else: + raise Exception("Signal not found: " + signal_name) + + +def _read_signal(sim, signal_name): + signal_name = _find_signal(sim, signal_name) + port_val = sim.get_port_value(signal_name) + return int(port_val, 2) + + +def _write_signal(sim, signal_name, signal_value): + signal_name = _find_signal(sim, signal_name) + signal_len = len(sim.get_port_value(signal_name)) + if signal_value < 0: + raise Exception("TODO: _write_signal needs fix for 2s complement neg values") + signal_bin_value = 
f"{signal_value:0{signal_len}b}"[-signal_len:] + sim.set_port_value(signal_name, signal_bin_value) + + +def reset_rtlsim(sim, rst_name="ap_rst_n", active_low=True, clk_name="ap_clk"): + _write_signal(sim, clk_name, 0) + _write_signal(sim, rst_name, 0 if active_low else 1) + for _ in range(2): + toggle_clk(sim, clk_name) + + signals_to_write = {} + signals_to_write[rst_name] = 1 if active_low else 0 + toggle_clk(sim, clk_name, signals_to_write) + toggle_clk(sim, clk_name) + + +def toggle_clk(sim, clk_name="ap_clk", signals_to_write={}): + """Toggles the clock input in pyverilator once.""" + toggle_neg_edge(sim, clk_name=clk_name) + toggle_pos_edge(sim, clk_name=clk_name, signals_to_write=signals_to_write) + + +def toggle_neg_edge(sim, clk_name="ap_clk"): + _write_signal(sim, clk_name, 0) + sim.run(5000) + + +def toggle_pos_edge(sim, clk_name="ap_clk", signals_to_write={}): + _write_signal(sim, clk_name, 1) + sim.run(5000) + # Write IO signals a delta cycle after rising edge + if bool(signals_to_write): # if dict non-empty + for sig in signals_to_write.keys(): + _write_signal(sim, sig, signals_to_write[sig]) + comb_update_and_trace(sim) + + +def comb_update_and_trace(sim): + # TODO anything needed here for tracing or updates? + pass + + +def rtlsim_multi_io( + sim, + io_dict, + num_out_values, + trace_file="", + sname="_V_V_", + liveness_threshold=10000, + hook_preclk=None, + hook_postclk=None, +): + """Runs the XSI-based simulation by passing the input values to the simulation, + toggle the clock and observing the execution time. Function contains also an + observation loop that can abort the simulation if no output value is produced + after a set number of cycles. Can handle multiple i/o streams. See function + implementation for details on how the top-level signals should be named. 
+ + Arguments: + + * sim: the pyxsi object for simulation + * io_dict: a dict of dicts in the following format: + {"inputs" : {"in0" : , "in1" : }, + "outputs" : {"out0" : [], "out1" : []} } + is a list of Python arbitrary-precision ints indicating + what data to push into the simulation, and the output lists are + similarly filled when the simulation is complete + * num_out_values: number of total values to be read from the simulation to + finish the simulation and return. + * trace_file: vcd dump filename, empty string (no vcd dump) by default + * sname: signal naming for streams, "_V_V_" by default, vitis_hls uses "_V_" + * liveness_threshold: if no new output is detected after this many cycles, + terminate simulation + * hook_preclk: hook function to call prior to clock tick + * hook_postclk: hook function to call after clock tick + + Returns: number of clock cycles elapsed for completion + + """ + + # TODO tracing/debug + + for outp in io_dict["outputs"]: + _write_signal(sim, outp + sname + "TREADY", 1) + + # observe if output is completely calculated + # total_cycle_count will contain the number of cycles the calculation ran + output_done = False + total_cycle_count = 0 + output_count = 0 + old_output_count = 0 + + # avoid infinite looping of simulation by aborting when there is no change in + # output values after 100 cycles + no_change_count = 0 + + # Dictionary that will hold the signals to drive to DUT + signals_to_write = {} + + while not (output_done): + if hook_preclk: + hook_preclk(sim) + # Toggle falling edge to arrive at a delta cycle before the rising edge + toggle_neg_edge(sim) + + # examine signals, decide how to act based on that but don't update yet + # so only _read_signal access in this block, no _write_signal + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + signal_name = inp + sname + if ( + _read_signal(sim, signal_name + "TREADY") == 1 + and _read_signal(sim, signal_name + "TVALID") == 1 + ): + inputs = inputs[1:] + 
io_dict["inputs"][inp] = inputs + + for outp in io_dict["outputs"]: + outputs = io_dict["outputs"][outp] + signal_name = outp + sname + if ( + _read_signal(sim, signal_name + "TREADY") == 1 + and _read_signal(sim, signal_name + "TVALID") == 1 + ): + outputs = outputs + [_read_signal(sim, signal_name + "TDATA")] + output_count += 1 + io_dict["outputs"][outp] = outputs + + # update signals based on decisions in previous block, but don't examine anything + # so only _write_signal access in this block, no _read_signal + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + signal_name = inp + sname + signals_to_write[signal_name + "TVALID"] = 1 if len(inputs) > 0 else 0 + signals_to_write[signal_name + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 + + # Toggle rising edge to arrive at a delta cycle before the falling edge + toggle_pos_edge(sim, signals_to_write=signals_to_write) + if hook_postclk: + hook_postclk(sim) + + total_cycle_count = total_cycle_count + 1 + + if output_count == old_output_count: + no_change_count = no_change_count + 1 + else: + no_change_count = 0 + old_output_count = output_count + + # check if all expected output words received + if output_count == num_out_values: + output_done = True + + # end sim on timeout + if no_change_count == liveness_threshold: + # TODO end tracing? + raise Exception( + "Error in simulation! Takes too long to produce output. " + "Consider setting the LIVENESS_THRESHOLD env.var. to a " + "larger value." + ) + + # TODO end tracing? 
+ + return total_cycle_count From 6907167a12101c7cd75e57588195c10bf691a403 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 8 Aug 2024 17:20:04 +0200 Subject: [PATCH 004/102] [Sim] try enabling pyxsi logging and tracing --- src/finn/util/pyxsi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/finn/util/pyxsi.py b/src/finn/util/pyxsi.py index db7446c945..dc34b3e222 100644 --- a/src/finn/util/pyxsi.py +++ b/src/finn/util/pyxsi.py @@ -127,11 +127,15 @@ def compile_sim_obj(top_module_name, source_list, sim_out_dir): return (sim_out_dir, out_so_relative_path) -def load_sim_obj(sim_out_dir, out_so_relative_path): +def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None): + vivadolib_path = get_vivado_root() + "/lib/lnx64.o" oldcwd = os.getcwd() os.chdir(sim_out_dir) - sim = pyxsi.XSI(out_so_relative_path) + oldld = os.environ["LD_LIBRARY_PATH"] + os.environ["LD_LIBRARY_PATH"] = vivadolib_path + sim = pyxsi.XSI(out_so_relative_path, tracefile=tracefile, logfile="rtlsim.log") os.chdir(oldcwd) + os.environ["LD_LIBRARY_PATH"] = oldld return sim From 5a5461c1c7abecdb536f98ee9f3d6bb47f075ff0 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 8 Aug 2024 17:20:29 +0200 Subject: [PATCH 005/102] [Op] experiment with enabling rtlsim with pyxsi --- .../fpgadataflow/hls/addstreams_hls.py | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index a3f0e043f8..88205df97c 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -31,7 +31,9 @@ from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +from finn.util.basic import make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy +from finn.util.pyxsi import 
compile_sim_obj, load_sim_obj, reset_rtlsim, rtlsim_multi_io class AddStreams_hls(AddStreams, HLSBackend): @@ -125,8 +127,8 @@ def execute_node(self, context, graph): rtlsim_inp1 = npy_to_rtlsim_input( "{}/input_1.npy".format(code_gen_dir), export_idt, nbits ) - super().reset_rtlsim(sim) - super().toggle_clk(sim) + # super().reset_rtlsim(sim) + # super().toggle_clk(sim) rtlsim_output = self.rtlsim(sim, rtlsim_inp0, rtlsim_inp1) odt = self.get_output_datatype() target_bits = odt.bitwidth() @@ -251,3 +253,33 @@ def pragmas(self): "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") + + def prepare_rtlsim(self): + verilog_files = self.get_all_verilog_filenames(abspath=True) + single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") + + ret = compile_sim_obj(self.get_verilog_top_module_name(), verilog_files, single_src_dir) + + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", ret[0] + "/" + ret[1]) + + def get_rtlsim(self): + sim_xo_path = self.get_nodeattr("rtlsim_so") + sim_base, sim_rel = sim_xo_path.split("xsim.dir") + sim_rel = "xsim.dir" + sim_rel + tracefile = "trace.wdb" + return load_sim_obj(sim_base, sim_rel, tracefile) + + def rtlsim(self, sim, inp, inp2=None): + """Runs the pyverilator simulation by passing the input values to the simulation, + toggle the clock and observing the execution time. 
Function contains also an + observation loop that can abort the simulation if no output value is produced + after 100 cycles.""" + + reset_rtlsim(sim) + io_dict = {"inputs": {"in0": inp, "in1": inp2}, "outputs": {"out": []}} + num_out_values = self.get_number_output_values() + sname = "_" + self.hls_sname() + "_" + rtlsim_multi_io(sim, io_dict, num_out_values, sname=sname) + + return io_dict["outputs"]["out"] From a50cdb6c2b07f447c77c44548931299c41d7d34e Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 15 Aug 2024 00:29:12 +0200 Subject: [PATCH 006/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index de137a6002..9fb74c7e3a 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="b73764171bcde9612945dc694c3b95180ecc7f35" +PYXSI_COMMIT="6fef61cd04b8e91de66e499c5ac0ddd007e522ed" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 77680dd06161e2c810afb785b5d7c01c869d393f Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 15 Aug 2024 00:31:31 +0200 Subject: [PATCH 007/102] [Sim] update pyxsi API to include verilog spec, assume precompiled --- src/finn/util/pyxsi.py | 86 ++++++++---------------------------------- 1 file changed, 16 insertions(+), 70 deletions(-) diff --git a/src/finn/util/pyxsi.py b/src/finn/util/pyxsi.py index dc34b3e222..305d38f13f 100644 --- a/src/finn/util/pyxsi.py +++ b/src/finn/util/pyxsi.py @@ -30,79 +30,20 @@ import os import os.path import pyxsi -import shutil -from finn.util.basic import get_finn_root, get_vivado_root, launch_process_helper -from finn.util.pyverilator import make_single_source_file +from 
finn.util.basic import get_vivado_root, launch_process_helper def compile_sim_obj(top_module_name, source_list, sim_out_dir): - pyxsi_path = get_finn_root() + "/deps/pyxsi" - xsimincl_path = get_vivado_root() + "/data/xsim/include" - # first, copy the pyxsi.so (XSI python bindings) into our sim dir - # TODO skip building the .o/.so files here if they already exist - cmd_build_pybind = [ - "g++", - "-Wall", - "-Werror", - "-g", - "-fPIC", - "-std=c++20", - "-I/usr/include/python3.10", - "-I" + xsimincl_path, - "-Isrc", - "-c", - "-o", - "pybind.o", - "src/pybind.cpp", - ] - launch_process_helper(cmd_build_pybind, cwd=pyxsi_path) - - cmd_build_xsiloader = [ - "g++", - "-Wall", - "-Werror", - "-g", - "-fPIC", - "-std=c++20", - "-I/usr/include/python3.10", - "-I" + xsimincl_path, - "-Isrc", - "-c", - "-o", - "xsi_loader.o", - "src/xsi_loader.cpp", - ] - launch_process_helper(cmd_build_xsiloader, cwd=pyxsi_path) - - cmd_build_pyxsi_so = [ - "g++", - "-Wall", - "-Werror", - "-g", - "-fPIC", - "-std=c++20", - "-I/usr/include/python3.10", - "-I" + xsimincl_path, - "-Isrc", - "-shared", - "-o", - "pyxsi.so", - "pybind.o", - "xsi_loader.o", - "-lfmt", - "-ldl", - ] - launch_process_helper(cmd_build_pyxsi_so, cwd=pyxsi_path) - - shutil.copy(pyxsi_path + "/pyxsi.so", sim_out_dir) - # create a single source file from the list of sources - # TODO this should be no longer necessary for pyxsi since we have .prj files - single_source_file = sim_out_dir + "/all_sources.v" - make_single_source_file(source_list, single_source_file) - # create a .prj file with the single source file + # create a .prj file with the source files with open(sim_out_dir + "/rtlsim.prj", "w") as f: - f.write("verilog work all_sources.v\n") + for src_line in source_list: + if src_line.endswith(".v"): + f.write(f"verilog work {src_line}\n") + elif src_line.endswith(".vhd"): + f.write(f"vhdl2008 work {src_line}\n") + else: + raise Exception(f"Unknown extension for .prj file sources: {src_line}") # now call 
xelab to generate the .so for the design to be simulated # TODO make debug controllable to allow faster sim when desired @@ -127,13 +68,18 @@ def compile_sim_obj(top_module_name, source_list, sim_out_dir): return (sim_out_dir, out_so_relative_path) -def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None): +def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): vivadolib_path = get_vivado_root() + "/lib/lnx64.o" oldcwd = os.getcwd() os.chdir(sim_out_dir) oldld = os.environ["LD_LIBRARY_PATH"] os.environ["LD_LIBRARY_PATH"] = vivadolib_path - sim = pyxsi.XSI(out_so_relative_path, tracefile=tracefile, logfile="rtlsim.log") + sim = pyxsi.XSI( + out_so_relative_path, + is_toplevel_verilog=is_toplevel_verilog, + tracefile=tracefile, + logfile="rtlsim.log", + ) os.chdir(oldcwd) os.environ["LD_LIBRARY_PATH"] = oldld return sim From 1804c474584720f358837a75f1052dd5d6c2b4c5 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 19 Aug 2024 16:25:15 +0200 Subject: [PATCH 008/102] [Deps] ensure pyxsi is compiled + set envvars for its use --- docker/Dockerfile.finn | 2 +- docker/finn_entrypoint.sh | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile.finn b/docker/Dockerfile.finn index df144f3a0d..bd951f7857 100644 --- a/docker/Dockerfile.finn +++ b/docker/Dockerfile.finn @@ -76,7 +76,7 @@ RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config RUN locale-gen "en_US.UTF-8" # install Verilator from source to get the right version -RUN apt-get install -y git perl make autoconf g++ flex bison ccache libgoogle-perftools-dev numactl perl-doc libfl2 libfl-dev zlib1g zlib1g-dev +RUN apt-get install -y git perl make autoconf g++-10 flex bison ccache libgoogle-perftools-dev numactl perl-doc libfl2 libfl-dev zlib1g zlib1g-dev RUN git clone https://github.com/verilator/verilator RUN cd verilator && \ git checkout v4.224 && \ diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh 
index c7500bcaa6..d217b3a640 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -137,6 +137,18 @@ else echo "See https://docs.xilinx.com/r/en-US/ug835-vivado-tcl-commands/Tcl-Initialization-Scripts" fi + +if [ -f "${FINN_ROOT}/deps/pyxsi/pyxsi.so" ];then + gecho "Found pyxsi at ${FINN_ROOT}/deps/pyxsi/pyxsi.so" +else + OLDPWD=$(pwd) + cd ${FINN_ROOT}/deps/pyxsi + touch .dockerenv + make + cd $OLDPWD +fi +export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${XILINX_VIVADO}/lib/lnx64.o export PATH=$PATH:$HOME/.local/bin # execute the provided command(s) as root exec "$@" From 64e9fc14e62ecb8be25bacfa8413b4b92cc612ea Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 19 Aug 2024 16:26:10 +0200 Subject: [PATCH 009/102] [Sim] remove LD_LIBRARY_PATH setting from pyxsi util, doesn't work --- src/finn/util/pyxsi.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/finn/util/pyxsi.py b/src/finn/util/pyxsi.py index 305d38f13f..79ce1142cb 100644 --- a/src/finn/util/pyxsi.py +++ b/src/finn/util/pyxsi.py @@ -31,7 +31,7 @@ import os.path import pyxsi -from finn.util.basic import get_vivado_root, launch_process_helper +from finn.util.basic import launch_process_helper def compile_sim_obj(top_module_name, source_list, sim_out_dir): @@ -69,11 +69,8 @@ def compile_sim_obj(top_module_name, source_list, sim_out_dir): def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): - vivadolib_path = get_vivado_root() + "/lib/lnx64.o" oldcwd = os.getcwd() os.chdir(sim_out_dir) - oldld = os.environ["LD_LIBRARY_PATH"] - os.environ["LD_LIBRARY_PATH"] = vivadolib_path sim = pyxsi.XSI( out_so_relative_path, is_toplevel_verilog=is_toplevel_verilog, @@ -81,7 +78,6 @@ def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_ logfile="rtlsim.log", ) os.chdir(oldcwd) - os.environ["LD_LIBRARY_PATH"] = oldld return sim From 
d480e4927206a5c4620bf93540c350824df36535 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 20 Aug 2024 13:39:38 +0100 Subject: [PATCH 010/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 9fb74c7e3a..d0699f7709 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="6fef61cd04b8e91de66e499c5ac0ddd007e522ed" +PYXSI_COMMIT="69898ad64c245f2a052a01167524a8a159e10bc7" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 356093377dbf72de8cef8316d05e1792fa1c059b Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 20 Aug 2024 14:20:38 +0100 Subject: [PATCH 011/102] [AddStreams] set rtlsim_cycles for pyxsi prototyping --- src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index 88205df97c..79b4aca279 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -280,6 +280,7 @@ def rtlsim(self, sim, inp, inp2=None): io_dict = {"inputs": {"in0": inp, "in1": inp2}, "outputs": {"out": []}} num_out_values = self.get_number_output_values() sname = "_" + self.hls_sname() + "_" - rtlsim_multi_io(sim, io_dict, num_out_values, sname=sname) + total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, sname=sname) + self.set_nodeattr("cycles_rtlsim", total_cycle_count) return io_dict["outputs"]["out"] From 212927c13cacad0d64c5775c5f7c26928cbf2ac9 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu 
Date: Wed, 21 Aug 2024 13:23:53 +0100 Subject: [PATCH 012/102] [Infra] only trigger pyxsi compile if Vivado detected --- docker/finn_entrypoint.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index d217b3a640..70ac8d85f7 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -129,6 +129,18 @@ if [ -d "$FINN_ROOT/.Xilinx" ]; then mkdir "$HOME/.Xilinx/Vivado/" cp "$FINN_ROOT/.Xilinx/Vivado/Vivado_init.tcl" "$HOME/.Xilinx/Vivado/" gecho "Found Vivado_init.tcl and copied to $HOME/.Xilinx/Vivado/Vivado_init.tcl" + # pyxsi depends on Vivado so only triggered after we find Vivado + if [ -f "${FINN_ROOT}/deps/pyxsi/pyxsi.so" ]; then + gecho "Found pyxsi at ${FINN_ROOT}/deps/pyxsi/pyxsi.so" + else + OLDPWD=$(pwd) + cd ${FINN_ROOT}/deps/pyxsi + touch .dockerenv + make + cd $OLDPWD + fi + export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${XILINX_VIVADO}/lib/lnx64.o else yecho "Unable to find $FINN_ROOT/.Xilinx/Vivado/Vivado_init.tcl" fi @@ -137,18 +149,6 @@ else echo "See https://docs.xilinx.com/r/en-US/ug835-vivado-tcl-commands/Tcl-Initialization-Scripts" fi - -if [ -f "${FINN_ROOT}/deps/pyxsi/pyxsi.so" ];then - gecho "Found pyxsi at ${FINN_ROOT}/deps/pyxsi/pyxsi.so" -else - OLDPWD=$(pwd) - cd ${FINN_ROOT}/deps/pyxsi - touch .dockerenv - make - cd $OLDPWD -fi -export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${XILINX_VIVADO}/lib/lnx64.o export PATH=$PATH:$HOME/.local/bin # execute the provided command(s) as root exec "$@" From 4fd5610d0ebd633dcb3e4c633ddde2a77ebf523d Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Wed, 21 Aug 2024 14:10:52 +0100 Subject: [PATCH 013/102] [Sim] move pyxsiutils to own repo, update deps, fix entrypoint chk --- docker/finn_entrypoint.sh | 29 +- fetch-repos.sh | 2 +- .../fpgadataflow/hls/addstreams_hls.py | 2 +- 
src/finn/util/pyxsi.py | 270 ------------------ 4 files changed, 19 insertions(+), 284 deletions(-) delete mode 100644 src/finn/util/pyxsi.py diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 70ac8d85f7..54bd21f907 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -105,6 +105,22 @@ else fi fi +if [ -z "${XILINX_VIVADO}" ]; then + yecho "pyxsi will be unavailable since Vivado was not found" +else + if [ -f "${FINN_ROOT}/deps/pyxsi/pyxsi.so" ]; then + gecho "Found pyxsi at ${FINN_ROOT}/deps/pyxsi/pyxsi.so" + else + OLDPWD=$(pwd) + cd ${FINN_ROOT}/deps/pyxsi + touch .dockerenv + make + cd $OLDPWD + fi + export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi:${FINN_ROOT}/deps/pyxsi/py + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${XILINX_VIVADO}/lib/lnx64.o +fi + if [ -f "$HLS_PATH/settings64.sh" ];then # source Vitis HLS env.vars source $HLS_PATH/settings64.sh @@ -129,18 +145,7 @@ if [ -d "$FINN_ROOT/.Xilinx" ]; then mkdir "$HOME/.Xilinx/Vivado/" cp "$FINN_ROOT/.Xilinx/Vivado/Vivado_init.tcl" "$HOME/.Xilinx/Vivado/" gecho "Found Vivado_init.tcl and copied to $HOME/.Xilinx/Vivado/Vivado_init.tcl" - # pyxsi depends on Vivado so only triggered after we find Vivado - if [ -f "${FINN_ROOT}/deps/pyxsi/pyxsi.so" ]; then - gecho "Found pyxsi at ${FINN_ROOT}/deps/pyxsi/pyxsi.so" - else - OLDPWD=$(pwd) - cd ${FINN_ROOT}/deps/pyxsi - touch .dockerenv - make - cd $OLDPWD - fi - export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${XILINX_VIVADO}/lib/lnx64.o + else yecho "Unable to find $FINN_ROOT/.Xilinx/Vivado/Vivado_init.tcl" fi diff --git a/fetch-repos.sh b/fetch-repos.sh index d0699f7709..ee8fd7c517 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" 
EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="69898ad64c245f2a052a01167524a8a159e10bc7" +PYXSI_COMMIT="746c77445731a540b2c8e15575a7ee760ada11f8" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index 79b4aca279..0ac800c336 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -28,12 +28,12 @@ import numpy as np import os +from pyxsi_utils import compile_sim_obj, load_sim_obj, reset_rtlsim, rtlsim_multi_io from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.basic import make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -from finn.util.pyxsi import compile_sim_obj, load_sim_obj, reset_rtlsim, rtlsim_multi_io class AddStreams_hls(AddStreams, HLSBackend): diff --git a/src/finn/util/pyxsi.py b/src/finn/util/pyxsi.py deleted file mode 100644 index 79ce1142cb..0000000000 --- a/src/finn/util/pyxsi.py +++ /dev/null @@ -1,270 +0,0 @@ -# Copyright (C) 2024, Advanced Micro Devices, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. 
-# -# * Neither the name of FINN nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import errno -import os -import os.path -import pyxsi - -from finn.util.basic import launch_process_helper - - -def compile_sim_obj(top_module_name, source_list, sim_out_dir): - # create a .prj file with the source files - with open(sim_out_dir + "/rtlsim.prj", "w") as f: - for src_line in source_list: - if src_line.endswith(".v"): - f.write(f"verilog work {src_line}\n") - elif src_line.endswith(".vhd"): - f.write(f"vhdl2008 work {src_line}\n") - else: - raise Exception(f"Unknown extension for .prj file sources: {src_line}") - - # now call xelab to generate the .so for the design to be simulated - # TODO make debug controllable to allow faster sim when desired - cmd_xelab = [ - "xelab", - "work." 
+ top_module_name, - "-prj", - "rtlsim.prj", - "-debug", - "all", - "-dll", - "-s", - top_module_name, - ] - launch_process_helper(cmd_xelab, cwd=sim_out_dir) - out_so_relative_path = "xsim.dir/%s/xsimk.so" % top_module_name - out_so_full_path = sim_out_dir + "/" + out_so_relative_path - - if not os.path.isfile(out_so_full_path): - raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), out_so_full_path) - - return (sim_out_dir, out_so_relative_path) - - -def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): - oldcwd = os.getcwd() - os.chdir(sim_out_dir) - sim = pyxsi.XSI( - out_so_relative_path, - is_toplevel_verilog=is_toplevel_verilog, - tracefile=tracefile, - logfile="rtlsim.log", - ) - os.chdir(oldcwd) - return sim - - -def _find_signal(sim, signal_name): - signal_list = [sim.get_port_name(i) for i in range(sim.get_port_count())] - # handle both mixed caps and lowercase signal names - if signal_name in signal_list: - return signal_name - elif signal_name.lower() in signal_list: - return signal_name.lower() - else: - raise Exception("Signal not found: " + signal_name) - - -def _read_signal(sim, signal_name): - signal_name = _find_signal(sim, signal_name) - port_val = sim.get_port_value(signal_name) - return int(port_val, 2) - - -def _write_signal(sim, signal_name, signal_value): - signal_name = _find_signal(sim, signal_name) - signal_len = len(sim.get_port_value(signal_name)) - if signal_value < 0: - raise Exception("TODO: _write_signal needs fix for 2s complement neg values") - signal_bin_value = f"{signal_value:0{signal_len}b}"[-signal_len:] - sim.set_port_value(signal_name, signal_bin_value) - - -def reset_rtlsim(sim, rst_name="ap_rst_n", active_low=True, clk_name="ap_clk"): - _write_signal(sim, clk_name, 0) - _write_signal(sim, rst_name, 0 if active_low else 1) - for _ in range(2): - toggle_clk(sim, clk_name) - - signals_to_write = {} - signals_to_write[rst_name] = 1 if active_low else 0 - toggle_clk(sim, 
clk_name, signals_to_write) - toggle_clk(sim, clk_name) - - -def toggle_clk(sim, clk_name="ap_clk", signals_to_write={}): - """Toggles the clock input in pyverilator once.""" - toggle_neg_edge(sim, clk_name=clk_name) - toggle_pos_edge(sim, clk_name=clk_name, signals_to_write=signals_to_write) - - -def toggle_neg_edge(sim, clk_name="ap_clk"): - _write_signal(sim, clk_name, 0) - sim.run(5000) - - -def toggle_pos_edge(sim, clk_name="ap_clk", signals_to_write={}): - _write_signal(sim, clk_name, 1) - sim.run(5000) - # Write IO signals a delta cycle after rising edge - if bool(signals_to_write): # if dict non-empty - for sig in signals_to_write.keys(): - _write_signal(sim, sig, signals_to_write[sig]) - comb_update_and_trace(sim) - - -def comb_update_and_trace(sim): - # TODO anything needed here for tracing or updates? - pass - - -def rtlsim_multi_io( - sim, - io_dict, - num_out_values, - trace_file="", - sname="_V_V_", - liveness_threshold=10000, - hook_preclk=None, - hook_postclk=None, -): - """Runs the XSI-based simulation by passing the input values to the simulation, - toggle the clock and observing the execution time. Function contains also an - observation loop that can abort the simulation if no output value is produced - after a set number of cycles. Can handle multiple i/o streams. See function - implementation for details on how the top-level signals should be named. - - Arguments: - - * sim: the pyxsi object for simulation - * io_dict: a dict of dicts in the following format: - {"inputs" : {"in0" : , "in1" : }, - "outputs" : {"out0" : [], "out1" : []} } - is a list of Python arbitrary-precision ints indicating - what data to push into the simulation, and the output lists are - similarly filled when the simulation is complete - * num_out_values: number of total values to be read from the simulation to - finish the simulation and return. 
- * trace_file: vcd dump filename, empty string (no vcd dump) by default - * sname: signal naming for streams, "_V_V_" by default, vitis_hls uses "_V_" - * liveness_threshold: if no new output is detected after this many cycles, - terminate simulation - * hook_preclk: hook function to call prior to clock tick - * hook_postclk: hook function to call after clock tick - - Returns: number of clock cycles elapsed for completion - - """ - - # TODO tracing/debug - - for outp in io_dict["outputs"]: - _write_signal(sim, outp + sname + "TREADY", 1) - - # observe if output is completely calculated - # total_cycle_count will contain the number of cycles the calculation ran - output_done = False - total_cycle_count = 0 - output_count = 0 - old_output_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - - # Dictionary that will hold the signals to drive to DUT - signals_to_write = {} - - while not (output_done): - if hook_preclk: - hook_preclk(sim) - # Toggle falling edge to arrive at a delta cycle before the rising edge - toggle_neg_edge(sim) - - # examine signals, decide how to act based on that but don't update yet - # so only _read_signal access in this block, no _write_signal - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - signal_name = inp + sname - if ( - _read_signal(sim, signal_name + "TREADY") == 1 - and _read_signal(sim, signal_name + "TVALID") == 1 - ): - inputs = inputs[1:] - io_dict["inputs"][inp] = inputs - - for outp in io_dict["outputs"]: - outputs = io_dict["outputs"][outp] - signal_name = outp + sname - if ( - _read_signal(sim, signal_name + "TREADY") == 1 - and _read_signal(sim, signal_name + "TVALID") == 1 - ): - outputs = outputs + [_read_signal(sim, signal_name + "TDATA")] - output_count += 1 - io_dict["outputs"][outp] = outputs - - # update signals based on decisions in previous block, but don't examine anything - # so only _write_signal 
access in this block, no _read_signal - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - signal_name = inp + sname - signals_to_write[signal_name + "TVALID"] = 1 if len(inputs) > 0 else 0 - signals_to_write[signal_name + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 - - # Toggle rising edge to arrive at a delta cycle before the falling edge - toggle_pos_edge(sim, signals_to_write=signals_to_write) - if hook_postclk: - hook_postclk(sim) - - total_cycle_count = total_cycle_count + 1 - - if output_count == old_output_count: - no_change_count = no_change_count + 1 - else: - no_change_count = 0 - old_output_count = output_count - - # check if all expected output words received - if output_count == num_out_values: - output_done = True - - # end sim on timeout - if no_change_count == liveness_threshold: - # TODO end tracing? - raise Exception( - "Error in simulation! Takes too long to produce output. " - "Consider setting the LIVENESS_THRESHOLD env.var. to a " - "larger value." - ) - - # TODO end tracing? 
- - return total_cycle_count From 6e4d08548bd4327db20de78907fc92057dfe3f38 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 9 Sep 2024 07:45:22 +0100 Subject: [PATCH 014/102] [pyxsi] trialling RPC server as LD_LIBRARY_PATH workaround --- .../fpgadataflow/hls/addstreams_hls.py | 14 +++++++---- src/finn/util/pyxsi_rpcclient.py | 15 ++++++++++++ src/finn/util/pyxsi_rpcserver.py | 24 +++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 src/finn/util/pyxsi_rpcclient.py create mode 100644 src/finn/util/pyxsi_rpcserver.py diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index 0ac800c336..67d40d3051 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -28,12 +28,12 @@ import numpy as np import os -from pyxsi_utils import compile_sim_obj, load_sim_obj, reset_rtlsim, rtlsim_multi_io from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.basic import make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy +from finn.util.pyxsi_rpcclient import PyXSIRPCProxy class AddStreams_hls(AddStreams, HLSBackend): @@ -258,7 +258,9 @@ def prepare_rtlsim(self): verilog_files = self.get_all_verilog_filenames(abspath=True) single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") - ret = compile_sim_obj(self.get_verilog_top_module_name(), verilog_files, single_src_dir) + ret = PyXSIRPCProxy().compile_sim_obj( + self.get_verilog_top_module_name(), verilog_files, single_src_dir + ) # save generated lib filename in attribute self.set_nodeattr("rtlsim_so", ret[0] + "/" + ret[1]) @@ -268,7 +270,7 @@ def get_rtlsim(self): sim_base, sim_rel = sim_xo_path.split("xsim.dir") sim_rel = "xsim.dir" + sim_rel tracefile = "trace.wdb" - return load_sim_obj(sim_base, sim_rel, tracefile) 
+ return PyXSIRPCProxy().load_sim_obj(sim_base, sim_rel, tracefile) def rtlsim(self, sim, inp, inp2=None): """Runs the pyverilator simulation by passing the input values to the simulation, @@ -276,11 +278,13 @@ def rtlsim(self, sim, inp, inp2=None): observation loop that can abort the simulation if no output value is produced after 100 cycles.""" - reset_rtlsim(sim) + PyXSIRPCProxy().reset_rtlsim(sim) io_dict = {"inputs": {"in0": inp, "in1": inp2}, "outputs": {"out": []}} num_out_values = self.get_number_output_values() sname = "_" + self.hls_sname() + "_" - total_cycle_count = rtlsim_multi_io(sim, io_dict, num_out_values, sname=sname) + total_cycle_count = PyXSIRPCProxy().rtlsim_multi_io( + sim, io_dict, num_out_values, sname=sname + ) self.set_nodeattr("cycles_rtlsim", total_cycle_count) return io_dict["outputs"]["out"] diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py new file mode 100644 index 0000000000..29f519ead6 --- /dev/null +++ b/src/finn/util/pyxsi_rpcclient.py @@ -0,0 +1,15 @@ +import xmlrpc.client + + +class PyXSIRPCProxy(object): + def __init__(self, wrapped=None): + if wrapped is None: + wrapped = xmlrpc.client.ServerProxy("http://localhost:8000") + self.wrapped = wrapped + + def __getattr__(self, name): + attr = getattr(self.wrapped, name) + return type(self)(attr) + + def __call__(self, *args, **kw): + return self.wrapped(*args, **kw) diff --git a/src/finn/util/pyxsi_rpcserver.py b/src/finn/util/pyxsi_rpcserver.py new file mode 100644 index 0000000000..01ef36c98e --- /dev/null +++ b/src/finn/util/pyxsi_rpcserver.py @@ -0,0 +1,24 @@ +import os +import pyxsi_utils +from xmlrpc.server import SimpleXMLRPCRequestHandler, SimpleXMLRPCServer + +try: + ldlp = os.environ["LD_LIBRARY_PATH"] + if not ("Vivado" in ldlp): + assert False, "Must be launched with LD_LIBRARY_PATH=$(XILINX_VIVADO)/lib/lnx64.o" +except KeyError: + assert False, "Must be launched with LD_LIBRARY_PATH=$(XILINX_VIVADO)/lib/lnx64.o" + + +# Restrict to a 
particular path. +class RequestHandler(SimpleXMLRPCRequestHandler): + rpc_paths = ("/RPC2",) + + +# Create server +port = 8000 +with SimpleXMLRPCServer(("localhost", port), requestHandler=RequestHandler) as server: + server.register_introspection_functions() + server.register_instance(pyxsi_utils) + print(f"pyxsi RPC server is now running at {port}") + server.serve_forever() From 4558314cf503a185cf56e87dc745817601c45a9d Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 1 Oct 2024 11:45:28 +0200 Subject: [PATCH 015/102] [Infra] remove global LD_LIBRARY_PATH setting from entrypoint --- docker/finn_entrypoint.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 54bd21f907..1eb9345f89 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -118,7 +118,6 @@ else cd $OLDPWD fi export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi:${FINN_ROOT}/deps/pyxsi/py - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${XILINX_VIVADO}/lib/lnx64.o fi if [ -f "$HLS_PATH/settings64.sh" ];then From aff109fd9646ad7cdb59fa629d970610dd29114c Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 1 Oct 2024 11:46:44 +0200 Subject: [PATCH 016/102] [pyxsi] add license text to rpcserver&client --- src/finn/util/pyxsi_rpcclient.py | 28 ++++++++++++++++++++++++++++ src/finn/util/pyxsi_rpcserver.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index 29f519ead6..a985d72608 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -1,3 +1,31 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of pyxsi nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + import xmlrpc.client diff --git a/src/finn/util/pyxsi_rpcserver.py b/src/finn/util/pyxsi_rpcserver.py index 01ef36c98e..3581638ca5 100644 --- a/src/finn/util/pyxsi_rpcserver.py +++ b/src/finn/util/pyxsi_rpcserver.py @@ -1,7 +1,39 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of pyxsi nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + import os import pyxsi_utils from xmlrpc.server import SimpleXMLRPCRequestHandler, SimpleXMLRPCServer +# since simulation with XSI requires a certain LD_LIBRARY_PATH setting +# which breaks other applications, we launch the simulation in its +# own executable with this env.var. 
setting, and use xmlrpc to access it + try: ldlp = os.environ["LD_LIBRARY_PATH"] if not ("Vivado" in ldlp): From 39ac22ef9a0833a9953d819135723e2ca44e6fd0 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 3 Oct 2024 18:27:03 +0100 Subject: [PATCH 017/102] [Infra] start pyxsi rpc server in entrypoint --- docker/finn_entrypoint.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 1eb9345f89..d910df42be 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -118,6 +118,10 @@ else cd $OLDPWD fi export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi:${FINN_ROOT}/deps/pyxsi/py + # launch the pyxsi RPC server and let it run in the background + gecho "Launching pyxsi RPC server..." + LD_LIBRARY_PATH=${XILINX_VIVADO}/lib/lnx64.o python ${FINN_ROOT}/src/finn/util/pyxsi_rpcserver.py & + sleep 1 fi if [ -f "$HLS_PATH/settings64.sh" ];then From 396cb8ad4721018f7e21be9bdc8cc81a1f5f3b97 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 3 Oct 2024 20:36:22 +0100 Subject: [PATCH 018/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index ee8fd7c517..c5548de9c0 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="746c77445731a540b2c8e15575a7ee760ada11f8" +PYXSI_COMMIT="1451a2fd57f406cc6fb5b9128eae48eb3dad00ad" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 948b5b3377af2a8089a4506b329dbbaf4267bbc2 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 3 Oct 2024 20:38:04 +0100 Subject: [PATCH 019/102] [Infra] redirect pyxsi rpcserver outputs to own logfile 
--- docker/finn_entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index d910df42be..e53a5fb071 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -120,7 +120,7 @@ else export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi:${FINN_ROOT}/deps/pyxsi/py # launch the pyxsi RPC server and let it run in the background gecho "Launching pyxsi RPC server..." - LD_LIBRARY_PATH=${XILINX_VIVADO}/lib/lnx64.o python ${FINN_ROOT}/src/finn/util/pyxsi_rpcserver.py & + LD_LIBRARY_PATH=${XILINX_VIVADO}/lib/lnx64.o python ${FINN_ROOT}/src/finn/util/pyxsi_rpcserver.py &> ${FINN_BUILD_DIR}/pyxsi_rpcserver.log & sleep 1 fi From 159e2286bff1da29deb2ad655220605b1e109406 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 3 Oct 2024 20:39:31 +0100 Subject: [PATCH 020/102] [pyxsi] rework RPC interface to exclude rtlsim_multi_io --- src/finn/util/pyxsi_rpcclient.py | 135 +++++++++++++++++++++++++++++-- src/finn/util/pyxsi_rpcserver.py | 75 ++++++++++++++++- 2 files changed, 201 insertions(+), 9 deletions(-) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index a985d72608..fdda853601 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -32,12 +32,135 @@ class PyXSIRPCProxy(object): def __init__(self, wrapped=None): if wrapped is None: - wrapped = xmlrpc.client.ServerProxy("http://localhost:8000") + wrapped = xmlrpc.client.ServerProxy("http://localhost:8000", allow_none=True) self.wrapped = wrapped - def __getattr__(self, name): - attr = getattr(self.wrapped, name) - return type(self)(attr) + def compile_sim_obj(self, top_module_name, source_list, sim_out_dir): + ret = self.wrapped.compile_sim_obj(top_module_name, source_list, sim_out_dir) + return ret - def __call__(self, *args, **kw): - return self.wrapped(*args, **kw) + def load_sim_obj( + self, sim_out_dir, out_so_relative_path, tracefile=None, 
is_toplevel_verilog=True + ): + ret = self.wrapped.load_sim_obj( + sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog + ) + return ret + + def find_signal(self, sim_id, signal_name): + ret = self.wrapped.find_signal(sim_id, signal_name) + return ret + + def read_signal(self, sim_id, signal_name): + signal_value_str = self.wrapped.read_signal(sim_id, signal_name) + signal_value = int(signal_value_str) + return signal_value + + def write_signal(self, sim_id, signal_name, signal_value): + signal_value_str = str(signal_value) + self.wrapped.write_signal(sim_id, signal_name, signal_value_str) + + def reset_rtlsim(self, sim_id, rst_name="ap_rst_n", active_low=True, clk_name="ap_clk"): + self.wrapped.reset_rtlsim(sim_id, rst_name, active_low, clk_name) + + def toggle_clk(self, sim_id, clk_name="ap_clk"): + self.wrapped.toggle_clk(sim_id, clk_name) + + def toggle_neg_edge(self, sim_id, clk_name="ap_clk"): + self.wrapped.toggle_neg_edge(sim_id, clk_name) + + def toggle_pos_edge(self, sim_id, clk_name="ap_clk"): + self.wrapped.toggle_pos_edge(sim_id, clk_name) + + def rtlsim_multi_io( + self, + sim_id, + io_dict, + num_out_values, + sname="_V_V_", + liveness_threshold=10000, + hook_preclk=None, + hook_postclk=None, + ): + for outp in io_dict["outputs"]: + self.write_signal(sim_id, outp + sname + "TREADY", 1) + + # observe if output is completely calculated + # total_cycle_count will contain the number of cycles the calculation ran + output_done = False + total_cycle_count = 0 + output_count = 0 + old_output_count = 0 + + # avoid infinite looping of simulation by aborting when there is no change in + # output values after 100 cycles + no_change_count = 0 + + while not (output_done): + signals_to_write = {} + if hook_preclk: + hook_preclk(sim_id) + # Toggle falling edge to arrive at a delta cycle before the rising edge + self.toggle_neg_edge(sim_id) + + # examine signals, decide how to act based on that but don't update yet + # so only read_signal access in this 
block, no _write_signal + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + signal_name = inp + sname + if ( + self.read_signal(sim_id, signal_name + "TREADY") == 1 + and self.read_signal(sim_id, signal_name + "TVALID") == 1 + ): + inputs = inputs[1:] + io_dict["inputs"][inp] = inputs + + for outp in io_dict["outputs"]: + outputs = io_dict["outputs"][outp] + signal_name = outp + sname + if ( + self.read_signal(sim_id, signal_name + "TREADY") == 1 + and self.read_signal(sim_id, signal_name + "TVALID") == 1 + ): + outputs = outputs + [self.read_signal(sim_id, signal_name + "TDATA")] + output_count += 1 + io_dict["outputs"][outp] = outputs + + # update signals based on decisions in previous block, but don't examine anything + # so only write_signal access in this block, no read_signal + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + signal_name = inp + sname + signals_to_write[signal_name + "TVALID"] = 1 if len(inputs) > 0 else 0 + signals_to_write[signal_name + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 + + # Toggle rising edge to arrive at a delta cycle before the falling edge + self.toggle_pos_edge(sim_id) + + for k, v in signals_to_write.items(): + self.write_signal(sim_id, k, v) + + if hook_postclk: + hook_postclk(sim_id) + + total_cycle_count = total_cycle_count + 1 + + if output_count == old_output_count: + no_change_count = no_change_count + 1 + else: + no_change_count = 0 + old_output_count = output_count + + # check if all expected output words received + if output_count == num_out_values: + output_done = True + + # end sim on timeout + if no_change_count == liveness_threshold: + raise Exception( + "Error in simulation! Takes too long to produce output. " + "Consider setting the liveness_threshold parameter to a " + "larger value." 
+ ) + + return total_cycle_count diff --git a/src/finn/util/pyxsi_rpcserver.py b/src/finn/util/pyxsi_rpcserver.py index 3581638ca5..650a4ebabd 100644 --- a/src/finn/util/pyxsi_rpcserver.py +++ b/src/finn/util/pyxsi_rpcserver.py @@ -42,15 +42,84 @@ assert False, "Must be launched with LD_LIBRARY_PATH=$(XILINX_VIVADO)/lib/lnx64.o" -# Restrict to a particular path. class RequestHandler(SimpleXMLRPCRequestHandler): rpc_paths = ("/RPC2",) +# we need to do some conversions while pyxsi calls are going through xmlrpc: +# * sim objs become strings (stored in the sim_id_to_obj dict until done) +# * signal values become strings +# (converted back and forth to Python integers) + +sim_id_to_obj = {} + + +def compile_sim_obj(top_module_name, source_list, sim_out_dir): + ret = pyxsi_utils.compile_sim_obj(top_module_name, source_list, sim_out_dir) + return ret + + +def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): + ret_sim_obj = pyxsi_utils.load_sim_obj( + sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog + ) + ret_sim_id = str(id(ret_sim_obj)) + sim_id_to_obj[ret_sim_id] = ret_sim_obj + return ret_sim_id + + +def find_signal(sim_id, signal_name): + sim = sim_id_to_obj[sim_id] + return pyxsi_utils._find_signal(sim, signal_name) + + +def read_signal(sim_id, signal_name): + sim = sim_id_to_obj[sim_id] + signal_value = pyxsi_utils._read_signal(sim, signal_name) + signal_value_str = str(signal_value) + return signal_value_str + + +def write_signal(sim_id, signal_name, signal_value_str): + sim = sim_id_to_obj[sim_id] + signal_value = int(signal_value_str) + pyxsi_utils._write_signal(sim, signal_name, signal_value) + + +def reset_rtlsim(sim_id, rst_name, active_low, clk_name): + sim = sim_id_to_obj[sim_id] + pyxsi_utils.reset_rtlsim(sim, rst_name, active_low, clk_name) + + +def toggle_clk(sim_id, clk_name): + sim = sim_id_to_obj[sim_id] + pyxsi_utils.toggle_clk(sim, clk_name) + + +def toggle_neg_edge(sim_id, clk_name): + sim 
= sim_id_to_obj[sim_id] + pyxsi_utils.toggle_neg_edge(sim, clk_name) + + +def toggle_pos_edge(sim_id, clk_name): + sim = sim_id_to_obj[sim_id] + pyxsi_utils.toggle_pos_edge(sim, clk_name) + + # Create server port = 8000 -with SimpleXMLRPCServer(("localhost", port), requestHandler=RequestHandler) as server: +with SimpleXMLRPCServer( + ("localhost", port), requestHandler=RequestHandler, allow_none=True +) as server: server.register_introspection_functions() - server.register_instance(pyxsi_utils) + server.register_function(compile_sim_obj) + server.register_function(load_sim_obj) + server.register_function(find_signal) + server.register_function(read_signal) + server.register_function(write_signal) + server.register_function(reset_rtlsim) + server.register_function(toggle_clk) + server.register_function(toggle_neg_edge) + server.register_function(toggle_pos_edge) print(f"pyxsi RPC server is now running at {port}") server.serve_forever() From 6250c8f8b71efd73e370f51a6a330d0c63e99f54 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Sat, 5 Oct 2024 00:12:26 +0200 Subject: [PATCH 021/102] [Infra] don't launch pyxsi RPC server on startup (will be as needed) --- docker/finn_entrypoint.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index e53a5fb071..1eb9345f89 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -118,10 +118,6 @@ else cd $OLDPWD fi export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi:${FINN_ROOT}/deps/pyxsi/py - # launch the pyxsi RPC server and let it run in the background - gecho "Launching pyxsi RPC server..." 
- LD_LIBRARY_PATH=${XILINX_VIVADO}/lib/lnx64.o python ${FINN_ROOT}/src/finn/util/pyxsi_rpcserver.py &> ${FINN_BUILD_DIR}/pyxsi_rpcserver.log & - sleep 1 fi if [ -f "$HLS_PATH/settings64.sh" ];then From 42cbe0fd66ed7b6b16ff923e99f659b98d578211 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Sat, 5 Oct 2024 00:13:07 +0200 Subject: [PATCH 022/102] [pyxsi] start (and then terminate when done) one pyxsi RPC server per rtlsim --- .../fpgadataflow/hls/addstreams_hls.py | 13 +- src/finn/util/pyxsi_rpcclient.py | 308 ++++++++++-------- src/finn/util/pyxsi_rpcserver.py | 10 +- 3 files changed, 185 insertions(+), 146 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index 67d40d3051..e55fcf8a27 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -29,11 +29,11 @@ import numpy as np import os +import finn.util.pyxsi_rpcclient as pyxsi_rpcclient from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.basic import make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -from finn.util.pyxsi_rpcclient import PyXSIRPCProxy class AddStreams_hls(AddStreams, HLSBackend): @@ -130,6 +130,7 @@ def execute_node(self, context, graph): # super().reset_rtlsim(sim) # super().toggle_clk(sim) rtlsim_output = self.rtlsim(sim, rtlsim_inp0, rtlsim_inp1) + pyxsi_rpcclient.close_sim(sim) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() @@ -258,7 +259,7 @@ def prepare_rtlsim(self): verilog_files = self.get_all_verilog_filenames(abspath=True) single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") - ret = PyXSIRPCProxy().compile_sim_obj( + ret = pyxsi_rpcclient.compile_sim_obj( self.get_verilog_top_module_name(), verilog_files, single_src_dir ) @@ 
-269,8 +270,8 @@ def get_rtlsim(self): sim_xo_path = self.get_nodeattr("rtlsim_so") sim_base, sim_rel = sim_xo_path.split("xsim.dir") sim_rel = "xsim.dir" + sim_rel - tracefile = "trace.wdb" - return PyXSIRPCProxy().load_sim_obj(sim_base, sim_rel, tracefile) + tracefile = None + return pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, tracefile) def rtlsim(self, sim, inp, inp2=None): """Runs the pyverilator simulation by passing the input values to the simulation, @@ -278,11 +279,11 @@ def rtlsim(self, sim, inp, inp2=None): observation loop that can abort the simulation if no output value is produced after 100 cycles.""" - PyXSIRPCProxy().reset_rtlsim(sim) + pyxsi_rpcclient.reset_rtlsim(sim) io_dict = {"inputs": {"in0": inp, "in1": inp2}, "outputs": {"out": []}} num_out_values = self.get_number_output_values() sname = "_" + self.hls_sname() + "_" - total_cycle_count = PyXSIRPCProxy().rtlsim_multi_io( + total_cycle_count = pyxsi_rpcclient.rtlsim_multi_io( sim, io_dict, num_out_values, sname=sname ) self.set_nodeattr("cycles_rtlsim", total_cycle_count) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index fdda853601..9be41b8fa3 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -26,141 +26,181 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os +import pyxsi_utils +import subprocess import xmlrpc.client +from finn.util.basic import get_finn_root, get_vivado_root + + +def compile_sim_obj(top_module_name, source_list, sim_out_dir): + # compile_sim_obj does not require special envvar settings and is safe to call + # directly without any RPC + ret = pyxsi_utils.compile_sim_obj(top_module_name, source_list, sim_out_dir) + return ret + + +def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): + # launch a pyxsi RPC server + proc_env = os.environ.copy() + proc_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" + command = ["python", "-u", get_finn_root() + "/src/finn/util/pyxsi_rpcserver.py"] + proc = subprocess.Popen( + command, + bufsize=1, + env=proc_env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True, + ) + rpc_port = 8000 + line = proc.stdout.readline() + if "pyxsi RPC server is now running on" in line: + rpc_port = int(line.split(" on ")[1]) + else: + assert False, "Unexpected output from pyxsi RPC server" + rpc_proxy = xmlrpc.client.ServerProxy(f"http://localhost:{rpc_port}", allow_none=True) + sim_id = rpc_proxy.load_sim_obj( + sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog + ) + # return all relevant information for rtlsim + handle = (sim_id, rpc_proxy, rpc_port, proc) + return handle + + +def close_sim(handle): + (_, _, _, proc) = handle + proc.terminate() + + +def find_signal(handle, signal_name): + (sim_id, rpc_proxy, _, _) = handle + ret = rpc_proxy.find_signal(sim_id, signal_name) + return ret + + +def read_signal(handle, signal_name): + (sim_id, rpc_proxy, _, _) = handle + signal_value_str = rpc_proxy.read_signal(sim_id, signal_name) + signal_value = int(signal_value_str) + return signal_value + + +def write_signal(handle, signal_name, signal_value): + (sim_id, rpc_proxy, _, _) = handle + signal_value_str = str(signal_value) + rpc_proxy.write_signal(sim_id, signal_name, signal_value_str) 
+ + +def reset_rtlsim(handle, rst_name="ap_rst_n", active_low=True, clk_name="ap_clk"): + (sim_id, rpc_proxy, _, _) = handle + rpc_proxy.reset_rtlsim(sim_id, rst_name, active_low, clk_name) + + +def toggle_clk(handle, clk_name="ap_clk"): + (sim_id, rpc_proxy, _, _) = handle + rpc_proxy.toggle_clk(sim_id, clk_name) + + +def toggle_neg_edge(handle, clk_name="ap_clk"): + (sim_id, rpc_proxy, _, _) = handle + rpc_proxy.toggle_neg_edge(sim_id, clk_name) + + +def toggle_pos_edge(handle, clk_name="ap_clk"): + (sim_id, rpc_proxy, _, _) = handle + rpc_proxy.toggle_pos_edge(sim_id, clk_name) + + +def rtlsim_multi_io( + handle, + io_dict, + num_out_values, + sname="_V_V_", + liveness_threshold=10000, + hook_preclk=None, + hook_postclk=None, +): + for outp in io_dict["outputs"]: + write_signal(handle, outp + sname + "TREADY", 1) + + # observe if output is completely calculated + # total_cycle_count will contain the number of cycles the calculation ran + output_done = False + total_cycle_count = 0 + output_count = 0 + old_output_count = 0 + + # avoid infinite looping of simulation by aborting when there is no change in + # output values after 100 cycles + no_change_count = 0 + + while not (output_done): + signals_to_write = {} + if hook_preclk: + hook_preclk(handle) + # Toggle falling edge to arrive at a delta cycle before the rising edge + toggle_neg_edge(handle) + + # examine signals, decide how to act based on that but don't update yet + # so only read_signal access in this block, no _write_signal + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + signal_name = inp + sname + if ( + read_signal(handle, signal_name + "TREADY") == 1 + and read_signal(handle, signal_name + "TVALID") == 1 + ): + inputs = inputs[1:] + io_dict["inputs"][inp] = inputs -class PyXSIRPCProxy(object): - def __init__(self, wrapped=None): - if wrapped is None: - wrapped = xmlrpc.client.ServerProxy("http://localhost:8000", allow_none=True) - self.wrapped = wrapped - - def 
compile_sim_obj(self, top_module_name, source_list, sim_out_dir): - ret = self.wrapped.compile_sim_obj(top_module_name, source_list, sim_out_dir) - return ret - - def load_sim_obj( - self, sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True - ): - ret = self.wrapped.load_sim_obj( - sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog - ) - return ret - - def find_signal(self, sim_id, signal_name): - ret = self.wrapped.find_signal(sim_id, signal_name) - return ret - - def read_signal(self, sim_id, signal_name): - signal_value_str = self.wrapped.read_signal(sim_id, signal_name) - signal_value = int(signal_value_str) - return signal_value - - def write_signal(self, sim_id, signal_name, signal_value): - signal_value_str = str(signal_value) - self.wrapped.write_signal(sim_id, signal_name, signal_value_str) - - def reset_rtlsim(self, sim_id, rst_name="ap_rst_n", active_low=True, clk_name="ap_clk"): - self.wrapped.reset_rtlsim(sim_id, rst_name, active_low, clk_name) - - def toggle_clk(self, sim_id, clk_name="ap_clk"): - self.wrapped.toggle_clk(sim_id, clk_name) - - def toggle_neg_edge(self, sim_id, clk_name="ap_clk"): - self.wrapped.toggle_neg_edge(sim_id, clk_name) - - def toggle_pos_edge(self, sim_id, clk_name="ap_clk"): - self.wrapped.toggle_pos_edge(sim_id, clk_name) - - def rtlsim_multi_io( - self, - sim_id, - io_dict, - num_out_values, - sname="_V_V_", - liveness_threshold=10000, - hook_preclk=None, - hook_postclk=None, - ): for outp in io_dict["outputs"]: - self.write_signal(sim_id, outp + sname + "TREADY", 1) - - # observe if output is completely calculated - # total_cycle_count will contain the number of cycles the calculation ran - output_done = False - total_cycle_count = 0 - output_count = 0 - old_output_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - - while not (output_done): - signals_to_write = {} - if hook_preclk: - 
hook_preclk(sim_id) - # Toggle falling edge to arrive at a delta cycle before the rising edge - self.toggle_neg_edge(sim_id) - - # examine signals, decide how to act based on that but don't update yet - # so only read_signal access in this block, no _write_signal - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - signal_name = inp + sname - if ( - self.read_signal(sim_id, signal_name + "TREADY") == 1 - and self.read_signal(sim_id, signal_name + "TVALID") == 1 - ): - inputs = inputs[1:] - io_dict["inputs"][inp] = inputs - - for outp in io_dict["outputs"]: - outputs = io_dict["outputs"][outp] - signal_name = outp + sname - if ( - self.read_signal(sim_id, signal_name + "TREADY") == 1 - and self.read_signal(sim_id, signal_name + "TVALID") == 1 - ): - outputs = outputs + [self.read_signal(sim_id, signal_name + "TDATA")] - output_count += 1 - io_dict["outputs"][outp] = outputs - - # update signals based on decisions in previous block, but don't examine anything - # so only write_signal access in this block, no read_signal - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - signal_name = inp + sname - signals_to_write[signal_name + "TVALID"] = 1 if len(inputs) > 0 else 0 - signals_to_write[signal_name + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 - - # Toggle rising edge to arrive at a delta cycle before the falling edge - self.toggle_pos_edge(sim_id) - - for k, v in signals_to_write.items(): - self.write_signal(sim_id, k, v) - - if hook_postclk: - hook_postclk(sim_id) - - total_cycle_count = total_cycle_count + 1 - - if output_count == old_output_count: - no_change_count = no_change_count + 1 - else: - no_change_count = 0 - old_output_count = output_count - - # check if all expected output words received - if output_count == num_out_values: - output_done = True - - # end sim on timeout - if no_change_count == liveness_threshold: - raise Exception( - "Error in simulation! Takes too long to produce output. 
" - "Consider setting the liveness_threshold parameter to a " - "larger value." - ) - - return total_cycle_count + outputs = io_dict["outputs"][outp] + signal_name = outp + sname + if ( + read_signal(handle, signal_name + "TREADY") == 1 + and read_signal(handle, signal_name + "TVALID") == 1 + ): + outputs = outputs + [read_signal(handle, signal_name + "TDATA")] + output_count += 1 + io_dict["outputs"][outp] = outputs + + # update signals based on decisions in previous block, but don't examine anything + # so only write_signal access in this block, no read_signal + for inp in io_dict["inputs"]: + inputs = io_dict["inputs"][inp] + signal_name = inp + sname + signals_to_write[signal_name + "TVALID"] = 1 if len(inputs) > 0 else 0 + signals_to_write[signal_name + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 + + # Toggle rising edge to arrive at a delta cycle before the falling edge + toggle_pos_edge(handle) + + for k, v in signals_to_write.items(): + write_signal(handle, k, v) + + if hook_postclk: + hook_postclk(handle) + + total_cycle_count = total_cycle_count + 1 + + if output_count == old_output_count: + no_change_count = no_change_count + 1 + else: + no_change_count = 0 + old_output_count = output_count + + # check if all expected output words received + if output_count == num_out_values: + output_done = True + + # end sim on timeout + if no_change_count == liveness_threshold: + raise Exception( + "Error in simulation! Takes too long to produce output. " + "Consider setting the liveness_threshold parameter to a " + "larger value." 
+ ) + + return total_cycle_count diff --git a/src/finn/util/pyxsi_rpcserver.py b/src/finn/util/pyxsi_rpcserver.py index 650a4ebabd..209986bcfd 100644 --- a/src/finn/util/pyxsi_rpcserver.py +++ b/src/finn/util/pyxsi_rpcserver.py @@ -106,11 +106,9 @@ def toggle_pos_edge(sim_id, clk_name): pyxsi_utils.toggle_pos_edge(sim, clk_name) -# Create server -port = 8000 -with SimpleXMLRPCServer( - ("localhost", port), requestHandler=RequestHandler, allow_none=True -) as server: +# ask to create server on port 0 to find an available port +with SimpleXMLRPCServer(("localhost", 0), requestHandler=RequestHandler, allow_none=True) as server: + port = server.server_address[1] server.register_introspection_functions() server.register_function(compile_sim_obj) server.register_function(load_sim_obj) @@ -121,5 +119,5 @@ def toggle_pos_edge(sim_id, clk_name): server.register_function(toggle_clk) server.register_function(toggle_neg_edge) server.register_function(toggle_pos_edge) - print(f"pyxsi RPC server is now running at {port}") + print(f"pyxsi RPC server is now running on {port}") server.serve_forever() From 890ce81aa10e96afbd28ee984f596c6c3c2fb42b Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 10 Oct 2024 10:05:23 +0200 Subject: [PATCH 023/102] [rtlsim] pyxsi for node-by-node rtlsim enablement via attribute --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 49 ++-- src/finn/custom_op/fpgadataflow/hwcustomop.py | 231 +++++++++++------- 2 files changed, 170 insertions(+), 110 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..c10d93792f 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -32,6 +32,7 @@ from abc import ABC, abstractmethod from qonnx.core.datatype import DataType +import finn.util.pyxsi_rpcclient as pyxsi_rpcclient from finn.custom_op.fpgadataflow import templates from finn.util.basic import CppBuilder, 
get_rtlsim_trace_depth, make_build_dir from finn.util.hls import CallHLS @@ -87,25 +88,39 @@ def prepare_rtlsim(self): for this node, sets the rtlsim_so attribute to its path and returns a PyVerilator wrapper around it.""" - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - + rtlsim_backend = self.get_nodeattr("rtlsim_backend") verilog_files = self.get_all_verilog_filenames(abspath=True) single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") - tmp_build_dir = make_build_dir("pyverilator_" + self.onnx_node.name + "_") - target_file = single_src_dir + "/" + self.get_verilog_top_module_name() + ".v" - make_single_source_file(verilog_files, target_file) - - # build the Verilator emu library - sim = PyVerilator.build( - self.get_verilog_top_module_name() + ".v", - build_dir=tmp_build_dir, - verilog_path=[single_src_dir], - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) + if rtlsim_backend == "pyverilator": + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + tmp_build_dir = make_build_dir("pyverilator_" + self.onnx_node.name + "_") + target_file = single_src_dir + "/" + self.get_verilog_top_module_name() + ".v" + make_single_source_file(verilog_files, target_file) + + # build the Verilator emu library + sim = PyVerilator.build( + self.get_verilog_top_module_name() + ".v", + build_dir=tmp_build_dir, + verilog_path=[single_src_dir], + trace_depth=get_rtlsim_trace_depth(), + top_module_name=self.get_verilog_top_module_name(), + ) + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", sim.lib._name) + elif rtlsim_backend == "pyxsi": + ret = pyxsi_rpcclient.compile_sim_obj( + self.get_verilog_top_module_name(), verilog_files, single_src_dir + ) + # save generated lib filename in attribute + 
self.set_nodeattr("rtlsim_so", ret[0] + "/" + ret[1]) + # TODO return val of this function is never used + # refactor s.t. it does not return anything at all, + # consistently between pyverilator and pyxsi + sim = None + else: + assert False, "Unknown rtlsim_backend" + return sim def code_generation_ipgen(self, model, fpgapart, clk): diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index b40b8f3074..36eb3df536 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -41,6 +41,8 @@ except ModuleNotFoundError: PyVerilator = None +import finn.util.pyxsi_rpcclient as pyxsi_rpcclient + class HWCustomOp(CustomOp): """HWCustomOp class all custom ops that can be implemented with either @@ -96,6 +98,8 @@ def get_nodeattr_types(self): # amount of zero padding inserted during chrc. "io_chrc_pads_in": ("ints", False, []), "io_chrc_pads_out": ("ints", False, []), + # experimental: rtlsim backend + "rtlsim_backend": ("s", False, "pyverilator", {"pyverilator", "pyxsi"}), } def get_verilog_top_module_name(self): @@ -132,8 +136,23 @@ def get_rtlsim(self): rtlsim_so = self.get_nodeattr("rtlsim_so") assert os.path.isfile(rtlsim_so), "Cannot find rtlsim library." 
- # create PyVerilator wrapper - sim = PyVerilator(rtlsim_so) + rtlsim_backend = self.get_nodeattr("rtlsim_backend") + + if rtlsim_backend == "pyverilator": + # create PyVerilator wrapper + sim = PyVerilator(rtlsim_so) + elif rtlsim_backend == "pyxsi": + # load up pyXSI sim using pyxsi_rpcclient + sim_base, sim_rel = rtlsim_so.split("xsim.dir") + sim_rel = "xsim.dir" + sim_rel + # pass in correct tracefile from attribute + tracefile = self.get_nodeattr("rtlsim_trace") + if tracefile == "default": + tracefile = self.onnx_node.name + ".wdb" + sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, tracefile) + else: + assert False, "Unknown rtlsim_backend" + return sim def node_res_estimation(self, fpgapart): @@ -194,15 +213,27 @@ def get_op_and_param_counts(self): def reset_rtlsim(self, sim): """Sets reset input in pyverilator to zero, toggles the clock and set it back to one""" - sim.io.ap_rst_n = 0 - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 - sim.io.ap_rst_n = 1 + rtlsim_backend = self.get_nodeattr("rtlsim_backend") + if rtlsim_backend == "pyverilator": + sim.io.ap_rst_n = 0 + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + sim.io.ap_rst_n = 1 + elif rtlsim_backend == "pyxsi": + pyxsi_rpcclient.reset_rtlsim(sim) + else: + assert False, f"Unknown rtlsim_backend {rtlsim_backend}" def toggle_clk(self, sim): """Toggles the clock input in pyverilator once.""" - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 + rtlsim_backend = self.get_nodeattr("rtlsim_backend") + if rtlsim_backend == "pyverilator": + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + elif rtlsim_backend == "pyxsi": + pyxsi_rpcclient.toggle_clk(sim) + else: + assert False, f"Unknown rtlsim_backend {rtlsim_backend}" def rtlsim(self, sim, inp, inp2=None): """Runs the pyverilator simulation by passing the input values to the simulation, @@ -210,98 +241,112 @@ def rtlsim(self, sim, inp, inp2=None): observation loop that can abort the simulation if no output value is produced after 100 cycles.""" - trace_file = 
self.get_nodeattr("rtlsim_trace") - if trace_file != "": - if trace_file == "default": - trace_file = self.onnx_node.name + ".vcd" - sim.start_vcd_trace(trace_file) - inputs = inp - outputs = [] - sname = self.hls_sname() - o_ready = "out_" + sname + "_TREADY" - o_valid = "out_" + sname + "_TVALID" - o_data = "out_" + sname + "_TDATA" - in0_ready = "in0_" + sname + "_TREADY" - in0_valid = "in0_" + sname + "_TVALID" - in0_data = "in0_" + sname + "_TDATA" - in1_ready = "in1_" + sname + "_TREADY" - in1_valid = "in1_" + sname + "_TVALID" - in1_data = "in1_" + sname + "_TDATA" - - sim.io[o_ready] = 1 - - # observe if output is completely calculated - # observation_count will contain the number of cycles the calculation ran - num_out_values = self.get_number_output_values() - output_observed = False - observation_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - old_outputs = outputs - liveness_threshold = pyverilate_get_liveness_threshold_cycles() - - while not (output_observed): - sim.io[in0_valid] = 1 if len(inputs) > 0 else 0 - sim.io[in0_data] = inputs[0] if len(inputs) > 0 else 0 - if sim.io[in0_ready] == 1 and sim.io[in0_valid] == 1: - inputs = inputs[1:] - - if inp2 is not None: - sim.io[in1_valid] = 1 if len(inp2) > 0 else 0 - sim.io[in1_data] = inp2[0] if len(inp2) > 0 else 0 - if sim.io[in1_ready] == 1 and sim.io[in1_valid] == 1: - inp2 = inp2[1:] - - if sim.io[o_valid] == 1 and sim.io[o_ready] == 1: - outputs = outputs + [sim.io[o_data]] - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 + rtlsim_backend = self.get_nodeattr("rtlsim_backend") + if rtlsim_backend == "pyverilator": + trace_file = self.get_nodeattr("rtlsim_trace") + if trace_file != "": + if trace_file == "default": + trace_file = self.onnx_node.name + ".vcd" + sim.start_vcd_trace(trace_file) + inputs = inp + outputs = [] + sname = self.hls_sname() + o_ready = "out_" + sname + "_TREADY" + o_valid = "out_" + 
sname + "_TVALID" + o_data = "out_" + sname + "_TDATA" + in0_ready = "in0_" + sname + "_TREADY" + in0_valid = "in0_" + sname + "_TVALID" + in0_data = "in0_" + sname + "_TDATA" + in1_ready = "in1_" + sname + "_TREADY" + in1_valid = "in1_" + sname + "_TVALID" + in1_data = "in1_" + sname + "_TDATA" + + sim.io[o_ready] = 1 + + # observe if output is completely calculated + # observation_count will contain the number of cycles the calculation ran + num_out_values = self.get_number_output_values() + output_observed = False + observation_count = 0 + + # avoid infinite looping of simulation by aborting when there is no change in + # output values after 100 cycles + no_change_count = 0 + old_outputs = outputs + liveness_threshold = pyverilate_get_liveness_threshold_cycles() + + while not (output_observed): + sim.io[in0_valid] = 1 if len(inputs) > 0 else 0 + sim.io[in0_data] = inputs[0] if len(inputs) > 0 else 0 + if sim.io[in0_ready] == 1 and sim.io[in0_valid] == 1: + inputs = inputs[1:] + + if inp2 is not None: + sim.io[in1_valid] = 1 if len(inp2) > 0 else 0 + sim.io[in1_data] = inp2[0] if len(inp2) > 0 else 0 + if sim.io[in1_ready] == 1 and sim.io[in1_valid] == 1: + inp2 = inp2[1:] + + if sim.io[o_valid] == 1 and sim.io[o_ready] == 1: + outputs = outputs + [sim.io[o_data]] + sim.io.ap_clk = 1 + sim.io.ap_clk = 0 + + observation_count = observation_count + 1 + no_change_count = no_change_count + 1 + + if len(outputs) == num_out_values: + self.set_nodeattr("cycles_rtlsim", observation_count) + output_observed = True + + if no_change_count == liveness_threshold: + if old_outputs == outputs: + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() + raise Exception( + "Error in simulation! Takes too long to produce output. " + "Consider setting the LIVENESS_THRESHOLD env.var. to a " + "larger value." 
+ ) + else: + no_change_count = 0 + old_outputs = outputs + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() + elif rtlsim_backend == "pyxsi": + assert False, "pyxsi only supports rtlsim_multi_io for now" + else: + assert False, f"Unknown rtlsim_backend {rtlsim_backend}" - observation_count = observation_count + 1 - no_change_count = no_change_count + 1 - - if len(outputs) == num_out_values: - self.set_nodeattr("cycles_rtlsim", observation_count) - output_observed = True - - if no_change_count == liveness_threshold: - if old_outputs == outputs: - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - raise Exception( - "Error in simulation! Takes too long to produce output. " - "Consider setting the LIVENESS_THRESHOLD env.var. to a " - "larger value." - ) - else: - no_change_count = 0 - old_outputs = outputs - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() return outputs def rtlsim_multi_io(self, sim, io_dict): "Run rtlsim for this node, supports multiple i/o streams." 
- - # signal name + # signal name suffix sname = "_" + self.hls_sname() + "_" - - trace_file = self.get_nodeattr("rtlsim_trace") - if trace_file == "default": - trace_file = self.onnx_node.name + ".vcd" + rtlsim_backend = self.get_nodeattr("rtlsim_backend") num_out_values = self.get_number_output_values() - total_cycle_count = rtlsim_multi_io( - sim, - io_dict, - num_out_values, - trace_file=trace_file, - sname=sname, - liveness_threshold=pyverilate_get_liveness_threshold_cycles(), - ) + if rtlsim_backend == "pyverilator": + trace_file = self.get_nodeattr("rtlsim_trace") + if trace_file == "default": + trace_file = self.onnx_node.name + ".vcd" + total_cycle_count = rtlsim_multi_io( + sim, + io_dict, + num_out_values, + trace_file=trace_file, + sname=sname, + liveness_threshold=pyverilate_get_liveness_threshold_cycles(), + ) + elif rtlsim_backend == "pyxsi": + total_cycle_count = pyxsi_rpcclient.rtlsim_multi_io( + sim, io_dict, num_out_values, sname=sname + ) + else: + assert False, f"Unknown rtlsim_backend {rtlsim_backend}" + self.set_nodeattr("cycles_rtlsim", total_cycle_count) def generate_params(self, model, path): From c04a4a8269fc23fe6155ff7c376ff791ef9f9c8d Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 10 Oct 2024 13:50:10 +0200 Subject: [PATCH 024/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index a6e63cb87e..8c6511ff31 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="1451a2fd57f406cc6fb5b9128eae48eb3dad00ad" +PYXSI_COMMIT="d79a00c54a5d1f775249d913d75a18c2f67a6dd6" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" 
From b266a8a10ecc18dd7c0b9ac08fabb8bd8771f645 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 10 Oct 2024 13:50:53 +0200 Subject: [PATCH 025/102] [pyxsi] expose close_rtlsim() to exit cleanly --- src/finn/util/pyxsi_rpcclient.py | 5 +++-- src/finn/util/pyxsi_rpcserver.py | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index 9be41b8fa3..a8c7ae6a4f 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -69,8 +69,9 @@ def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_ return handle -def close_sim(handle): - (_, _, _, proc) = handle +def close_rtlsim(handle): + (sim_id, rpc_proxy, _, proc) = handle + rpc_proxy.close_rtlsim(sim_id) proc.terminate() diff --git a/src/finn/util/pyxsi_rpcserver.py b/src/finn/util/pyxsi_rpcserver.py index 209986bcfd..8ff6f0404f 100644 --- a/src/finn/util/pyxsi_rpcserver.py +++ b/src/finn/util/pyxsi_rpcserver.py @@ -59,7 +59,7 @@ def compile_sim_obj(top_module_name, source_list, sim_out_dir): return ret -def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): +def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog): ret_sim_obj = pyxsi_utils.load_sim_obj( sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog ) @@ -106,6 +106,11 @@ def toggle_pos_edge(sim_id, clk_name): pyxsi_utils.toggle_pos_edge(sim, clk_name) +def close_rtlsim(sim_id): + sim = sim_id_to_obj[sim_id] + pyxsi_utils.close_rtlsim(sim) + + # ask to create server on port 0 to find an available port with SimpleXMLRPCServer(("localhost", 0), requestHandler=RequestHandler, allow_none=True) as server: port = server.server_address[1] @@ -119,5 +124,6 @@ def toggle_pos_edge(sim_id, clk_name): server.register_function(toggle_clk) server.register_function(toggle_neg_edge) server.register_function(toggle_pos_edge) + 
server.register_function(close_rtlsim) print(f"pyxsi RPC server is now running on {port}") server.serve_forever() From ab3c36f804f02eb7a8d235f1bd7b43896fa70f15 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 11 Oct 2024 10:47:40 +0100 Subject: [PATCH 026/102] [rtlsim] call close_rtlsim() to exit cleanly from pyxsi --- .../custom_op/fpgadataflow/hls/addstreams_hls.py | 2 +- src/finn/custom_op/fpgadataflow/hwcustomop.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index e55fcf8a27..988d309efe 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -130,7 +130,7 @@ def execute_node(self, context, graph): # super().reset_rtlsim(sim) # super().toggle_clk(sim) rtlsim_output = self.rtlsim(sim, rtlsim_inp0, rtlsim_inp1) - pyxsi_rpcclient.close_sim(sim) + pyxsi_rpcclient.close_rtlsim(sim) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 36eb3df536..4d90664f78 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -155,6 +155,18 @@ def get_rtlsim(self): return sim + def close_rtlsim(self, sim): + "Close and free up resources for rtlsim." 
+ rtlsim_backend = self.get_nodeattr("rtlsim_backend") + + if rtlsim_backend == "pyverilator": + # no action needed + pass + elif rtlsim_backend == "pyxsi": + pyxsi_rpcclient.close_rtlsim(sim) + else: + assert False, "Unknown rtlsim_backend" + def node_res_estimation(self, fpgapart): """Returns summarized resource estimation of BRAMs and LUTs of the node as a dictionary.""" From fe7a942efe4f1f5dfb90d5b8727ff27c3cacb45e Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 10 Oct 2024 14:45:57 +0200 Subject: [PATCH 027/102] [pyxsi] redirect rpcserver out to file for visible logging --- src/finn/util/pyxsi_rpcclient.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index a8c7ae6a4f..cafa63060b 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -30,6 +30,7 @@ import pyxsi_utils import subprocess import xmlrpc.client +from time import sleep from finn.util.basic import get_finn_root, get_vivado_root @@ -45,19 +46,25 @@ def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_ # launch a pyxsi RPC server proc_env = os.environ.copy() proc_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" + logfile_wr_fd = open(sim_out_dir + "/pyxsi_rpcserver.log", "w") + logfile_rd_fd = open(sim_out_dir + "/pyxsi_rpcserver.log", "r") command = ["python", "-u", get_finn_root() + "/src/finn/util/pyxsi_rpcserver.py"] proc = subprocess.Popen( command, bufsize=1, env=proc_env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, + stdout=logfile_wr_fd, + stderr=logfile_wr_fd, universal_newlines=True, ) rpc_port = 8000 - line = proc.stdout.readline() + # TODO sleep to ensure RPC server has started before trying to read its port number from stdout + # bit hacky - is there a better way of communicating the open port number back to the client? 
+ sleep(0.1) + line = logfile_rd_fd.readline() if "pyxsi RPC server is now running on" in line: rpc_port = int(line.split(" on ")[1]) + logfile_rd_fd.close() else: assert False, "Unexpected output from pyxsi RPC server" rpc_proxy = xmlrpc.client.ServerProxy(f"http://localhost:{rpc_port}", allow_none=True) From 655b90475e6115d966cd85694a9be9a9aaa6a2b4 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 10 Oct 2024 20:57:01 +0100 Subject: [PATCH 028/102] [HLS] use subcore path only if it exists --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index c10d93792f..76e79693c3 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -66,8 +66,15 @@ def get_all_verilog_paths(self): ), """Node attribute "code_gen_dir_ipgen" is not set. Please run HLSSynthIP first.""" verilog_path = "{}/project_{}/sol1/impl/verilog/".format(code_gen_dir, self.onnx_node.name) - # default impl only returns the HLS verilog codegen dir - return [verilog_path] + subcore_verilog_path = "{}/project_{}/sol1/impl/ip/hdl/ip/".format( + code_gen_dir, self.onnx_node.name + ) + # default impl only returns the HLS verilog codegen dir and subcore (impl/ip/hdl/ip) dir + # if it exists + ret = [verilog_path] + if os.path.isdir(subcore_verilog_path): + ret += [subcore_verilog_path] + return ret def get_all_verilog_filenames(self, abspath=False): "Return list of all Verilog files used for this node." 
From acd88e5b23688929ff0f274eaa9176e5b599bdb1 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 10 Oct 2024 20:57:36 +0100 Subject: [PATCH 029/102] [pyxsi] slightly more reliable start procedure for RPC server --- src/finn/util/pyxsi_rpcclient.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index cafa63060b..650e7a2632 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -60,13 +60,17 @@ def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_ rpc_port = 8000 # TODO sleep to ensure RPC server has started before trying to read its port number from stdout # bit hacky - is there a better way of communicating the open port number back to the client? - sleep(0.1) line = logfile_rd_fd.readline() + retries = 10 + while line == "" and retries > 0: + sleep(0.1) + line = logfile_rd_fd.readline() + retries -= 1 if "pyxsi RPC server is now running on" in line: rpc_port = int(line.split(" on ")[1]) logfile_rd_fd.close() else: - assert False, "Unexpected output from pyxsi RPC server" + assert False, f"Unexpected output from pyxsi RPC server: {line}" rpc_proxy = xmlrpc.client.ServerProxy(f"http://localhost:{rpc_port}", allow_none=True) sim_id = rpc_proxy.load_sim_obj( sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog From c817adbc1ab192290a7a4b131501a9224d018f1b Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 11 Oct 2024 16:58:45 +0100 Subject: [PATCH 030/102] [prepare rtlsim] move rtlsim prep to rtlbackend for rtl layers --- src/finn/core/onnx_exec.py | 64 +++++++++---------- .../rtl/convolutioninputgenerator_rtl.py | 30 +-------- .../fpgadataflow/rtl/fmpadding_rtl.py | 31 +-------- .../rtl/streamingdatawidthconverter_rtl.py | 30 +-------- .../fpgadataflow/rtl/streamingfifo_rtl.py | 36 +++-------- src/finn/custom_op/fpgadataflow/rtlbackend.py | 36 ++++++++++- 6 files changed, 77 insertions(+), 150 
deletions(-) diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py index 588e97e9e4..7c0d69e17a 100644 --- a/src/finn/core/onnx_exec.py +++ b/src/finn/core/onnx_exec.py @@ -52,44 +52,38 @@ def execute_onnx(model, input_dict, return_full_exec_context=False, start_node=N model_exec_mode = model.get_metadata_prop("exec_mode") if (model_exec_mode is None) or (model_exec_mode == ""): return execute_onnx_base(model, input_dict, return_full_exec_context, start_node, end_node) + elif model_exec_mode == "rtlsim": + # check sanity of model and then use stitched IP for rtlsim + if not model.check_all_tensor_shapes_specified(): + raise Exception("Found unspecified tensor shapes, try infer_shapes") + ret = model.analysis(ta.nodes_topologically_sorted) + assert ( + ret["nodes_topologically_sorted"] is True + ), """Nodes must be + topologically sorted.""" - if not model.check_all_tensor_shapes_specified(): - raise Exception("Found unspecified tensor shapes, try infer_shapes") - ret = model.analysis(ta.nodes_topologically_sorted) - assert ( - ret["nodes_topologically_sorted"] is True - ), """Nodes must be - topologically sorted.""" - - graph = model.graph - # first, we need to make sure that every variable required by the graph has - # some buffer associated with it. 
this includes graph inputs (which includes - # the input data as well as the trained parameters) and the graph ValueInfo - # (intermediate tensors between layers) - # this is provided by the execution_context, which is a dict of np.ndarray - execution_context = model.make_empty_exec_context() - # fill in any inputs provided to this function - for inp_name in input_dict.keys(): - if inp_name in execution_context: - if execution_context[inp_name].shape == input_dict[inp_name].shape: - execution_context[inp_name] = input_dict[inp_name] - else: - raise Exception( - "Shape mismatch for provided input %s: found %s expected %s " - % ( - inp_name, - str(execution_context[inp_name].shape), - str(input_dict[inp_name].shape), + graph = model.graph + # first, we need to make sure that every variable required by the graph has + # some buffer associated with it. this includes graph inputs (which includes + # the input data as well as the trained parameters) and the graph ValueInfo + # (intermediate tensors between layers) + # this is provided by the execution_context, which is a dict of np.ndarray + execution_context = model.make_empty_exec_context() + # fill in any inputs provided to this function + for inp_name in input_dict.keys(): + if inp_name in execution_context: + if execution_context[inp_name].shape == input_dict[inp_name].shape: + execution_context[inp_name] = input_dict[inp_name] + else: + raise Exception( + "Shape mismatch for provided input %s: found %s expected %s " + % ( + inp_name, + str(execution_context[inp_name].shape), + str(input_dict[inp_name].shape), + ) ) - ) - # check if model has an execution mode set - # if None, execute model node by node using execute_node() - # if set to "rtlsim" execute model using pyverilator - model_exec_mode = model.get_metadata_prop("exec_mode") - if (model_exec_mode is None) or (model_exec_mode == ""): - return execute_onnx_base() - elif model_exec_mode == "rtlsim": # use stitched IP for rtlsim rtlsim_exec(model, 
execution_context) else: diff --git a/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py index 321522e7ba..6165919169 100755 --- a/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py @@ -40,14 +40,8 @@ ConvolutionInputGenerator, ) from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend -from finn.util.basic import get_rtlsim_trace_depth, make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -try: - from pyverilator import PyVerilator -except ModuleNotFoundError: - PyVerilator = None - # RTL Convolution Input Generator / Sliding Window Generator (SWG) # Matches and extends the functionality of all ConvolutionInputGenerator_* functions # in finn-hlslib by generating HDL code for two different implementation styles: @@ -932,17 +926,7 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - # Modified to use generated (System-)Verilog instead of HLS output products - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir] + def get_rtl_file_list(self): verilog_files = [ "swg_pkg.sv", self.get_nodeattr("gen_top_module") + "_wrapper.v", @@ -952,17 +936,7 @@ def prepare_rtlsim(self): if self.get_nodeattr("dynamic_mode"): verilog_files.append(self.get_nodeattr("gen_top_module") + "_axilite.v") - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - 
verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - return sim + return verilog_files def code_generation_ipi(self): """Constructs and returns the TCL for node instantiation in Vivado IPI.""" diff --git a/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py index cc49446ea3..4b37577ca8 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py @@ -34,14 +34,8 @@ from finn.custom_op.fpgadataflow.fmpadding import FMPadding from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend -from finn.util.basic import get_rtlsim_trace_depth, make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -try: - from pyverilator import PyVerilator -except ModuleNotFoundError: - PyVerilator = None - class FMPadding_rtl(FMPadding, RTLBackend): """CustomOp wrapper for the finn-rtllib fmpadding_axi component @@ -206,35 +200,14 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - # Modified to use generated (System-)Verilog instead of HLS output products - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir] + def get_rtl_file_list(self): verilog_files = [ "fmpadding_axi.sv", "fmpadding.sv", "axi2we.sv", self.get_nodeattr("gen_top_module") + ".v", ] - - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - 
build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - return sim + return verilog_files def code_generation_ipi(self): """Constructs and returns the TCL for node instantiation in Vivado IPI.""" diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py index e79782eb6d..fdb763c81f 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py @@ -34,14 +34,8 @@ from finn.custom_op.fpgadataflow.streamingdatawidthconverter import ( StreamingDataWidthConverter, ) -from finn.util.basic import get_rtlsim_trace_depth, make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -try: - from pyverilator import PyVerilator -except ModuleNotFoundError: - PyVerilator = None - class StreamingDataWidthConverter_rtl(StreamingDataWidthConverter, RTLBackend): """Class that corresponds to finn-rtllib datawidth converter @@ -167,34 +161,14 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - # Modified to use generated (System-)Verilog instead of HLS output products - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir] + def get_rtl_file_list(self): verilog_files = [ "dwc_axi.sv", "dwc.sv", 
self.get_nodeattr("gen_top_module") + ".v", ] - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - return sim + return verilog_files def code_generation_ipi(self): """Constructs and returns the TCL for node instantiation in Vivado IPI.""" diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index f8f27cb647..6c38dd405d 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -33,14 +33,8 @@ from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend from finn.custom_op.fpgadataflow.streamingfifo import StreamingFIFO -from finn.util.basic import get_rtlsim_trace_depth, make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -try: - from pyverilator import PyVerilator -except ModuleNotFoundError: - PyVerilator = None - class StreamingFIFO_rtl(StreamingFIFO, RTLBackend): def __init__(self, onnx_node, **kwargs): @@ -254,30 +248,16 @@ def code_generation_ipi(self): "FIFO implementation style %s not supported, please use rtl or vivado" % impl_style ) - def prepare_rtlsim(self): - assert self.get_nodeattr("impl_style") != "vivado", ( - "StreamingFIFO impl_style " - "cannot be vivado for rtlsim. Only impl_style=rtl supported." 
- ) - # Modified to use generated (System-)Verilog instead of HLS output products - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir] + def get_rtl_file_list(self): verilog_files = [ "Q_srl.v", self.get_nodeattr("gen_top_module") + ".v", ] - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), + return verilog_files + + def prepare_rtlsim(self): + assert self.get_nodeattr("impl_style") != "vivado", ( + "StreamingFIFO impl_style " + "cannot be vivado for rtlsim. Only impl_style=rtl supported." ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - return sim + return super().prepare_rtlsim() diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index 2e4d647b22..a0d2ade081 100644 --- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -28,6 +28,13 @@ from abc import ABC, abstractmethod +from finn.util.basic import get_rtlsim_trace_depth, make_build_dir + +try: + from pyverilator import PyVerilator +except ModuleNotFoundError: + PyVerilator = None + class RTLBackend(ABC): """RTLBackend class all custom ops that correspond to a module in finn-rtllib @@ -45,9 +52,34 @@ def get_nodeattr_types(self): def generate_hdl(self, model, fpgapart, clk): pass - @abstractmethod def prepare_rtlsim(self): - pass + """Creates a Verilator emulation library for the RTL code generated + for this node, sets the rtlsim_so attribute to its path and returns + a PyVerilator wrapper around it.""" + + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + + code_gen_dir 
= self.get_nodeattr("code_gen_dir_ipgen") + verilog_paths = [code_gen_dir] + verilog_files = self.get_rtl_file_list() + + # build the Verilator emu library + sim = PyVerilator.build( + verilog_files, + build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), + verilog_path=verilog_paths, + trace_depth=get_rtlsim_trace_depth(), + top_module_name=self.get_verilog_top_module_name(), + ) + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", sim.lib._name) + return sim + + # TODO: enable all rtl nodes to use parent function + # @abstractmethod + # def get_rtl_file_list(self): + # pass @abstractmethod def code_generation_ipi(self): From 8d2daaf7b18f94a42dae04df7e495ae08e24683e Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 14 Oct 2024 13:50:26 +0100 Subject: [PATCH 031/102] [pyxsi] Make pyxsi import optional to allow GHA to work without installation --- src/finn/util/pyxsi_rpcclient.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index 650e7a2632..29cb077222 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -27,13 +27,17 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import os -import pyxsi_utils import subprocess import xmlrpc.client from time import sleep from finn.util.basic import get_finn_root, get_vivado_root +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + def compile_sim_obj(top_module_name, source_list, sim_out_dir): # compile_sim_obj does not require special envvar settings and is safe to call From 1cc5f11cbe792cd14045a2ad421f39596eac04a2 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 15 Oct 2024 18:44:48 +0100 Subject: [PATCH 032/102] [PrepareRTLSim] Clean-up functions in rtl thresholding and move functionality to rtlbackend --- .../fpgadataflow/rtl/thresholding_rtl.py | 99 ++++--------------- src/finn/custom_op/fpgadataflow/rtlbackend.py | 2 +- 2 files changed, 20 insertions(+), 81 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py index d1e9387b1b..d47632dfad 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py @@ -38,8 +38,6 @@ from finn.custom_op.fpgadataflow.thresholding import Thresholding from finn.util.basic import ( get_memutil_alternatives, - get_rtlsim_trace_depth, - make_build_dir, mem_primitives_versal, pyverilate_get_liveness_threshold_cycles, ) @@ -295,41 +293,9 @@ def get_rtl_file_list(self): "axilite_if.v", "thresholding.sv", "thresholding_axi.sv", - "thresholding_template_wrapper.v", + self.get_nodeattr("gen_top_module") + ".v", ] - def get_rtl_file_paths(self): - """Get full path of all RTL files""" - rtl_root_dir = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl/" - rtl_file_list = self.get_rtl_file_list() - rtl_file_paths = [rtl_root_dir + file for file in rtl_file_list] - return rtl_file_paths - - def get_rtl_template_data(self, path): - """Return RTL file contents as a template""" - with open(path, "r") as f: - template = f.read() - return template - - def fill_in_rtl_template_data(self, 
replace_dict, template_data): - """Use attribute values to finn in RTL template placeholders""" - template_data_cp = template_data - for key in replace_dict: - replacement_line = "\n".join(replace_dict[key]) - template_data_cp = template_data_cp.replace(key, replacement_line) - return template_data_cp - - def dump_rtl_data(self, dest_dir, filename, data): - """Dump filled-in-template RTL files for future synthesis step""" - # when generating template files, handle a special case: - # if the filename contains the word "template", replace that - # with the node name to distinguish between instances - if "template" in filename: - filename = self.get_nodeattr("gen_top_module") + ".v" - with open(os.path.join(dest_dir, filename), "w") as f: - f.write(data) - return - def generate_hdl(self, model, fpgapart, clk): """Prepare HDL files from templates for synthesis""" # Generate a dictionary of values to put in RTL template @@ -342,14 +308,23 @@ def generate_hdl(self, model, fpgapart, clk): # by PyVerilator and IPI generation self.set_nodeattr("gen_top_module", code_gen_dict["$TOP_MODULE$"][0]) - for rtl_file_path in self.get_rtl_file_paths(): - # read in original RTL template file - template_data = self.get_rtl_template_data(rtl_file_path) - # apply code generation to templates - data = self.fill_in_rtl_template_data(code_gen_dict, template_data) - # dump filled-in template to destination directory for compilation - file_only_path = rtl_file_path.split("/")[-1] - self.dump_rtl_data(code_gen_dir, file_only_path, data) + rtlsrc = os.environ["FINN_ROOT"] + "/finn-rtllib/thresholding/hdl" + template_path = rtlsrc + "/thresholding_template_wrapper.v" + with open(template_path, "r") as f: + template_wrapper = f.read() + for key in code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(code_gen_dict[key]) + template_wrapper = template_wrapper.replace(key, code_gen_line) + with open( + os.path.join(code_gen_dir, 
self.get_nodeattr("gen_top_module") + ".v"), + "w", + ) as f: + f.write(template_wrapper) + + sv_files = ["axilite_if.v", "thresholding.sv", "thresholding_axi.sv"] + for sv_file in sv_files: + shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir) # set ipgen_path and ip_path so that HLS-Synth transformation # and stich_ip transformation do not complain @@ -358,39 +333,6 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ip_path", code_gen_dir) return - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir] - verilog_files = [ - x.replace("thresholding_template_wrapper", self.get_nodeattr("gen_top_module")) - for x in self.get_rtl_file_list() - ] - dat_files = self.get_all_meminit_filenames(abspath=True) - single_src_dir = make_build_dir("pyverilator_" + self.onnx_node.name + "_") - for dat_file in dat_files: - shutil.copy(dat_file, single_src_dir) - - # build the Verilator emulation library - sim = PyVerilator.build( - verilog_files, - build_dir=single_src_dir, - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_nodeattr("gen_top_module"), - auto_eval=False, - ) - - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - return sim - def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") @@ -489,10 +431,7 @@ def execute_node(self, context, graph): def code_generation_ipi(self): """Constructs and returns the TCL commands for node instantiation as an RTL block.""" - rtl_file_list = [ - x.replace("thresholding_template_wrapper", self.get_nodeattr("gen_top_module")) - for 
x in self.get_rtl_file_list() - ] + rtl_file_list = self.get_rtl_file_list() code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") source_target = "./ip/verilog/rtl_ops/%s" % self.onnx_node.name cmd = ["file mkdir %s" % source_target] diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index a0d2ade081..62b0de46ab 100644 --- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -70,7 +70,7 @@ def prepare_rtlsim(self): build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), verilog_path=verilog_paths, trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), + top_module_name=self.get_nodeattr("gen_top_module"), ) # save generated lib filename in attribute self.set_nodeattr("rtlsim_so", sim.lib._name) From 20e8b596a2b0bc048d6773ca72f0053962be8bd8 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 11 Oct 2024 23:11:25 +0200 Subject: [PATCH 033/102] [rtlsim] enable stitched-IP rtlsim with pyxsi --- src/finn/core/rtlsim_exec.py | 151 ++++++++++++++++++++++++++++------- 1 file changed, 123 insertions(+), 28 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index d45c972928..b8623086ef 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -30,7 +30,7 @@ from pyverilator.util.axi_utils import reset_rtlsim, rtlsim_multi_io from qonnx.custom_op.registry import getCustomOp -from finn.util.basic import pyverilate_get_liveness_threshold_cycles +from finn.util.basic import make_build_dir, pyverilate_get_liveness_threshold_cycles from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy from finn.util.pyverilator import pyverilate_stitched_ip @@ -39,35 +39,10 @@ except ModuleNotFoundError: PyVerilator = None +import finn.util.pyxsi_rpcclient as pyxsi_rpcclient -def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None): - """Use 
PyVerilator to execute given model with stitched IP. The execution - context contains the input values. Hook functions can be optionally - specified to observe/alter the state of the circuit, receiving the - PyVerilator sim object as their first argument: - - pre_hook : hook function to be called before sim start (after reset) - - post_hook : hook function to be called after sim end - """ - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") - # ensure stitched ip project already exists - assert os.path.isfile( - model.get_metadata_prop("wrapper_filename") - ), """The - file name from metadata property "wrapper_filename" doesn't exist.""" - assert os.path.isdir( - model.get_metadata_prop("vivado_stitch_proj") - ), """The - directory from metadata property "vivado_stitch_proj" doesn't exist""" - trace_file = model.get_metadata_prop("rtlsim_trace") - if trace_file is None: - trace_file = "" - extra_verilator_args = model.get_metadata_prop("extra_verilator_args") - if extra_verilator_args is None: - extra_verilator_args = [] - else: - extra_verilator_args = eval(extra_verilator_args) +def prep_rtlsim_io_dict(model, execution_context): # extract i/o info to prepare io_dict io_dict = {"inputs": {}, "outputs": {}} if_dict = eval(model.get_metadata_prop("vivado_stitch_ifnames")) @@ -123,6 +98,108 @@ def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None): o_stream_w = last_node.get_outstream_width() o_tensor_info.append((o_stream_w, o_dt, o_folded_shape, o_shape)) num_out_values += batchsize * last_node.get_number_output_values() + return io_dict, if_dict, num_out_values, o_tensor_info + + +def file_to_basename(x): + return os.path.basename(os.path.realpath(x)) + + +def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): + """Use PyXSI to execute given model with stitched IP. The execution + context contains the input values. 
Hook functions can be optionally + specified to observe/alter the state of the circuit, receiving the + PyXSI RPC sim handle as their first argument: + - pre_hook : hook function to be called before sim start (after reset) + - post_hook : hook function to be called after sim end + """ + # ensure stitched ip project already exists + assert os.path.isfile( + model.get_metadata_prop("wrapper_filename") + ), """The + file name from metadata property "wrapper_filename" doesn't exist.""" + assert os.path.isdir( + model.get_metadata_prop("vivado_stitch_proj") + ), """The + directory from metadata property "vivado_stitch_proj" doesn't exist""" + trace_file = model.get_metadata_prop("rtlsim_trace") + io_dict, if_dict, num_out_values, o_tensor_info = prep_rtlsim_io_dict(model, execution_context) + + # prepare rtlsim model + rtlsim_so = model.get_metadata_prop("rtlsim_so") + if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)): + vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj") + with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: + all_verilog_srcs = f.read().split() + top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename")) + top_module_name = top_module_file_name.strip(".v") + single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") + + rtlsim_so = pyxsi_rpcclient.compile_sim_obj( + top_module_name, all_verilog_srcs, single_src_dir + ) + # save generated lib filename in attribute + model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) + sim_base, sim_rel = rtlsim_so + # pass in correct tracefile from attribute + if trace_file == "default": + trace_file = top_module_file_name + ".wdb" + sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, trace_file) + else: + sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, trace_file) + + # reset and call rtlsim, including any pre/post hooks + pyxsi_rpcclient.reset_rtlsim(sim) + if pre_hook is not None: + pre_hook(sim) + n_cycles = 
pyxsi_rpcclient.rtlsim_multi_io( + sim, + io_dict, + num_out_values, + sname="_", + liveness_threshold=pyverilate_get_liveness_threshold_cycles(), + ) + if post_hook is not None: + post_hook(sim) + # important to call close_rtlsim for pyxsi to flush traces and stop + # the RPC server process + pyxsi_rpcclient.close_rtlsim(sim) + + # unpack outputs and put back into execution context + for o, o_vi in enumerate(model.graph.output): + o_name = o_vi.name + if_name = if_dict["m_axis"][o][0] + o_stream_w, o_dt, o_folded_shape, o_shape = o_tensor_info[o] + packed_output = io_dict["outputs"][if_name] + o_folded_tensor = rtlsim_output_to_npy( + packed_output, None, o_dt, o_folded_shape, o_stream_w, o_dt.bitwidth() + ) + execution_context[o_name] = o_folded_tensor.reshape(o_shape) + + model.set_metadata_prop("cycles_rtlsim", str(n_cycles)) + + +def rtlsim_exec_pyverilator(model, execution_context, pre_hook=None, post_hook=None): + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + # ensure stitched ip project already exists + assert os.path.isfile( + model.get_metadata_prop("wrapper_filename") + ), """The + file name from metadata property "wrapper_filename" doesn't exist.""" + assert os.path.isdir( + model.get_metadata_prop("vivado_stitch_proj") + ), """The + directory from metadata property "vivado_stitch_proj" doesn't exist""" + trace_file = model.get_metadata_prop("rtlsim_trace") + if trace_file is None: + trace_file = "" + extra_verilator_args = model.get_metadata_prop("extra_verilator_args") + if extra_verilator_args is None: + extra_verilator_args = [] + else: + extra_verilator_args = eval(extra_verilator_args) + io_dict, if_dict, num_out_values, o_tensor_info = prep_rtlsim_io_dict(model, execution_context) # prepare pyverilator model rtlsim_so = model.get_metadata_prop("rtlsim_so") @@ -159,3 +236,21 @@ def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None): execution_context[o_name] = 
o_folded_tensor.reshape(o_shape) model.set_metadata_prop("cycles_rtlsim", str(n_cycles)) + + +def rtlsim_exec(model, execution_context, pre_hook=None, post_hook=None): + """Use PyVerilator or PyXSI to execute given model with stitched IP, depending + on the rtlsim_backend metadata_prop on the model. The execution + context contains the input values. Hook functions can be optionally + specified to observe/alter the state of the circuit, receiving the + PyVerilator sim object as their first argument: + - pre_hook : hook function to be called before sim start (after reset) + - post_hook : hook function to be called after sim end + """ + backend = model.get_metadata_prop("rtlsim_backend") + if backend == "pyverilator": + rtlsim_exec_pyverilator(model, execution_context, pre_hook, post_hook) + elif backend == "pyxsi": + rtlsim_exec_pyxsi(model, execution_context, pre_hook, post_hook) + else: + assert False, f"Unrecognized rtlsim_backend value: {backend}" From cb6e08d8530ca9b3ca8d89aea5205a1130dabc89 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 14 Oct 2024 21:03:40 +0200 Subject: [PATCH 034/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 8c6511ff31..bdd0612267 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="d79a00c54a5d1f775249d913d75a18c2f67a6dd6" +PYXSI_COMMIT="0d321c4114943d5fc435e3a52ce1d8ba5b77cf72" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 7f09b73b8c84b210d5e3788cb725211161b05ad9 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Wed, 16 Oct 2024 10:25:31 +0100 Subject: [PATCH 035/102] [Deps] update 
pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index bdd0612267..73d5e6bc39 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="0d321c4114943d5fc435e3a52ce1d8ba5b77cf72" +PYXSI_COMMIT="dc074bc1b3ecc2ab884531565d1aca6aa33ea5b9" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 7018cfe5ef0d0a49c08f696990238162ac047446 Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Mon, 14 Oct 2024 15:44:35 +0100 Subject: [PATCH 036/102] Closing the handle if the simulation times out --- src/finn/util/pyxsi_rpcclient.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py index 29cb077222..fa4909d138 100644 --- a/src/finn/util/pyxsi_rpcclient.py +++ b/src/finn/util/pyxsi_rpcclient.py @@ -213,6 +213,7 @@ def rtlsim_multi_io( # end sim on timeout if no_change_count == liveness_threshold: + close_rtlsim(handle) raise Exception( "Error in simulation! Takes too long to produce output. 
" "Consider setting the liveness_threshold parameter to a " From 812bde4573b24f8d0d4d4e4a2546c22ff35792b0 Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 17 Oct 2024 13:51:00 +0100 Subject: [PATCH 037/102] [stitchedIP-rtlsim] Default rtlsim backend metadata prop to pyverilator --- .../end2end_example/bnn-pynq/tfc_end2end_verification.ipynb | 1 + src/finn/builder/build_dataflow_steps.py | 2 ++ src/finn/transformation/fpgadataflow/set_fifo_depths.py | 2 ++ tests/end2end/test_end2end_bnn_pynq.py | 1 + tests/end2end/test_end2end_mobilenet_v1.py | 1 + tests/fpgadataflow/test_fpgadataflow_checksum.py | 1 + tests/fpgadataflow/test_fpgadataflow_concat.py | 1 + .../test_fpgadataflow_convinputgenerator_rtl_dynamic.py | 2 ++ tests/fpgadataflow/test_fpgadataflow_dwc.py | 1 + tests/fpgadataflow/test_fpgadataflow_ipstitch.py | 1 + tests/fpgadataflow/test_fpgadataflow_mvau.py | 2 +- tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py | 2 ++ tests/fpgadataflow/test_fpgadataflow_vvau.py | 1 + tests/fpgadataflow/test_runtime_weights.py | 1 + 14 files changed, 18 insertions(+), 1 deletion(-) diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb index aacd12ef05..0f7903a009 100644 --- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb +++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb @@ -404,6 +404,7 @@ "child_model = child_model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))\n", "child_model = child_model.transform(PrepareRTLSim())\n", "child_model.set_metadata_prop(\"exec_mode\",\"rtlsim\")\n", + "child_model.set_metadata_prop(\"rtlsim_backend\",\"pyverilator\")\n", "child_model.save(build_dir + \"/tfc_w1_a1_dataflow_child.onnx\");" ] }, diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index ab2280554c..176f0c5d8d 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ 
b/src/finn/builder/build_dataflow_steps.py @@ -250,6 +250,8 @@ def prepare_for_stitched_ip_rtlsim(verify_model, cfg): # set top-level prop for stitched-ip rtlsim and launch verify_model.set_metadata_prop("exec_mode", "rtlsim") # TODO make configurable + verify_model.set_metadata_prop("rtlsim_backend", "pyverilator") + # TODO make configurable # verify_model.set_metadata_prop("rtlsim_trace", "trace.vcd") return verify_model diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 82ee536d50..78a6142a47 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -324,6 +324,8 @@ def apply(self, model): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(self.fpgapart, self.clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") + # TODO: needs to check if pyxsi necessary + model.set_metadata_prop("rtlsim_backend", "pyverilator") if self.force_python_sim: # do rtlsim in Python for FIFO sizing diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index 0d3418624a..c6c56573ec 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -746,6 +746,7 @@ def test_ipstitch_rtlsim(self, topology, wbits, abits, board): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1)) if rtlsim_trace: model.set_metadata_prop("rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)) diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py index 4c52277970..bdaa3490b3 100644 --- a/tests/end2end/test_end2end_mobilenet_v1.py +++ b/tests/end2end/test_end2end_mobilenet_v1.py @@ 
-502,6 +502,7 @@ def test_end2end_mobilenet_stitched_ip_rtlsim(): # set top-level prop for stitched-ip rtlsim and launch model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") ret_rtlsim_ip = execute_onnx(model, inp_dict, True) res_rtlsim_ip = ret_rtlsim_ip[out_name] np.save(build_dir + "/end2end_mobilenet_result_rtlsim_ip.npy", res_rtlsim_ip) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index 817d13e13d..1cfb7f9ec9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -182,6 +182,7 @@ def test_fpgadataflow_checksum(): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") # define function to read out the checksums from axilite checksums = [] diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py index 25c738d049..e207b17a63 100644 --- a/tests/fpgadataflow/test_fpgadataflow_concat.py +++ b/tests/fpgadataflow/test_fpgadataflow_concat.py @@ -157,6 +157,7 @@ def test_fpgadataflow_concat_stitchedip(): ) ) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") model.set_metadata_prop("rtlsim_trace", "trace.vcd") ret_sim = execute_onnx(model, inp_dict) assert (exp_out == ret_sim[oname]).all() diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py index 26ce8f5f0e..a98cc1ab54 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ -290,6 +290,7 @@ def test_fpgadataflow_conv_dynamic(cfg): model = 
model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5, vitis=do_synth)) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") # loop through experiment configurations for exp_cfg in exp_cfgs: @@ -535,6 +536,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic( model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5)) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") # Simulate 1 FM for each dimension in the series for i, ifm_dim in enumerate(ifm_dim_series): diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index 6b79a39ed5..ccf5335395 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -165,6 +165,7 @@ def test_fpgadataflow_dwc_stitched_rtlsim(config, impl_style): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") y = oxe.execute_onnx(model, input_dict)["outp"] assert ( diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index 2061601b4a..2d3e76f238 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -272,6 +272,7 @@ def test_fpgadataflow_ipstitch_rtlsim(mem_mode): ] assert sorted(dir(sim.io)) == sorted(exp_io) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") idt = model.get_tensor_datatype("inp") ishape = model.get_tensor_shape("inp") x = gen_finn_dt_tensor(idt, ishape) diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py index 1ec77f4eec..2589c997e2 100644 --- 
a/tests/fpgadataflow/test_fpgadataflow_mvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -723,8 +723,8 @@ def test_fpgadataflow_rtl_mvau(mh, mw, pe, simd, idt, wdt, part, clk_ns): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(part, clk_ns)) - model.set_metadata_prop("rtlsim_so", "") model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") output_mvau_rtl_stitch = oxe.execute_onnx(model, input_dict)["global_out"] assert ( diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py index e6175ac58b..c7685a4b09 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py @@ -186,6 +186,7 @@ def test_runtime_thresholds_read(impl_style, idt_act_cfg, cfg, narrow, per_tenso model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model = model.transform(PrepareRTLSim()) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") # add two copies of the input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while # we read/write new ones and reads seem to cause a disturbance too) @@ -299,6 +300,7 @@ def test_runtime_thresholds_write(impl_style, idt_act_cfg, cfg, narrow, per_tens model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model = model.transform(PrepareRTLSim()) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") # add two copies of the input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while # we read/write new ones and reads seem to cause a disturbance too) diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py 
b/tests/fpgadataflow/test_fpgadataflow_vvau.py index 236176faa6..ef3a120543 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -457,6 +457,7 @@ def test_fpgadataflow_vvau_rtl(kernel_size, in_feature_dim, in_chn, idt, wdt, pa partitioned_model = partitioned_model.transform(CreateStitchedIP(part, 5)) # set top-level prop for stitched-ip rtlsim and launch partitioned_model.set_metadata_prop("exec_mode", "rtlsim") + partitioned_model.set_metadata_prop("rtlsim_backend", "pyverilator") # transpose input since we're now simulating HW layers (NCHW --> NHWC) input_dict["global_in"] = np.transpose(input_dict["global_in"], (0, 2, 3, 1)) output_vvau_stitched = oxe.execute_onnx( diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py index 4ca61578c3..aaf30a052b 100644 --- a/tests/fpgadataflow/test_runtime_weights.py +++ b/tests/fpgadataflow/test_runtime_weights.py @@ -89,6 +89,7 @@ def test_runtime_weights_single_layer(): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyverilator") in_tensor = np.asarray(range(mw), dtype=np.float32) # add two copies of the input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while From 25443da30260f07bebcd5d0025affb2f1a9f7a85 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 22 Oct 2024 10:37:01 +0100 Subject: [PATCH 038/102] [prepare rtlsim] Clean-up functions in rtl mvu and vvu --- .../rtl/matrixvectoractivation_rtl.py | 29 ++++--------------- .../rtl/vectorvectoractivation_rtl.py | 29 ++++--------------- src/finn/custom_op/fpgadataflow/rtlbackend.py | 14 +++++---- 3 files changed, 20 insertions(+), 52 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py 
b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index d9ab501117..b950d5a836 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -32,7 +32,7 @@ from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend -from finn.util.basic import get_dsp_block, get_rtlsim_trace_depth, make_build_dir +from finn.util.basic import get_dsp_block from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy try: @@ -282,28 +282,11 @@ def prepare_codegen_default(self, fpgapart, clk): return template_path, code_gen_dict - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") + def get_rtl_file_list(self): + verilog_files = [self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"] + return verilog_files + def get_verilog_paths(self): code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - # Path to (System-)Verilog files used by top-module & path to top-module verilog_paths = [code_gen_dir, os.environ["FINN_ROOT"] + "/finn-rtllib/mvu"] - verilog_files = [self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"] - - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - - return sim + return verilog_paths diff --git a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py 
b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py index 32943d86cf..8273978c68 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py @@ -33,7 +33,7 @@ from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend from finn.custom_op.fpgadataflow.vectorvectoractivation import VVAU -from finn.util.basic import get_rtlsim_trace_depth, is_versal, make_build_dir +from finn.util.basic import is_versal from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy try: @@ -274,28 +274,11 @@ def prepare_codegen_default(self, fpgapart, clk): return template_path, code_gen_dict - def prepare_rtlsim(self): - """Creates a Verilator emulation library for the RTL code generated - for this node, sets the rtlsim_so attribute to its path and returns - a PyVerilator wrapper around it.""" - - if PyVerilator is None: - raise ImportError("Installation of PyVerilator is required.") + def get_rtl_file_list(self): + verilog_files = [self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"] + return verilog_files + def get_verilog_paths(self): code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - # Path to (System-)Verilog files used by top-module & path to top-module verilog_paths = [code_gen_dir, os.environ["FINN_ROOT"] + "/finn-rtllib/mvu"] - verilog_files = [self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"] - - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_verilog_top_module_name(), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) - - return sim + return verilog_paths diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index 62b0de46ab..2d4dc7498f 100644 
--- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -60,8 +60,7 @@ def prepare_rtlsim(self): if PyVerilator is None: raise ImportError("Installation of PyVerilator is required.") - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir] + verilog_paths = self.get_verilog_paths() verilog_files = self.get_rtl_file_list() # build the Verilator emu library @@ -76,10 +75,13 @@ def prepare_rtlsim(self): self.set_nodeattr("rtlsim_so", sim.lib._name) return sim - # TODO: enable all rtl nodes to use parent function - # @abstractmethod - # def get_rtl_file_list(self): - # pass + def get_verilog_paths(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + return [code_gen_dir] + + @abstractmethod + def get_rtl_file_list(self): + pass @abstractmethod def code_generation_ipi(self): From afbf1fea097303abd88a8f76b4e1ac847f8d5939 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 22 Oct 2024 12:08:05 +0100 Subject: [PATCH 039/102] [execute node] switch to rtlsim multi io for all custom ops --- .../custom_op/fpgadataflow/hls/channelwise_op_hls.py | 7 ++++++- .../fpgadataflow/hls/convolutioninputgenerator_hls.py | 7 ++++++- src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py | 7 ++++++- src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py | 7 ++++++- .../custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py | 7 ++++++- src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py | 7 ++++++- src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py | 7 ++++++- src/finn/custom_op/fpgadataflow/hls/lookup_hls.py | 7 ++++++- .../fpgadataflow/hls/matrixvectoractivation_hls.py | 9 ++++++--- src/finn/custom_op/fpgadataflow/hls/pool_hls.py | 7 ++++++- .../fpgadataflow/hls/streamingdatawidthconverter_hls.py | 7 ++++++- .../custom_op/fpgadataflow/hls/streamingeltwise_hls.py | 7 ++++++- .../custom_op/fpgadataflow/hls/streamingmaxpool_hls.py | 7 ++++++- 
src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py | 9 ++++++--- src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py | 7 ++++++- .../fpgadataflow/hls/vectorvectoractivation_hls.py | 9 ++++++--- src/finn/custom_op/fpgadataflow/hwcustomop.py | 3 +-- .../fpgadataflow/rtl/convolutioninputgenerator_rtl.py | 7 ++++++- src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py | 7 ++++++- .../fpgadataflow/rtl/matrixvectoractivation_rtl.py | 9 ++++++--- .../fpgadataflow/rtl/streamingdatawidthconverter_rtl.py | 7 ++++++- src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py | 7 ++++++- .../fpgadataflow/rtl/vectorvectoractivation_rtl.py | 9 ++++++--- 23 files changed, 133 insertions(+), 34 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py b/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py index 14efa113dd..adb71c0ccb 100644 --- a/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py @@ -285,7 +285,12 @@ def execute_node(self, context, graph): inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) super().toggle_clk(sim) - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py b/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py index 4a5c02ee06..eeb7dd880d 100644 --- a/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py @@ -388,7 +388,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": 
{"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py b/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py index 56f472b9c0..76364befde 100644 --- a/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py @@ -139,7 +139,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py b/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py index d57699af05..a6eb9cab06 100644 --- a/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py @@ -186,7 +186,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py b/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py index b7ba301fbc..7e6bb80e3e 100644 --- a/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py @@ -141,7 +141,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) 
- rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py b/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py index 9b2a7b25b0..e195850663 100644 --- a/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py @@ -119,7 +119,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py b/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py index 1e2c0d034a..a79856f7ee 100644 --- a/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py @@ -121,7 +121,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py b/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py index ba44deb898..fbe12e51eb 100644 --- a/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py @@ -298,7 +298,12 @@ def 
execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() out_npy_path = "{}/output.npy".format(code_gen_dir) diff --git a/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py b/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py index cae1c30eb6..772057b7d8 100644 --- a/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py @@ -556,10 +556,13 @@ def execute_node(self, context, graph): "inputs": {"in0": inp, "weights": wei * num_w_reps}, "outputs": {"out": []}, } - self.rtlsim_multi_io(sim, io_dict) - output = io_dict["outputs"]["out"] else: - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/pool_hls.py b/src/finn/custom_op/fpgadataflow/hls/pool_hls.py index 64c6ec33f8..609c53fd68 100644 --- a/src/finn/custom_op/fpgadataflow/hls/pool_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/pool_hls.py @@ -236,7 +236,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git 
a/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py index 4619a1756b..c58aabbdbe 100644 --- a/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py @@ -178,7 +178,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py index 0d618d832a..41ee72fe8c 100644 --- a/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py @@ -130,7 +130,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp0, rtlsim_inp1) + io_dict = { + "inputs": {"in0": rtlsim_inp0, "in1": rtlsim_inp1}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py index 69db7b4606..f7546d7e1a 100755 --- a/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py @@ -191,7 +191,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": 
rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py b/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py index b753bc7a03..4c0da73ec9 100644 --- a/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py @@ -348,12 +348,15 @@ def execute_node(self, context, graph): "inputs": {"in0": inp, "weights": wei * num_w_reps}, "outputs": {"out": []}, } - self.rtlsim_multi_io(sim, io_dict) - output = io_dict["outputs"]["out"] elif self.get_nodeattr("mem_mode") == "internal_embedded": - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } else: raise Exception("Unrecognized mem_mode") + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py b/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py index 05d26eddb2..c6a062a775 100644 --- a/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py @@ -149,7 +149,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py b/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py index f9ba68e6b6..8f2419b694 100644 --- 
a/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py @@ -208,10 +208,13 @@ def execute_node(self, context, graph): "inputs": {"in0": inp, "weights": wei * num_w_reps}, "outputs": {"out": []}, } - self.rtlsim_multi_io(sim, io_dict) - output = io_dict["outputs"]["out"] else: - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 4d90664f78..dafe469104 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -69,6 +69,7 @@ def get_nodeattr_types(self): "res_estimate": ("s", False, ""), "res_synth": ("s", False, ""), "rtlsim_so": ("s", False, ""), + "rtlsim_backend": ("s", False, "pyverilator", {"pyverilator", "pyxsi"}), # partitioning info # ID of SLR to which the Op is attached in Vitis builds # Set to -1 as 'don't care' @@ -98,8 +99,6 @@ def get_nodeattr_types(self): # amount of zero padding inserted during chrc. 
"io_chrc_pads_in": ("ints", False, []), "io_chrc_pads_out": ("ints", False, []), - # experimental: rtlsim backend - "rtlsim_backend": ("s", False, "pyverilator", {"pyverilator", "pyxsi"}), } def get_verilog_top_module_name(self): diff --git a/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py index 6165919169..e6cfa204c5 100755 --- a/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py @@ -331,7 +331,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py index 4b37577ca8..2fd589dd9b 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py @@ -91,7 +91,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index b950d5a836..4dc883f64f 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -108,10 +108,13 
@@ def execute_node(self, context, graph): "inputs": {"in0": inp, "weights": wei * num_w_reps}, "outputs": {"out": []}, } - self.rtlsim_multi_io(sim, io_dict) - output = io_dict["outputs"]["out"] else: - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py index fdb763c81f..ad9c8d4f06 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py @@ -95,7 +95,12 @@ def execute_node(self, context, graph): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) + io_dict = { + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index 6c38dd405d..1b1d632bda 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -147,7 +147,12 @@ def execute_node(self, context, graph): inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) super().toggle_clk(sim) - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = DataType[self.get_nodeattr("dataType")] target_bits = odt.bitwidth() packed_bits = 
self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py index 8273978c68..901ff99b0d 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py @@ -115,10 +115,13 @@ def execute_node(self, context, graph): "inputs": {"in0": inp, "weights": wei * num_w_reps}, "outputs": {"out": []}, } - self.rtlsim_multi_io(sim, io_dict) - output = io_dict["outputs"]["out"] else: - output = self.rtlsim(sim, inp) + io_dict = { + "inputs": {"in0": inp}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() From de5cf618e339372942fabaf2801bb716dcec6b67 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 22 Oct 2024 15:55:43 +0100 Subject: [PATCH 040/102] [rtlsim] Delete obsolete rtlsim fct --- src/finn/custom_op/fpgadataflow/hwcustomop.py | 87 ------------------- 1 file changed, 87 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index dafe469104..25a2ee130d 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -246,93 +246,6 @@ def toggle_clk(self, sim): else: assert False, f"Unknown rtlsim_backend {rtlsim_backend}" - def rtlsim(self, sim, inp, inp2=None): - """Runs the pyverilator simulation by passing the input values to the simulation, - toggle the clock and observing the execution time. 
Function contains also an - observation loop that can abort the simulation if no output value is produced - after 100 cycles.""" - - rtlsim_backend = self.get_nodeattr("rtlsim_backend") - if rtlsim_backend == "pyverilator": - trace_file = self.get_nodeattr("rtlsim_trace") - if trace_file != "": - if trace_file == "default": - trace_file = self.onnx_node.name + ".vcd" - sim.start_vcd_trace(trace_file) - inputs = inp - outputs = [] - sname = self.hls_sname() - o_ready = "out_" + sname + "_TREADY" - o_valid = "out_" + sname + "_TVALID" - o_data = "out_" + sname + "_TDATA" - in0_ready = "in0_" + sname + "_TREADY" - in0_valid = "in0_" + sname + "_TVALID" - in0_data = "in0_" + sname + "_TDATA" - in1_ready = "in1_" + sname + "_TREADY" - in1_valid = "in1_" + sname + "_TVALID" - in1_data = "in1_" + sname + "_TDATA" - - sim.io[o_ready] = 1 - - # observe if output is completely calculated - # observation_count will contain the number of cycles the calculation ran - num_out_values = self.get_number_output_values() - output_observed = False - observation_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - old_outputs = outputs - liveness_threshold = pyverilate_get_liveness_threshold_cycles() - - while not (output_observed): - sim.io[in0_valid] = 1 if len(inputs) > 0 else 0 - sim.io[in0_data] = inputs[0] if len(inputs) > 0 else 0 - if sim.io[in0_ready] == 1 and sim.io[in0_valid] == 1: - inputs = inputs[1:] - - if inp2 is not None: - sim.io[in1_valid] = 1 if len(inp2) > 0 else 0 - sim.io[in1_data] = inp2[0] if len(inp2) > 0 else 0 - if sim.io[in1_ready] == 1 and sim.io[in1_valid] == 1: - inp2 = inp2[1:] - - if sim.io[o_valid] == 1 and sim.io[o_ready] == 1: - outputs = outputs + [sim.io[o_data]] - sim.io.ap_clk = 1 - sim.io.ap_clk = 0 - - observation_count = observation_count + 1 - no_change_count = no_change_count + 1 - - if len(outputs) == num_out_values: - 
self.set_nodeattr("cycles_rtlsim", observation_count) - output_observed = True - - if no_change_count == liveness_threshold: - if old_outputs == outputs: - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - raise Exception( - "Error in simulation! Takes too long to produce output. " - "Consider setting the LIVENESS_THRESHOLD env.var. to a " - "larger value." - ) - else: - no_change_count = 0 - old_outputs = outputs - if trace_file != "": - sim.flush_vcd_trace() - sim.stop_vcd_trace() - elif rtlsim_backend == "pyxsi": - assert False, "pyxsi only supports rtlsim_multi_io for now" - else: - assert False, f"Unknown rtlsim_backend {rtlsim_backend}" - - return outputs - def rtlsim_multi_io(self, sim, io_dict): "Run rtlsim for this node, supports multiple i/o streams." # signal name suffix From 243768787c487c19de0165230350f05bc58dcdf1 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 25 Oct 2024 11:47:42 +0100 Subject: [PATCH 041/102] [prepare rtlsim] Clean up new rtl op fcts --- .../custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py | 4 ++-- .../custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py | 4 ++-- src/finn/custom_op/fpgadataflow/rtlbackend.py | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index 4dc883f64f..88249f3673 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -290,6 +290,6 @@ def get_rtl_file_list(self): return verilog_files def get_verilog_paths(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir, os.environ["FINN_ROOT"] + "/finn-rtllib/mvu"] + verilog_paths = super().get_verilog_paths() + verilog_paths.append(os.environ["FINN_ROOT"] + "/finn-rtllib/mvu") return verilog_paths diff --git 
a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py index 901ff99b0d..11990ebd91 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py @@ -282,6 +282,6 @@ def get_rtl_file_list(self): return verilog_files def get_verilog_paths(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") - verilog_paths = [code_gen_dir, os.environ["FINN_ROOT"] + "/finn-rtllib/mvu"] + verilog_paths = super().get_verilog_paths() + verilog_paths.append(os.environ["FINN_ROOT"] + "/finn-rtllib/mvu") return verilog_paths diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index 2d4dc7498f..fe7ac51a13 100644 --- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -76,11 +76,14 @@ def prepare_rtlsim(self): return sim def get_verilog_paths(self): + """Returns path to code gen directory. Can be overwritten to + return additional paths to relevant verilog files""" code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") return [code_gen_dir] @abstractmethod def get_rtl_file_list(self): + """Returns list of rtl files. 
Needs to be filled by each node.""" pass @abstractmethod From cc307570c3daa0551039fe98e51bd0bc149ddbe8 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 25 Oct 2024 16:04:50 +0100 Subject: [PATCH 042/102] [AddStreams] Enable both rtlsim backends --- .../fpgadataflow/hls/addstreams_hls.py | 51 ++++--------------- .../test_fpgadataflow_addstreams.py | 15 ++++-- 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index 988d309efe..cd7686674b 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -32,7 +32,6 @@ import finn.util.pyxsi_rpcclient as pyxsi_rpcclient from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend -from finn.util.basic import make_build_dir from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -119,6 +118,7 @@ def execute_node(self, context, graph): context[node.output[0]].shape == exp_oshape ), "cppsim did not produce expected output shape" elif mode == "rtlsim": + rtlsim_backend = self.get_nodeattr("rtlsim_backend") sim = self.get_rtlsim() nbits = self.get_instream_width() rtlsim_inp0 = npy_to_rtlsim_input( @@ -127,10 +127,16 @@ def execute_node(self, context, graph): rtlsim_inp1 = npy_to_rtlsim_input( "{}/input_1.npy".format(code_gen_dir), export_idt, nbits ) - # super().reset_rtlsim(sim) - # super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp0, rtlsim_inp1) - pyxsi_rpcclient.close_rtlsim(sim) + if rtlsim_backend == "pyverilator": + super().reset_rtlsim(sim) + super().toggle_clk(sim) + else: + pyxsi_rpcclient.reset_rtlsim(sim) + io_dict = {"inputs": {"in0": rtlsim_inp0, "in1": rtlsim_inp1}, "outputs": {"out": []}} + self.rtlsim_multi_io(sim, io_dict) + rtlsim_output = io_dict["outputs"]["out"] + if rtlsim_backend == "pyxsi": + 
pyxsi_rpcclient.close_rtlsim(sim) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() @@ -254,38 +260,3 @@ def pragmas(self): "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") - - def prepare_rtlsim(self): - verilog_files = self.get_all_verilog_filenames(abspath=True) - single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") - - ret = pyxsi_rpcclient.compile_sim_obj( - self.get_verilog_top_module_name(), verilog_files, single_src_dir - ) - - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", ret[0] + "/" + ret[1]) - - def get_rtlsim(self): - sim_xo_path = self.get_nodeattr("rtlsim_so") - sim_base, sim_rel = sim_xo_path.split("xsim.dir") - sim_rel = "xsim.dir" + sim_rel - tracefile = None - return pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, tracefile) - - def rtlsim(self, sim, inp, inp2=None): - """Runs the pyverilator simulation by passing the input values to the simulation, - toggle the clock and observing the execution time. 
Function contains also an - observation loop that can abort the simulation if no output value is produced - after 100 cycles.""" - - pyxsi_rpcclient.reset_rtlsim(sim) - io_dict = {"inputs": {"in0": inp, "in1": inp2}, "outputs": {"out": []}} - num_out_values = self.get_number_output_values() - sname = "_" + self.hls_sname() + "_" - total_cycle_count = pyxsi_rpcclient.rtlsim_multi_io( - sim, io_dict, num_out_values, sname=sname - ) - self.set_nodeattr("cycles_rtlsim", total_cycle_count) - - return io_dict["outputs"]["out"] diff --git a/tests/fpgadataflow/test_fpgadataflow_addstreams.py b/tests/fpgadataflow/test_fpgadataflow_addstreams.py index 484cbbe04a..3b12e86bfa 100644 --- a/tests/fpgadataflow/test_fpgadataflow_addstreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_addstreams.py @@ -47,7 +47,7 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -def make_addstreams_modelwrapper(ch, pe, idt): +def make_addstreams_modelwrapper(ch, pe, idt, rtlsim_backend): inp1 = helper.make_tensor_value_info("inp1", TensorProto.FLOAT, [1, ch]) inp2 = helper.make_tensor_value_info("inp2", TensorProto.FLOAT, [1, ch]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ch]) @@ -62,6 +62,7 @@ def make_addstreams_modelwrapper(ch, pe, idt): PE=pe, inputDataType=idt.name, preferred_impl_style="hls", + rtlsim_backend=rtlsim_backend, ) graph = helper.make_graph( nodes=[addstreams_node], @@ -91,20 +92,28 @@ def prepare_inputs(input1, input2): @pytest.mark.parametrize("fold", [-1, 2, 1]) # execution mode @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +# rtlsim_backend +@pytest.mark.parametrize("rtlsim_backend", ["pyverilator", "pyxsi"]) @pytest.mark.fpgadataflow @pytest.mark.vivado -def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode): +def test_fpgadataflow_addstreams(idt, ch, fold, exec_mode, rtlsim_backend): if fold == -1: pe = 1 else: pe = max(1, ch // fold) assert ch % pe == 0 + if exec_mode == "cppsim" and 
rtlsim_backend == "pyxsi": + pytest.skip( + """Skip combination of paramaters because rtlsim_backend + only influences rtlsim and not cppsim.""" + ) + # generate input data x1 = gen_finn_dt_tensor(idt, (1, ch)) x2 = gen_finn_dt_tensor(idt, (1, ch)) - model = make_addstreams_modelwrapper(ch, pe, idt) + model = make_addstreams_modelwrapper(ch, pe, idt, rtlsim_backend) # prepare input data input_dict = prepare_inputs(x1, x2) From e1a411bbef6c9fbb71f5e5f7c4e5080eca688a51 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 21 Oct 2024 21:11:48 +0100 Subject: [PATCH 043/102] [xsi] add a first C++ template draft for driving XSI rtlsim directly --- src/finn/core/rtlsim_exec.py | 114 +++++++++- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 272 ++++++++++++++++++++++++ 2 files changed, 385 insertions(+), 1 deletion(-) create mode 100644 src/finn/qnn-data/cpp/xsi_simdriver.cpp diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index b8623086ef..f5e49dddbe 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -26,11 +26,18 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import numpy as np import os from pyverilator.util.axi_utils import reset_rtlsim, rtlsim_multi_io from qonnx.custom_op.registry import getCustomOp -from finn.util.basic import make_build_dir, pyverilate_get_liveness_threshold_cycles +from finn.util.basic import ( + get_finn_root, + get_vivado_root, + launch_process_helper, + make_build_dir, + pyverilate_get_liveness_threshold_cycles, +) from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy from finn.util.pyverilator import pyverilate_stitched_ip @@ -105,6 +112,109 @@ def file_to_basename(x): return os.path.basename(os.path.realpath(x)) +def rtlsim_exec_cppxsi(model, execution_context): + """Use XSI to execute given model with stitched IP. 
The execution + context contains the input values. + """ + n_inputs = 1 + max_iters = 1000 + + # ensure stitched ip project already exists + assert os.path.isfile( + model.get_metadata_prop("wrapper_filename") + ), """The + file name from metadata property "wrapper_filename" doesn't exist.""" + assert os.path.isdir( + model.get_metadata_prop("vivado_stitch_proj") + ), """The + directory from metadata property "vivado_stitch_proj" doesn't exist""" + trace_file = model.get_metadata_prop("rtlsim_trace") + io_dict, if_dict, num_out_values, o_tensor_info = prep_rtlsim_io_dict(model, execution_context) + + # prepare rtlsim model + rtlsim_so = model.get_metadata_prop("rtlsim_so") + top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename")) + top_module_name = top_module_file_name.strip(".v") + if (rtlsim_so is None) or (not os.path.isfile(rtlsim_so)): + vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj") + with open(vivado_stitch_proj_dir + "/all_verilog_srcs.txt", "r") as f: + all_verilog_srcs = f.read().split() + single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") + + rtlsim_so = pyxsi_rpcclient.compile_sim_obj( + top_module_name, all_verilog_srcs, single_src_dir + ) + # save generated lib filename in attribute + model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) + sim_base, sim_rel = rtlsim_so + # pass in correct tracefile from attribute + if trace_file == "default": + trace_file = top_module_file_name + ".wdb" + else: + sim_base, sim_rel = rtlsim_so.split("xsim.dir") + sim_rel = "xsim.dir" + sim_rel + + fifosim_cpp_fname = get_finn_root() + "/src/finn/qnn-data/cpp/xsi_simdriver.cpp" + with open(fifosim_cpp_fname, "r") as f: + fifosim_cpp_template = f.read() + assert len(model.graph.input) == 1, "Only a single input stream is supported" + assert len(model.graph.output) == 1, "Only a single output stream is supported" + iname = model.graph.input[0].name + first_node = 
model.find_consumer(iname) + oname = model.graph.output[0].name + last_node = model.find_producer(oname) + assert (first_node is not None) and (last_node is not None), "Failed to find first/last nodes" + fnode_inst = getCustomOp(first_node) + lnode_inst = getCustomOp(last_node) + ishape_folded = fnode_inst.get_folded_input_shape() + oshape_folded = lnode_inst.get_folded_output_shape() + + template_dict = { + "ITERS_PER_INPUT": np.prod(ishape_folded[:-1]), + "ITERS_PER_OUTPUT": np.prod(oshape_folded[:-1]), + "N_INPUTS": n_inputs, + "MAX_ITERS": max_iters, + "TOP_MODULE_NAME": top_module_name, + } + + for key, val in template_dict.items(): + fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val)) + + with open(sim_base + "/rtlsim_xsi.cpp", "w") as f: + f.write(fifosim_cpp_template) + + vivado_incl_dir = get_vivado_root() + "/data/xsim/include" + xsi_include_dir = get_finn_root() + "/deps/pyxsi/src" + + build_cmd = [ + "g++", + f"-I{xsi_include_dir}", + f"-I{vivado_incl_dir}", + "-std=c++14", + "-O3", + "-o", + "rtlsim_xsi", + "rtlsim_xsi.cpp", + f"{xsi_include_dir}/xsi_loader.cpp", + "-ldl", + "-lrt", + ] + with open(sim_base + "/compile_rtlsim.sh", "w") as f: + f.write(" ".join(build_cmd)) + + launch_process_helper(build_cmd, cwd=sim_base) + + assert os.path.isfile(sim_base + "/rtlsim_xsi"), "Failed to compile rtlsim executable" + runsim_env = os.environ.copy() + runsim_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" + runsim_cmd = ["./rtlsim_xsi"] + + with open(sim_base + "/run_rtlsim.sh", "w") as f: + f.write(f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi") + + launch_process_helper(runsim_cmd, proc_env=runsim_env, cwd=sim_base) + + def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): """Use PyXSI to execute given model with stitched IP. The execution context contains the input values. 
Hook functions can be optionally @@ -146,6 +256,8 @@ def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): trace_file = top_module_file_name + ".wdb" sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, trace_file) else: + sim_base, sim_rel = rtlsim_so.split("xsim.dir") + sim_rel = "xsim.dir" + sim_rel sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, trace_file) # reset and call rtlsim, including any pre/post hooks diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp new file mode 100644 index 0000000000..69752d55c4 --- /dev/null +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -0,0 +1,272 @@ +/* Copyright (C) 2024, Advanced Micro Devices, Inc. +All rights reserved. +# +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +# +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +# +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +# +* Neither the name of FINN nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. +# +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include +#include +#include + +#include "xsi_loader.h" + +#include +#include +#include +#include +#include +#include + +#ifdef DEBUG +#define TRACE(x) x +#else +#define TRACE(x) ; +#endif + +using namespace std; + +const char SLV_U=0; +const char SLV_X=1; +const char SLV_0=2; +const char SLV_1=3; +const char SLV_Z=4; +const char SLV_W=5; +const char SLV_L=6; +const char SLV_H=7; +const char SLV_DASH=8; +const s_xsi_vlog_logicval one_val = {0X00000001, 0X00000000}; +const s_xsi_vlog_logicval zero_val = {0X00000000, 0X00000000}; + +enum class PortDirection {INPUT, OUTPUT, INOUT}; + +size_t roundup_int_div(size_t dividend, size_t divisor) { + return (dividend + divisor - 1) / divisor; +} + +void clear_bit_atindex(XSI_UINT32 &container, size_t ind) { + container = container & ~((XSI_UINT32)1 << ind); +} + +void set_bit_atindex(XSI_UINT32 &container, size_t ind) { + container = container | ((XSI_UINT32)1 << ind); +} + +bool test_bit_atindex(XSI_UINT32 &container, size_t ind) { + return ((container & ((XSI_UINT32)1 << ind)) > 0 ? 
true : false); +} + +void set_logic_val_atindex(s_xsi_vlog_logicval &logicval, size_t ind, char val) { + switch(val) { + case '0': + clear_bit_atindex((logicval.aVal), ind); + clear_bit_atindex((logicval.bVal), ind); + break; + case '1': + set_bit_atindex((logicval.aVal), ind); + clear_bit_atindex((logicval.bVal), ind); + break; + case 'X': + set_bit_atindex((logicval.aVal), ind); + set_bit_atindex((logicval.bVal), ind); + break; + case 'Z': + clear_bit_atindex((logicval.aVal), ind); + set_bit_atindex((logicval.bVal), ind); + break; + default: + throw std::runtime_error("Unrecognized value for set_logic_val_atindex: "+val); + } +} + +void string_to_logic_val(std::string str, s_xsi_vlog_logicval* value) { + size_t str_len = str.length(); + size_t num_words = roundup_int_div(str_len, 32); + memset(value, 0, sizeof(s_xsi_vlog_logicval)*num_words); + for(size_t i = 0; i < str_len; i++) { + size_t array_ind = i / 32; + size_t bit_ind = i % 32; + set_logic_val_atindex(value[array_ind], bit_ind, str[str_len-i-1]); + } +} + +std::string logic_val_to_string(s_xsi_vlog_logicval* value, size_t n_bits) { + std::string ret(n_bits, '?'); + for(size_t i = 0; i < n_bits; i++) { + size_t array_ind = i / 32; + size_t bit_ind = i % 32; + bool is_set_aVal = test_bit_atindex(value[array_ind].aVal, bit_ind); + bool is_set_bVal = test_bit_atindex(value[array_ind].bVal, bit_ind); + if(!is_set_aVal && !is_set_bVal) { + ret[n_bits-i-1] = '0'; + } else if(is_set_aVal && !is_set_bVal) { + ret[n_bits-i-1] = '1'; + } else if(!is_set_aVal && is_set_bVal) { + ret[n_bits-i-1] = 'X'; + } else { + ret[n_bits-i-1] = 'Z'; + } + } + //std::cout << "logic_val_to_string logicval.a=" << std::hex << value[0].aVal << " logicval.b=" << value[0].bVal << " retstr " << ret << std::dec << std::endl; + return ret; +} + +// top-level sim object +Xsi::Loader *top; +// mapping of port names to port numbers +map port_map; + +void populate_port_map() { + for(int i=0; inum_ports(); i++) { + string port_name = 
top->get_str_property_port(i, xsiNameTopPort); + port_map[port_name] = i; + } +} + +void set_bool(string name) { + top->put_value(port_map[name], &one_val); +} + +void clear_bool(string name) { + top->put_value(port_map[name], &zero_val); +} + +bool chk_bool(string name) { + s_xsi_vlog_logicval buf = {0X00000000, 0X00000000}; + top->get_value(port_map[name], &buf); + return logic_val_to_string(&buf, 1)[0] == '1'; +} + +inline void toggle_clk_1() { + set_bool("ap_clk"); + top->run(5); +} + +inline void toggle_clk_0() { + clear_bool("ap_clk"); + top->run(5); +} + +inline void toggle_clk() { + toggle_clk_0(); + toggle_clk_1(); +} + + +void reset() { + clear_bool("ap_clk"); + clear_bool("ap_rst_n"); + for(unsigned i = 0; i < 2; i++) { + toggle_clk(); + } + set_bool("ap_rst_n"); +} + +int main(int argc, char *argv[]) { + std::string simengine_libname = "librdi_simulator_kernel.so"; + std::string design_libname = "xsim.dir/@TOP_MODULE_NAME@/xsimk.so"; + top = new Xsi::Loader(design_libname, simengine_libname); + + s_xsi_setup_info info; + memset(&info, 0, sizeof(info)); + info.logFileName = NULL; + char wdbName[] = "test.wdb"; + info.wdbFileName = wdbName; + + top->open(&info); + top->trace_all(); + + populate_port_map(); + + unsigned n_iters_per_input = @ITERS_PER_INPUT@; + unsigned n_iters_per_output = @ITERS_PER_OUTPUT@; + unsigned n_inputs = @N_INPUTS@; + unsigned max_iters = @MAX_ITERS@; + + reset(); + + unsigned n_in_txns = 0, n_out_txns = 0, iters = 0, last_output_at = 0; + unsigned latency = 0; + + bool exit_criterion = false; + + cout << "Simulation starting" << endl; + cout << "Number of inputs to write " << n_iters_per_input * n_inputs << endl; + cout << "Number of outputs to expect " << n_iters_per_output * n_inputs << endl; + cout << "No-output timeout clock cycles " << max_iters << endl; + + chrono::steady_clock::time_point begin = chrono::steady_clock::now(); + + while(!exit_criterion) { + toggle_clk_0(); + + set_bool("m_axis_0_tready"); + 
set_bool("s_axis_0_tvalid"); + + toggle_clk(); + iters++; + if(iters % 1000 == 0) { + cout << "Elapsed iters " << iters << " inps " << n_in_txns << " outs " << n_out_txns << endl; + chrono::steady_clock::time_point end = chrono::steady_clock::now(); + cout << "Elapsed since last report = " << chrono::duration_cast(end - begin).count() << "[s]" << endl; + begin = end; + } + if(chk_bool("s_axis_0_tready") && chk_bool("s_axis_0_tvalid")) { + n_in_txns++; + if(n_in_txns == n_iters_per_input * n_inputs) { + clear_bool("s_axis_0_tvalid"); + cout << "All inputs written at cycle " << iters << endl; + } + } + if(chk_bool("m_axis_0_tvalid")) { + n_out_txns++; + last_output_at = iters; + if(n_out_txns == n_iters_per_output) { + latency = iters; + } + } + + exit_criterion = ((n_in_txns >= n_iters_per_input * n_inputs) && (n_out_txns >= n_iters_per_output * n_inputs)) || ((iters-last_output_at) > max_iters); + } + + cout << "Simulation finished" << endl; + cout << "Number of inputs consumed " << n_in_txns << endl; + cout << "Number of outputs produced " << n_out_txns << endl; + cout << "Number of clock cycles " << iters << endl; + + ofstream results_file; + results_file.open("results.txt", ios::out | ios::trunc); + results_file << "N_IN_TXNS" << "\t" << n_in_txns << endl; + results_file << "N_OUT_TXNS" << "\t" << n_out_txns << endl; + results_file << "cycles" << "\t" << iters << endl; + results_file << "N" << "\t" << n_inputs << endl; + results_file << "latency_cycles" << "\t" << latency << endl; + //@FIFO_DEPTH_LOGGING@ + results_file.close(); + top->close(); + delete top; + + return 0; +} From 600b99984c5d9fe24a67a5f77065d3a0c31e7083 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 21 Oct 2024 23:25:46 +0200 Subject: [PATCH 044/102] [xsi] remodel C++ driver from rtlsim_multi_io in pyxsi --- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 73 +++++++++++++++++-------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp 
b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 69752d55c4..2fb079e0a6 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -178,10 +178,11 @@ inline void toggle_clk() { void reset() { clear_bool("ap_clk"); clear_bool("ap_rst_n"); - for(unsigned i = 0; i < 2; i++) { - toggle_clk(); - } + toggle_clk(); + toggle_clk(); set_bool("ap_rst_n"); + toggle_clk(); + toggle_clk(); } int main(int argc, char *argv[]) { @@ -209,6 +210,7 @@ int main(int argc, char *argv[]) { unsigned n_in_txns = 0, n_out_txns = 0, iters = 0, last_output_at = 0; unsigned latency = 0; + unsigned cycles_since_last_output = 0; bool exit_criterion = false; @@ -219,13 +221,48 @@ int main(int argc, char *argv[]) { chrono::steady_clock::time_point begin = chrono::steady_clock::now(); + bool input_done = false; + bool output_done = false; + bool timeout = false; + + set_bool("m_axis_0_tready"); + while(!exit_criterion) { + map signals_to_write; toggle_clk_0(); - set_bool("m_axis_0_tready"); - set_bool("s_axis_0_tvalid"); + if(chk_bool("s_axis_0_tready") && chk_bool("s_axis_0_tvalid")) { + n_in_txns++; + } + + if(chk_bool("m_axis_0_tready") && chk_bool("m_axis_0_tvalid")) { + n_out_txns++; + } else { + cycles_since_last_output++; + } + + if(n_in_txns == n_iters_per_input * n_inputs) { + cout << "All inputs written at cycle " << iters << endl; + signals_to_write["s_axis_0_tvalid"] = false; + } else if(n_in_txns < n_iters_per_input * n_inputs) { + signals_to_write["s_axis_0_tvalid"] = true; + } else { + cout << "Unknown stream condition for input!" 
<< endl; + signals_to_write["s_axis_0_tvalid"] = false; + } + + toggle_clk_1(); + // actuall write the desired signals from the map + for (auto const& x : signals_to_write) + { + if(x.second) { + set_bool(x.first); + } else { + clear_bool(x.first); + + } + } - toggle_clk(); iters++; if(iters % 1000 == 0) { cout << "Elapsed iters " << iters << " inps " << n_in_txns << " outs " << n_out_txns << endl; @@ -233,28 +270,21 @@ int main(int argc, char *argv[]) { cout << "Elapsed since last report = " << chrono::duration_cast(end - begin).count() << "[s]" << endl; begin = end; } - if(chk_bool("s_axis_0_tready") && chk_bool("s_axis_0_tvalid")) { - n_in_txns++; - if(n_in_txns == n_iters_per_input * n_inputs) { - clear_bool("s_axis_0_tvalid"); - cout << "All inputs written at cycle " << iters << endl; - } - } - if(chk_bool("m_axis_0_tvalid")) { - n_out_txns++; - last_output_at = iters; - if(n_out_txns == n_iters_per_output) { - latency = iters; - } - } - exit_criterion = ((n_in_txns >= n_iters_per_input * n_inputs) && (n_out_txns >= n_iters_per_output * n_inputs)) || ((iters-last_output_at) > max_iters); + input_done = (n_in_txns >= n_iters_per_input * n_inputs); + output_done = (n_out_txns >= n_iters_per_output * n_inputs); + timeout = (cycles_since_last_output > max_iters); + + exit_criterion = (input_done && output_done) || timeout; } cout << "Simulation finished" << endl; cout << "Number of inputs consumed " << n_in_txns << endl; cout << "Number of outputs produced " << n_out_txns << endl; cout << "Number of clock cycles " << iters << endl; + cout << "Input done? " << input_done << endl; + cout << "Output done? " << output_done << endl; + cout << "Timeout? 
" << timeout << endl; ofstream results_file; results_file.open("results.txt", ios::out | ios::trunc); @@ -266,7 +296,6 @@ int main(int argc, char *argv[]) { //@FIFO_DEPTH_LOGGING@ results_file.close(); top->close(); - delete top; return 0; } From 2223fed9a34df203bd4fbf5ad9a2748045a918da Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 10:45:19 +0200 Subject: [PATCH 045/102] [xsi] add comments to XSI C++ sim driver and Python fxn --- src/finn/core/rtlsim_exec.py | 66 ++++++++++--- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 124 ++++++++++++++---------- 2 files changed, 125 insertions(+), 65 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index f5e49dddbe..5bfdb1ae45 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -112,12 +112,34 @@ def file_to_basename(x): return os.path.basename(os.path.realpath(x)) -def rtlsim_exec_cppxsi(model, execution_context): - """Use XSI to execute given model with stitched IP. The execution - context contains the input values. +def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc_cpp=""): + """Use XSI C++ rtl simulation to execute given model with stitched IP. + The dummy_data_mode flag controls whether the simulation is driven by + dummy data or real data. The execution_context parameter must be formatted + according to whether dummy or real data is used. + Example with dummy_data = True: + execution_context = { + "inputs" : {"" : }, + "outputs" : {"" : }, + } + Example with dummy_data = False + execution_context = { + "inputs" : {"" : [list_of_input_data] }, + # output lists will be filled with actual data on return + "outputs" : {"" : [] }, + } + + The postproc_cpp optional argument can be used to inject C++ code to retrieve + extra data when the simulation is finished. See the @POSTPROC_CPP@ template argument + in the xsi_simdriver.cpp file to see what context and functions are available. 
+ """ - n_inputs = 1 - max_iters = 1000 + # TODO: support running functional rtlsim with real I/O data + # TODO: support running with multiple inputs/outputs + # TODO: rename utility fxn to remove "pyverilate", used for other backends too + timeout_cycles = pyverilate_get_liveness_threshold_cycles() + + assert dummy_data_mode, "Only dummy_data_mode=True is supported for now" # ensure stitched ip project already exists assert os.path.isfile( @@ -131,7 +153,7 @@ def rtlsim_exec_cppxsi(model, execution_context): trace_file = model.get_metadata_prop("rtlsim_trace") io_dict, if_dict, num_out_values, o_tensor_info = prep_rtlsim_io_dict(model, execution_context) - # prepare rtlsim model + # prepare rtlsim compiled object (unless it already exists) rtlsim_so = model.get_metadata_prop("rtlsim_so") top_module_file_name = file_to_basename(model.get_metadata_prop("wrapper_filename")) top_module_name = top_module_file_name.strip(".v") @@ -153,7 +175,7 @@ def rtlsim_exec_cppxsi(model, execution_context): else: sim_base, sim_rel = rtlsim_so.split("xsim.dir") sim_rel = "xsim.dir" + sim_rel - + # prepare the C++ sim driver template fifosim_cpp_fname = get_finn_root() + "/src/finn/qnn-data/cpp/xsi_simdriver.cpp" with open(fifosim_cpp_fname, "r") as f: fifosim_cpp_template = f.read() @@ -168,24 +190,38 @@ def rtlsim_exec_cppxsi(model, execution_context): lnode_inst = getCustomOp(last_node) ishape_folded = fnode_inst.get_folded_input_shape() oshape_folded = lnode_inst.get_folded_output_shape() + # TODO: retrieve the number of inputs from execution_context + n_inputs = 1 + # fill in the template arguments for sim driver template_dict = { + # number of input transactions per inference "ITERS_PER_INPUT": np.prod(ishape_folded[:-1]), + # number of output transactions per inference "ITERS_PER_OUTPUT": np.prod(oshape_folded[:-1]), + # number of inferences "N_INPUTS": n_inputs, - "MAX_ITERS": max_iters, + # max number of cycles to wait for output activity before timeout + "MAX_ITERS": 
timeout_cycles, + # name of the top-level HDL module "TOP_MODULE_NAME": top_module_name, + # names of the top-level AXI streams and signals + # TODO retrieve stream and signal names from model + "INSTREAM_NAME": "s_axis_0", + "OUTSTREAM_NAME": "m_axis_0", + "CLK_NAME": "ap_clk", + "NRST_NAME": "ap_rst_n", + # TODO control tracing and trace filename + "TRACE_FILE": top_module_name + ".wdb", } - for key, val in template_dict.items(): fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val)) - with open(sim_base + "/rtlsim_xsi.cpp", "w") as f: f.write(fifosim_cpp_template) vivado_incl_dir = get_vivado_root() + "/data/xsim/include" xsi_include_dir = get_finn_root() + "/deps/pyxsi/src" - + # launch g++ to compile the rtlsim executable build_cmd = [ "g++", f"-I{xsi_include_dir}", @@ -199,19 +235,19 @@ def rtlsim_exec_cppxsi(model, execution_context): "-ldl", "-lrt", ] + # write compilation command to a file for easy re-running/debugging with open(sim_base + "/compile_rtlsim.sh", "w") as f: f.write(" ".join(build_cmd)) - launch_process_helper(build_cmd, cwd=sim_base) - assert os.path.isfile(sim_base + "/rtlsim_xsi"), "Failed to compile rtlsim executable" + + # launch the rtlsim executable + # important to specify LD_LIBRARY_PATH here for XSI to work correctly runsim_env = os.environ.copy() runsim_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" runsim_cmd = ["./rtlsim_xsi"] - with open(sim_base + "/run_rtlsim.sh", "w") as f: f.write(f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi") - launch_process_helper(runsim_cmd, proc_env=runsim_env, cwd=sim_base) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 2fb079e0a6..d8532fa090 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -26,11 +26,20 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* C++ streaming rtlsim driver template for Verilog designs using XSI + - pushes input data into input AXI stream(s), either dummy or from file + - dumps output data from output AXI stream(s) if desired + - option to examine final simulation status to capture more info + +Note: all code template arguments formatted like @TEMPLATE@ must be filled in +prior to compilation +*/ + #include #include #include #include - +// currently using the pyxsi version and not the original Vivado version #include "xsi_loader.h" #include @@ -40,44 +49,39 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include -#ifdef DEBUG -#define TRACE(x) x -#else -#define TRACE(x) ; -#endif - using namespace std; -const char SLV_U=0; -const char SLV_X=1; -const char SLV_0=2; -const char SLV_1=3; -const char SLV_Z=4; -const char SLV_W=5; -const char SLV_L=6; -const char SLV_H=7; -const char SLV_DASH=8; +// utility functions and other declarations: +// constant binary 1- and 0-values for control logic const s_xsi_vlog_logicval one_val = {0X00000001, 0X00000000}; const s_xsi_vlog_logicval zero_val = {0X00000000, 0X00000000}; -enum class PortDirection {INPUT, OUTPUT, INOUT}; - +// rounded-up integer division size_t roundup_int_div(size_t dividend, size_t divisor) { return (dividend + divisor - 1) / divisor; } +// clear bit of 32-bit value at given index +// index must be in range [0, 31] void clear_bit_atindex(XSI_UINT32 &container, size_t ind) { container = container & ~((XSI_UINT32)1 << ind); } + +// set bit of 32-bit value at given index +// index must be in range [0, 31] void set_bit_atindex(XSI_UINT32 &container, size_t ind) { container = container | ((XSI_UINT32)1 << ind); } +// test bit of 32-bit value at given index +// index must be in range [0, 31] bool test_bit_atindex(XSI_UINT32 &container, size_t ind) { return ((container & ((XSI_UINT32)1 << ind)) > 0 ? 
true : false); } +// set bit of given s_xsi_vlog_logicval (Verilog signal dtype) +// index must be in range [0, 31] void set_logic_val_atindex(s_xsi_vlog_logicval &logicval, size_t ind, char val) { switch(val) { case '0': @@ -101,6 +105,8 @@ void set_logic_val_atindex(s_xsi_vlog_logicval &logicval, size_t ind, char val) } } +// convert a given Verilog logic value string into an array of s_xsi_vlog_logicval +// string must be composed of Verilog logic values: 0, 1, X, Z void string_to_logic_val(std::string str, s_xsi_vlog_logicval* value) { size_t str_len = str.length(); size_t num_words = roundup_int_div(str_len, 32); @@ -112,6 +118,9 @@ void string_to_logic_val(std::string str, s_xsi_vlog_logicval* value) { } } +// convert array of Verilog logic values to a string +// n_bits specifies how many actual bits of value the array contains +// length of returned string (in characters) will be equal to n_bits std::string logic_val_to_string(s_xsi_vlog_logicval* value, size_t n_bits) { std::string ret(n_bits, '?'); for(size_t i = 0; i < n_bits; i++) { @@ -133,11 +142,12 @@ std::string logic_val_to_string(s_xsi_vlog_logicval* value, size_t n_bits) { return ret; } -// top-level sim object +// top-level sim object for the simulation Xsi::Loader *top; // mapping of port names to port numbers map port_map; +// walk the top-level IO interfaces to populate the port_map void populate_port_map() { for(int i=0; inum_ports(); i++) { string port_name = top->get_str_property_port(i, xsiNameTopPort); @@ -145,62 +155,69 @@ void populate_port_map() { } } +// set the 1-bit signal with given name to 1 void set_bool(string name) { top->put_value(port_map[name], &one_val); } +// set the 1-bit signal with given name to 0 void clear_bool(string name) { top->put_value(port_map[name], &zero_val); } +// check the 1-bit signal with given name for equality to 1 bool chk_bool(string name) { s_xsi_vlog_logicval buf = {0X00000000, 0X00000000}; top->get_value(port_map[name], &buf); return 
logic_val_to_string(&buf, 1)[0] == '1'; } +// rising clock edge + high clock inline void toggle_clk_1() { - set_bool("ap_clk"); + set_bool("@CLK_NAME@"); top->run(5); } +// falling clock edge + low clock inline void toggle_clk_0() { - clear_bool("ap_clk"); + clear_bool("@CLK_NAME@"); top->run(5); } +// drive simulation for 1 clock period inline void toggle_clk() { toggle_clk_0(); toggle_clk_1(); } - +// apply reset to the simulation void reset() { - clear_bool("ap_clk"); - clear_bool("ap_rst_n"); + clear_bool("@CLK_NAME@"); + clear_bool("@NRST_NAME@"); toggle_clk(); toggle_clk(); - set_bool("ap_rst_n"); + set_bool("@NRST_NAME@"); toggle_clk(); toggle_clk(); } int main(int argc, char *argv[]) { + // load pre-compiled rtl simulation std::string simengine_libname = "librdi_simulator_kernel.so"; std::string design_libname = "xsim.dir/@TOP_MODULE_NAME@/xsimk.so"; top = new Xsi::Loader(design_libname, simengine_libname); - s_xsi_setup_info info; memset(&info, 0, sizeof(info)); info.logFileName = NULL; - char wdbName[] = "test.wdb"; + char wdbName[] = "@TRACE_FILE@"; info.wdbFileName = wdbName; - top->open(&info); + // TODO add option to enable/disable tracing for faster sim? 
top->trace_all(); populate_port_map(); + // how much data to push into/pull out of sim unsigned n_iters_per_input = @ITERS_PER_INPUT@; unsigned n_iters_per_output = @ITERS_PER_OUTPUT@; unsigned n_inputs = @N_INPUTS@; @@ -225,59 +242,65 @@ int main(int argc, char *argv[]) { bool output_done = false; bool timeout = false; - set_bool("m_axis_0_tready"); + // enable reception on the output stream + set_bool("@OUTSTREAM_NAME@_tready"); while(!exit_criterion) { + // keep track of which signals to write + // actual writes will be done after rising clock edge + // TODO needs to be extended to non-bool signals for actual input data map signals_to_write; + // toggle falling clock edge and drive low clock toggle_clk_0(); - - if(chk_bool("s_axis_0_tready") && chk_bool("s_axis_0_tvalid")) { + // check for transactions on the input stream + if(chk_bool("@INSTREAM_NAME@_tready") && chk_bool("@INSTREAM_NAME@_tvalid")) { n_in_txns++; } - - if(chk_bool("m_axis_0_tready") && chk_bool("m_axis_0_tvalid")) { + // check for transactions on the output stream + if(chk_bool("@OUTSTREAM_NAME@_tready") && chk_bool("@OUTSTREAM_NAME@_tvalid")) { n_out_txns++; + // TODO add output data capture to file here + // (unless we are in dummy data mode) } else { + // keep track of no-activity cycles for timeout cycles_since_last_output++; } - + // determine whether we have more inputs to feed if(n_in_txns == n_iters_per_input * n_inputs) { - cout << "All inputs written at cycle " << iters << endl; - signals_to_write["s_axis_0_tvalid"] = false; + signals_to_write["@INSTREAM_NAME@_tvalid"] = false; } else if(n_in_txns < n_iters_per_input * n_inputs) { - signals_to_write["s_axis_0_tvalid"] = true; + signals_to_write["@INSTREAM_NAME@_tvalid"] = true; } else { + // more input transactions than specified, should never happen + // most likely a bug in the C++ driver code if this happens cout << "Unknown stream condition for input!" 
<< endl; - signals_to_write["s_axis_0_tvalid"] = false; + signals_to_write["@INSTREAM_NAME@_tvalid"] = false; } - + // toggle rising clock edge and drive high clock toggle_clk_1(); - // actuall write the desired signals from the map + // actually write the desired signals from the map for (auto const& x : signals_to_write) { - if(x.second) { - set_bool(x.first); - } else { - clear_bool(x.first); - - } + if(x.second) set_bool(x.first); + else clear_bool(x.first); } - + // keep track of elapsed clock cycles iters++; + // show a progress message once in a while if(iters % 1000 == 0) { cout << "Elapsed iters " << iters << " inps " << n_in_txns << " outs " << n_out_txns << endl; chrono::steady_clock::time_point end = chrono::steady_clock::now(); cout << "Elapsed since last report = " << chrono::duration_cast(end - begin).count() << "[s]" << endl; begin = end; } - + // check whether the exit criteria are reached input_done = (n_in_txns >= n_iters_per_input * n_inputs); output_done = (n_out_txns >= n_iters_per_output * n_inputs); timeout = (cycles_since_last_output > max_iters); - exit_criterion = (input_done && output_done) || timeout; } + // dump final simulation statistics to stdout and file cout << "Simulation finished" << endl; cout << "Number of inputs consumed " << n_in_txns << endl; cout << "Number of outputs produced " << n_out_txns << endl; @@ -293,7 +316,8 @@ int main(int argc, char *argv[]) { results_file << "cycles" << "\t" << iters << endl; results_file << "N" << "\t" << n_inputs << endl; results_file << "latency_cycles" << "\t" << latency << endl; - //@FIFO_DEPTH_LOGGING@ + // optionally, extract more data from final status + @POSTPROC_CPP@ results_file.close(); top->close(); From 02f9c4bb85f5174aadb38e5037e041a87c2635ba Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 11:21:23 +0200 Subject: [PATCH 046/102] [xsi] fix missing template, parse and return results dict --- src/finn/core/rtlsim_exec.py | 11 +++++++++++ 1 file changed, 11 
insertions(+) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 5bfdb1ae45..b04e305357 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -213,6 +213,8 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc "NRST_NAME": "ap_rst_n", # TODO control tracing and trace filename "TRACE_FILE": top_module_name + ".wdb", + # code to post-process final sim status to extract more data + "POSTPROC_CPP": postproc_cpp, } for key, val in template_dict.items(): fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val)) @@ -250,6 +252,15 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc f.write(f"LD_LIBRARY_PATH={runsim_env['LD_LIBRARY_PATH']} ./rtlsim_xsi") launch_process_helper(runsim_cmd, proc_env=runsim_env, cwd=sim_base) + # parse results file and return dict + with open(sim_base + "/results.txt", "r") as f: + results = f.read().strip().split("\n") + ret_dict = {} + for result_line in results: + key, val = result_line.split("\t") + ret_dict[key] = int(val) + return ret_dict + def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): """Use PyXSI to execute given model with stitched IP. 
The execution From 2ae55c17504d5be08187ff7ca9b85eb5250f318e Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 11:49:04 +0200 Subject: [PATCH 047/102] [xsi] util functions to read signals in C++ template --- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index d8532fa090..7de61af1d4 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -155,6 +155,21 @@ void populate_port_map() { } } +string read_signal_binstr(string name) { + int port_id = port_map[name]; + int n_bits = top->get_int_property_port(port_id, xsiHDLValueSize); + size_t n_logicvals = roundup_int_div(n_bits, 32); + s_xsi_vlog_logicval *buf = new s_xsi_vlog_logicval[n_logicvals]; + top->get_value(port_id, buf); + string ret = logic_val_to_string(buf, n_bits); + delete [] buf; + return ret; +} + +unsigned int read_signal_uint(string name) { + return stoi(read_signal_binstr(name), 0, 2); +} + // set the 1-bit signal with given name to 1 void set_bool(string name) { top->put_value(port_map[name], &one_val); From 71a09647bb687970c68f1e3ab888bdaaa8997ce8 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 11:49:40 +0200 Subject: [PATCH 048/102] [xsi] don't call prep_rtlsim_io_dict for dummy data --- src/finn/core/rtlsim_exec.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index b04e305357..75a325cefa 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -122,11 +122,9 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc "inputs" : {"" : }, "outputs" : {"" : }, } - Example with dummy_data = False + Example with dummy_data = False: execution_context = { - "inputs" : {"" : [list_of_input_data] }, - # output lists will be filled with actual data 
on return - "outputs" : {"" : [] }, + "" : } The postproc_cpp optional argument can be used to inject C++ code to retrieve @@ -151,7 +149,10 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc ), """The directory from metadata property "vivado_stitch_proj" doesn't exist""" trace_file = model.get_metadata_prop("rtlsim_trace") - io_dict, if_dict, num_out_values, o_tensor_info = prep_rtlsim_io_dict(model, execution_context) + if not dummy_data_mode: + io_dict, if_dict, num_out_values, o_tensor_info = prep_rtlsim_io_dict( + model, execution_context + ) # prepare rtlsim compiled object (unless it already exists) rtlsim_so = model.get_metadata_prop("rtlsim_so") From 9dba7c27fcaa9e43bf8d192fa4dea11d992d7819 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 11:50:35 +0200 Subject: [PATCH 049/102] [FIFO] introduce XSI-based FIFO sizing --- .../fpgadataflow/set_fifo_depths.py | 53 +++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 78a6142a47..45d4a6dfce 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -42,6 +42,7 @@ ) from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance +from finn.core.rtlsim_exec import rtlsim_exec_cppxsi from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP @@ -196,6 +197,44 @@ def apply(self, model): return (model, False) +def xsi_fifosim(model, n_inferences, max_iters=100000000): + """Create a XSI model of stitched IP and use a simple C++ + driver to drive the input stream. 
Useful for FIFO sizing, latency + and throughput measurement.""" + + assert len(model.graph.input) == 1, "Only a single input stream is supported" + assert len(model.graph.output) == 1, "Only a single output stream is supported" + iname = model.graph.input[0].name + first_node = model.find_consumer(iname) + oname = model.graph.output[0].name + last_node = model.find_producer(oname) + assert (first_node is not None) and (last_node is not None), "Failed to find first/last nodes" + # define execution context for dummy data mode: + # only number of transactions, no real data + # TODO add support for multiple I/O streams + ctx = { + "global_in": n_inferences, + } + # create C++ code snippet for postprocessing: + # grab maxcount values from FIFOs, dump into existing results file + fifo_log = [] + fifo_log_templ = ' results_file << "maxcount%s" << "\\t" ' + fifo_log_templ += '<< to_string(read_signal_uint("maxcount%s")) << endl;' + fifo_nodes = model.get_nodes_by_op_type("StreamingFIFO_rtl") + fifo_ind = 0 + for fifo_node in fifo_nodes: + fifo_node = getCustomOp(fifo_node) + if fifo_node.get_nodeattr("depth_monitor") == 1: + suffix = "" if fifo_ind == 0 else "_%d" % fifo_ind + fifo_log.append(fifo_log_templ % (suffix, suffix)) + fifo_ind += 1 + fifo_log = "\n".join(fifo_log) + # run XSI sim with postproc + ret_dict = rtlsim_exec_cppxsi(model, ctx, dummy_data_mode=True, postproc_cpp=fifo_log) + + return ret_dict + + class InsertAndSetFIFODepths(Transformation): """Insert appropriate-depth StreamingFIFOs through RTLSim that preserve throughput in the created accelerator. 
@@ -380,6 +419,8 @@ def apply(self, model): warnings.warn("No output detected, calculated FIFO depths may not be correct") else: # do rtlsim in C++ for FIFO sizing + # use the rtlsim_backend metadata_prop to decide which backend to use + backend = model.get_metadata_prop("rtlsim_backend") # determine # inputs for FIFO sizing according to topology type swg_nodes = [ x for x in model.graph.node if x.op_type.startswith("ConvolutionInputGenerator") @@ -387,13 +428,17 @@ def apply(self, model): if len(swg_nodes) == 0: # MLP, no layer overlap # assuming half the nodes are now FIFOs, use half the # of - # nodes as # inputs to drive the imulation - n_inputs = int(len(model.graph.node) / 2) + # nodes as # inputs to drive the simulation + n_inferences = int(len(model.graph.node) / 2) else: # convnet, two inputs are typically enough to fill entire # layer pipeline due to overlaps - n_inputs = 2 - sim = verilator_fifosim(model, n_inputs) + n_inferences = 2 + + if backend is None or backend in ["verilator", "pyverilator"]: + sim = verilator_fifosim(model, n_inferences) + elif backend in ["xsi", "pyxsi"]: + sim = xsi_fifosim(model, n_inferences) for ind, node in enumerate(fifo_nodes): maxcount_name = "maxcount_%d" % ind From 69df427a1c4d1fa33d0dd1a80d6a47e5aa15bbbb Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 14:57:10 +0100 Subject: [PATCH 050/102] [xsi] make tracing optional --- src/finn/core/rtlsim_exec.py | 5 +++-- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 6 ++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 75a325cefa..1f7571941b 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -212,8 +212,9 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc "OUTSTREAM_NAME": "m_axis_0", "CLK_NAME": "ap_clk", "NRST_NAME": "ap_rst_n", - # TODO control tracing and trace filename - "TRACE_FILE": top_module_name + ".wdb", + # 
control tracing and trace filename + "TRACE_FILE": "NULL" if trace_file is None else f'"{trace_file}"', + "TRACE_CMD": "" if trace_file is None else "top->trace_all();", # code to post-process final sim status to extract more data "POSTPROC_CPP": postproc_cpp, } diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 7de61af1d4..5cefc6ce6b 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -224,11 +224,9 @@ int main(int argc, char *argv[]) { s_xsi_setup_info info; memset(&info, 0, sizeof(info)); info.logFileName = NULL; - char wdbName[] = "@TRACE_FILE@"; - info.wdbFileName = wdbName; + info.wdbFileName = @TRACE_FILE@; top->open(&info); - // TODO add option to enable/disable tracing for faster sim? - top->trace_all(); + @TRACE_CMD@ populate_port_map(); From eab118d77e8f871fc6899f2fec2945e12d41a9c4 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 22 Oct 2024 14:58:19 +0100 Subject: [PATCH 051/102] [FIFO] remove stitched IP and rtlsim metadata after FIFO sizing --- .../transformation/fpgadataflow/set_fifo_depths.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 45d4a6dfce..d8d8d9afa6 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -439,6 +439,8 @@ def apply(self, model): sim = verilator_fifosim(model, n_inferences) elif backend in ["xsi", "pyxsi"]: sim = xsi_fifosim(model, n_inferences) + else: + assert False, f"Unrecognized backend for InsertAndSetFIFODepths: {backend}" for ind, node in enumerate(fifo_nodes): maxcount_name = "maxcount_%d" % ind @@ -494,6 +496,15 @@ def apply(self, model): # remove shallow FIFOs model = model.transform(RemoveShallowFIFOs()) + # clean up references to stitched IP and rtlsim objects + # (the stitched IP needs to be 
re-done after FIFO sizing) + model.set_metadata_prop("rtlsim_trace", "") + model.set_metadata_prop("rtlsim_so", "") + model.set_metadata_prop("vivado_stitch_proj", "") + model.set_metadata_prop("wrapper_filename", "") + model.set_metadata_prop("vivado_stitch_vlnv", "") + model.set_metadata_prop("vivado_stitch_ifnames", "") + # reflect final values in attributes for node in model.graph.node: if not node.op_type.startswith("StreamingFIFO"): From 04c34a0d0031cd6699a49e4bbe18f024bc6fba69 Mon Sep 17 00:00:00 2001 From: Alexander Hornburg Date: Wed, 6 Nov 2024 18:20:02 +0000 Subject: [PATCH 052/102] [xsi] add Vivado lib path to global LD_LIBRARY_PATH again XSI needs to find some Vivado shared libraries. This can be done by adding it to LD_LIBRARY_PATH. However, just adding this path by itself causes conflicts with libssl. It would pick up libssl within the Vivado installation instead of the system libssl. This causes various issues in using https, curl, wget or any kind of secure web download in the whole Docker image. The workaround to the above issue is to prepend Docker's system lib path (which contains libssl) before Vivado's lib path in LD_LIBRARY_PATH. --- docker/finn_entrypoint.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index 1eb9345f89..af6b716cd7 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -118,6 +118,7 @@ else cd $OLDPWD fi export PYTHONPATH=$PYTHONPATH:${FINN_ROOT}/deps/pyxsi:${FINN_ROOT}/deps/pyxsi/py + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/lib/x86_64-linux-gnu/:${XILINX_VIVADO}/lib/lnx64.o fi if [ -f "$HLS_PATH/settings64.sh" ];then From 83dd9445ee6e9216f3c913fc8a903247ee56d1b3 Mon Sep 17 00:00:00 2001 From: Alexander Hornburg Date: Thu, 14 Nov 2024 13:22:48 +0000 Subject: [PATCH 053/102] [xsi] replace pyxsi_rpc with pyxsi_utils This commit removes the pyxsi RPC mechanism and instead calls pyxsi directly.
pyxsi dependency is updated to include a double-free bugfix. Ported pyxsi_utils from https://github.com/maltanar/pyxsi/tree/feature/utils --- fetch-repos.sh | 2 +- src/finn/core/rtlsim_exec.py | 16 +- .../fpgadataflow/hls/addstreams_hls.py | 6 +- src/finn/custom_op/fpgadataflow/hlsbackend.py | 4 +- src/finn/custom_op/fpgadataflow/hwcustomop.py | 13 +- src/finn/util/pyxsi_rpcclient.py | 223 ------------------ src/finn/util/pyxsi_rpcserver.py | 129 ---------- 7 files changed, 20 insertions(+), 373 deletions(-) delete mode 100644 src/finn/util/pyxsi_rpcclient.py delete mode 100644 src/finn/util/pyxsi_rpcserver.py diff --git a/fetch-repos.sh b/fetch-repos.sh index 73d5e6bc39..de3a95ee4f 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="dc074bc1b3ecc2ab884531565d1aca6aa33ea5b9" +PYXSI_COMMIT="8c5abd8546bfc6e31292a52b5374116381239651" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 1f7571941b..b8781dc595 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -46,7 +46,7 @@ except ModuleNotFoundError: PyVerilator = None -import finn.util.pyxsi_rpcclient as pyxsi_rpcclient +import pyxsi_utils def prep_rtlsim_io_dict(model, execution_context): @@ -164,7 +164,7 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc all_verilog_srcs = f.read().split() single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") - rtlsim_so = pyxsi_rpcclient.compile_sim_obj( + rtlsim_so = pyxsi_utils.compile_sim_obj( top_module_name, all_verilog_srcs, single_src_dir ) # save generated lib filename in attribute @@ -294,7
+294,7 @@ def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): top_module_name = top_module_file_name.strip(".v") single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") - rtlsim_so = pyxsi_rpcclient.compile_sim_obj( + rtlsim_so = pyxsi_utils.compile_sim_obj( top_module_name, all_verilog_srcs, single_src_dir ) # save generated lib filename in attribute @@ -303,17 +303,17 @@ def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): # pass in correct tracefile from attribute if trace_file == "default": trace_file = top_module_file_name + ".wdb" - sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, trace_file) + sim = pyxsi_utils.load_sim_obj(sim_base, sim_rel, trace_file) else: sim_base, sim_rel = rtlsim_so.split("xsim.dir") sim_rel = "xsim.dir" + sim_rel - sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, trace_file) + sim = pyxsi_utils.load_sim_obj(sim_base, sim_rel, trace_file) # reset and call rtlsim, including any pre/post hooks - pyxsi_rpcclient.reset_rtlsim(sim) + pyxsi_utils.reset_rtlsim(sim) if pre_hook is not None: pre_hook(sim) - n_cycles = pyxsi_rpcclient.rtlsim_multi_io( + n_cycles = pyxsi_utils.rtlsim_multi_io( sim, io_dict, num_out_values, @@ -324,7 +324,7 @@ def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): post_hook(sim) # important to call close_rtlsim for pyxsi to flush traces and stop # the RPC server process - pyxsi_rpcclient.close_rtlsim(sim) + pyxsi_utils.close_rtlsim(sim) # unpack outputs and put back into execution context for o, o_vi in enumerate(model.graph.output): diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index cd7686674b..13b50007f9 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -29,7 +29,7 @@ import numpy as np import os -import finn.util.pyxsi_rpcclient as pyxsi_rpcclient +import 
pyxsi_utils from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -131,12 +131,12 @@ def execute_node(self, context, graph): super().reset_rtlsim(sim) super().toggle_clk(sim) else: - pyxsi_rpcclient.reset_rtlsim(sim) + pyxsi_utils.reset_rtlsim(sim) io_dict = {"inputs": {"in0": rtlsim_inp0, "in1": rtlsim_inp1}, "outputs": {"out": []}} self.rtlsim_multi_io(sim, io_dict) rtlsim_output = io_dict["outputs"]["out"] if rtlsim_backend == "pyxsi": - pyxsi_rpcclient.close_rtlsim(sim) + pyxsi_utils.close_rtlsim(sim) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index 76e79693c3..bb47af402d 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -32,7 +32,7 @@ from abc import ABC, abstractmethod from qonnx.core.datatype import DataType -import finn.util.pyxsi_rpcclient as pyxsi_rpcclient +import pyxsi_utils from finn.custom_op.fpgadataflow import templates from finn.util.basic import CppBuilder, get_rtlsim_trace_depth, make_build_dir from finn.util.hls import CallHLS @@ -116,7 +116,7 @@ def prepare_rtlsim(self): # save generated lib filename in attribute self.set_nodeattr("rtlsim_so", sim.lib._name) elif rtlsim_backend == "pyxsi": - ret = pyxsi_rpcclient.compile_sim_obj( + ret = pyxsi_utils.compile_sim_obj( self.get_verilog_top_module_name(), verilog_files, single_src_dir ) # save generated lib filename in attribute diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 25a2ee130d..5bf52a7365 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -41,7 +41,7 @@ except ModuleNotFoundError: 
PyVerilator = None -import finn.util.pyxsi_rpcclient as pyxsi_rpcclient +import pyxsi_utils class HWCustomOp(CustomOp): @@ -141,14 +141,13 @@ def get_rtlsim(self): # create PyVerilator wrapper sim = PyVerilator(rtlsim_so) elif rtlsim_backend == "pyxsi": - # load up pyXSI sim using pyxsi_rpcclient sim_base, sim_rel = rtlsim_so.split("xsim.dir") sim_rel = "xsim.dir" + sim_rel # pass in correct tracefile from attribute tracefile = self.get_nodeattr("rtlsim_trace") if tracefile == "default": tracefile = self.onnx_node.name + ".wdb" - sim = pyxsi_rpcclient.load_sim_obj(sim_base, sim_rel, tracefile) + sim = pyxsi_utils.load_sim_obj(sim_base, sim_rel, tracefile) else: assert False, "Unknown rtlsim_backend" @@ -162,7 +161,7 @@ def close_rtlsim(self, sim): # no action needed pass elif rtlsim_backend == "pyxsi": - pyxsi_rpcclient.close_rtlsim(sim) + pyxsi_utils.close_rtlsim(sim) else: assert False, "Unknown rtlsim_backend" @@ -231,7 +230,7 @@ def reset_rtlsim(self, sim): sim.io.ap_clk = 0 sim.io.ap_rst_n = 1 elif rtlsim_backend == "pyxsi": - pyxsi_rpcclient.reset_rtlsim(sim) + pyxsi_utils.reset_rtlsim(sim) else: assert False, f"Unknown rtlsim_backend {rtlsim_backend}" @@ -242,7 +241,7 @@ def toggle_clk(self, sim): sim.io.ap_clk = 1 sim.io.ap_clk = 0 elif rtlsim_backend == "pyxsi": - pyxsi_rpcclient.toggle_clk(sim) + pyxsi_utils.toggle_clk(sim) else: assert False, f"Unknown rtlsim_backend {rtlsim_backend}" @@ -265,7 +264,7 @@ def rtlsim_multi_io(self, sim, io_dict): liveness_threshold=pyverilate_get_liveness_threshold_cycles(), ) elif rtlsim_backend == "pyxsi": - total_cycle_count = pyxsi_rpcclient.rtlsim_multi_io( + total_cycle_count = pyxsi_utils.rtlsim_multi_io( sim, io_dict, num_out_values, sname=sname ) else: diff --git a/src/finn/util/pyxsi_rpcclient.py b/src/finn/util/pyxsi_rpcclient.py deleted file mode 100644 index fa4909d138..0000000000 --- a/src/finn/util/pyxsi_rpcclient.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (C) 2024, Advanced Micro Devices, Inc. 
-# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of pyxsi nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import os -import subprocess -import xmlrpc.client -from time import sleep - -from finn.util.basic import get_finn_root, get_vivado_root - -try: - import pyxsi_utils -except ModuleNotFoundError: - pyxsi_utils = None - - -def compile_sim_obj(top_module_name, source_list, sim_out_dir): - # compile_sim_obj does not require special envvar settings and is safe to call - # directly without any RPC - ret = pyxsi_utils.compile_sim_obj(top_module_name, source_list, sim_out_dir) - return ret - - -def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile=None, is_toplevel_verilog=True): - # launch a pyxsi RPC server - proc_env = os.environ.copy() - proc_env["LD_LIBRARY_PATH"] = get_vivado_root() + "/lib/lnx64.o" - logfile_wr_fd = open(sim_out_dir + "/pyxsi_rpcserver.log", "w") - logfile_rd_fd = open(sim_out_dir + "/pyxsi_rpcserver.log", "r") - command = ["python", "-u", get_finn_root() + "/src/finn/util/pyxsi_rpcserver.py"] - proc = subprocess.Popen( - command, - bufsize=1, - env=proc_env, - stdout=logfile_wr_fd, - stderr=logfile_wr_fd, - universal_newlines=True, - ) - rpc_port = 8000 - # TODO sleep to ensure RPC server has started before trying to read its port number from stdout - # bit hacky - is there a better way of communicating the open port number back to the client? 
- line = logfile_rd_fd.readline() - retries = 10 - while line == "" and retries > 0: - sleep(0.1) - line = logfile_rd_fd.readline() - retries -= 1 - if "pyxsi RPC server is now running on" in line: - rpc_port = int(line.split(" on ")[1]) - logfile_rd_fd.close() - else: - assert False, f"Unexpected output from pyxsi RPC server: {line}" - rpc_proxy = xmlrpc.client.ServerProxy(f"http://localhost:{rpc_port}", allow_none=True) - sim_id = rpc_proxy.load_sim_obj( - sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog - ) - # return all relevant information for rtlsim - handle = (sim_id, rpc_proxy, rpc_port, proc) - return handle - - -def close_rtlsim(handle): - (sim_id, rpc_proxy, _, proc) = handle - rpc_proxy.close_rtlsim(sim_id) - proc.terminate() - - -def find_signal(handle, signal_name): - (sim_id, rpc_proxy, _, _) = handle - ret = rpc_proxy.find_signal(sim_id, signal_name) - return ret - - -def read_signal(handle, signal_name): - (sim_id, rpc_proxy, _, _) = handle - signal_value_str = rpc_proxy.read_signal(sim_id, signal_name) - signal_value = int(signal_value_str) - return signal_value - - -def write_signal(handle, signal_name, signal_value): - (sim_id, rpc_proxy, _, _) = handle - signal_value_str = str(signal_value) - rpc_proxy.write_signal(sim_id, signal_name, signal_value_str) - - -def reset_rtlsim(handle, rst_name="ap_rst_n", active_low=True, clk_name="ap_clk"): - (sim_id, rpc_proxy, _, _) = handle - rpc_proxy.reset_rtlsim(sim_id, rst_name, active_low, clk_name) - - -def toggle_clk(handle, clk_name="ap_clk"): - (sim_id, rpc_proxy, _, _) = handle - rpc_proxy.toggle_clk(sim_id, clk_name) - - -def toggle_neg_edge(handle, clk_name="ap_clk"): - (sim_id, rpc_proxy, _, _) = handle - rpc_proxy.toggle_neg_edge(sim_id, clk_name) - - -def toggle_pos_edge(handle, clk_name="ap_clk"): - (sim_id, rpc_proxy, _, _) = handle - rpc_proxy.toggle_pos_edge(sim_id, clk_name) - - -def rtlsim_multi_io( - handle, - io_dict, - num_out_values, - sname="_V_V_", - 
liveness_threshold=10000, - hook_preclk=None, - hook_postclk=None, -): - for outp in io_dict["outputs"]: - write_signal(handle, outp + sname + "TREADY", 1) - - # observe if output is completely calculated - # total_cycle_count will contain the number of cycles the calculation ran - output_done = False - total_cycle_count = 0 - output_count = 0 - old_output_count = 0 - - # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles - no_change_count = 0 - - while not (output_done): - signals_to_write = {} - if hook_preclk: - hook_preclk(handle) - # Toggle falling edge to arrive at a delta cycle before the rising edge - toggle_neg_edge(handle) - - # examine signals, decide how to act based on that but don't update yet - # so only read_signal access in this block, no _write_signal - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - signal_name = inp + sname - if ( - read_signal(handle, signal_name + "TREADY") == 1 - and read_signal(handle, signal_name + "TVALID") == 1 - ): - inputs = inputs[1:] - io_dict["inputs"][inp] = inputs - - for outp in io_dict["outputs"]: - outputs = io_dict["outputs"][outp] - signal_name = outp + sname - if ( - read_signal(handle, signal_name + "TREADY") == 1 - and read_signal(handle, signal_name + "TVALID") == 1 - ): - outputs = outputs + [read_signal(handle, signal_name + "TDATA")] - output_count += 1 - io_dict["outputs"][outp] = outputs - - # update signals based on decisions in previous block, but don't examine anything - # so only write_signal access in this block, no read_signal - for inp in io_dict["inputs"]: - inputs = io_dict["inputs"][inp] - signal_name = inp + sname - signals_to_write[signal_name + "TVALID"] = 1 if len(inputs) > 0 else 0 - signals_to_write[signal_name + "TDATA"] = inputs[0] if len(inputs) > 0 else 0 - - # Toggle rising edge to arrive at a delta cycle before the falling edge - toggle_pos_edge(handle) - - for k, v in signals_to_write.items(): - 
write_signal(handle, k, v) - - if hook_postclk: - hook_postclk(handle) - - total_cycle_count = total_cycle_count + 1 - - if output_count == old_output_count: - no_change_count = no_change_count + 1 - else: - no_change_count = 0 - old_output_count = output_count - - # check if all expected output words received - if output_count == num_out_values: - output_done = True - - # end sim on timeout - if no_change_count == liveness_threshold: - close_rtlsim(handle) - raise Exception( - "Error in simulation! Takes too long to produce output. " - "Consider setting the liveness_threshold parameter to a " - "larger value." - ) - - return total_cycle_count diff --git a/src/finn/util/pyxsi_rpcserver.py b/src/finn/util/pyxsi_rpcserver.py deleted file mode 100644 index 8ff6f0404f..0000000000 --- a/src/finn/util/pyxsi_rpcserver.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (C) 2024, Advanced Micro Devices, Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of pyxsi nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import pyxsi_utils -from xmlrpc.server import SimpleXMLRPCRequestHandler, SimpleXMLRPCServer - -# since simulation with XSI requires a certain LD_LIBRARY_PATH setting -# which breaks other applications, we launch the simulation in its -# own executable with this env.var. setting, and use xmlrpc to access it - -try: - ldlp = os.environ["LD_LIBRARY_PATH"] - if not ("Vivado" in ldlp): - assert False, "Must be launched with LD_LIBRARY_PATH=$(XILINX_VIVADO)/lib/lnx64.o" -except KeyError: - assert False, "Must be launched with LD_LIBRARY_PATH=$(XILINX_VIVADO)/lib/lnx64.o" - - -class RequestHandler(SimpleXMLRPCRequestHandler): - rpc_paths = ("/RPC2",) - - -# we need to do some conversions while pyxsi calls are going through xmlrpc: -# * sim objs become strings (stored in the sim_id_to_obj dict until done) -# * signal values become strings -# (converted back and forth to Python integers) - -sim_id_to_obj = {} - - -def compile_sim_obj(top_module_name, source_list, sim_out_dir): - ret = pyxsi_utils.compile_sim_obj(top_module_name, source_list, sim_out_dir) - return ret - - -def load_sim_obj(sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog): - ret_sim_obj = pyxsi_utils.load_sim_obj( - sim_out_dir, out_so_relative_path, tracefile, is_toplevel_verilog - ) - ret_sim_id = str(id(ret_sim_obj)) - sim_id_to_obj[ret_sim_id] = ret_sim_obj - return ret_sim_id - - -def find_signal(sim_id, signal_name): - sim = sim_id_to_obj[sim_id] - 
return pyxsi_utils._find_signal(sim, signal_name) - - -def read_signal(sim_id, signal_name): - sim = sim_id_to_obj[sim_id] - signal_value = pyxsi_utils._read_signal(sim, signal_name) - signal_value_str = str(signal_value) - return signal_value_str - - -def write_signal(sim_id, signal_name, signal_value_str): - sim = sim_id_to_obj[sim_id] - signal_value = int(signal_value_str) - pyxsi_utils._write_signal(sim, signal_name, signal_value) - - -def reset_rtlsim(sim_id, rst_name, active_low, clk_name): - sim = sim_id_to_obj[sim_id] - pyxsi_utils.reset_rtlsim(sim, rst_name, active_low, clk_name) - - -def toggle_clk(sim_id, clk_name): - sim = sim_id_to_obj[sim_id] - pyxsi_utils.toggle_clk(sim, clk_name) - - -def toggle_neg_edge(sim_id, clk_name): - sim = sim_id_to_obj[sim_id] - pyxsi_utils.toggle_neg_edge(sim, clk_name) - - -def toggle_pos_edge(sim_id, clk_name): - sim = sim_id_to_obj[sim_id] - pyxsi_utils.toggle_pos_edge(sim, clk_name) - - -def close_rtlsim(sim_id): - sim = sim_id_to_obj[sim_id] - pyxsi_utils.close_rtlsim(sim) - - -# ask to create server on port 0 to find an available port -with SimpleXMLRPCServer(("localhost", 0), requestHandler=RequestHandler, allow_none=True) as server: - port = server.server_address[1] - server.register_introspection_functions() - server.register_function(compile_sim_obj) - server.register_function(load_sim_obj) - server.register_function(find_signal) - server.register_function(read_signal) - server.register_function(write_signal) - server.register_function(reset_rtlsim) - server.register_function(toggle_clk) - server.register_function(toggle_neg_edge) - server.register_function(toggle_pos_edge) - server.register_function(close_rtlsim) - print(f"pyxsi RPC server is now running on {port}") - server.serve_forever() From d4bf3b3790a23bd6e0d79b5d505d65e8e86326d6 Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 18 Nov 2024 11:13:31 +0000 Subject: [PATCH 054/102] [pyxsi] Adjust copyright header and linting --- src/finn/core/rtlsim_exec.py 
| 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index b8781dc595..764e330b66 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -164,9 +164,7 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc all_verilog_srcs = f.read().split() single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") - rtlsim_so = pyxsi_utils.compile_sim_obj( - top_module_name, all_verilog_srcs, single_src_dir - ) + rtlsim_so = pyxsi_utils.compile_sim_obj(top_module_name, all_verilog_srcs, single_src_dir) # save generated lib filename in attribute model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) sim_base, sim_rel = rtlsim_so @@ -294,9 +292,7 @@ def rtlsim_exec_pyxsi(model, execution_context, pre_hook=None, post_hook=None): top_module_name = top_module_file_name.strip(".v") single_src_dir = make_build_dir("rtlsim_" + top_module_name + "_") - rtlsim_so = pyxsi_utils.compile_sim_obj( - top_module_name, all_verilog_srcs, single_src_dir - ) + rtlsim_so = pyxsi_utils.compile_sim_obj(top_module_name, all_verilog_srcs, single_src_dir) # save generated lib filename in attribute model.set_metadata_prop("rtlsim_so", rtlsim_so[0] + "/" + rtlsim_so[1]) sim_base, sim_rel = rtlsim_so From bf1fafa70e38bbacae00a92c11582154bc16df6e Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 19 Nov 2024 08:55:28 +0000 Subject: [PATCH 055/102] [Lint] Run pre-commit --- src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py | 2 +- src/finn/custom_op/fpgadataflow/hlsbackend.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index 13b50007f9..e13a7c02a4 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -28,8 +28,8 @@ import numpy as np import os 
- import pyxsi_utils + from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index bb47af402d..5f2c156d88 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -28,11 +28,11 @@ import numpy as np import os +import pyxsi_utils import subprocess from abc import ABC, abstractmethod from qonnx.core.datatype import DataType -import pyxsi_utils from finn.custom_op.fpgadataflow import templates from finn.util.basic import CppBuilder, get_rtlsim_trace_depth, make_build_dir from finn.util.hls import CallHLS From 42582f9aa8f9be3ef0cdb7d22f00fdfca2a23bd9 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 19 Nov 2024 09:54:48 +0000 Subject: [PATCH 056/102] [pyxsi] Import optional to ensure non synthesis tests run --- src/finn/core/rtlsim_exec.py | 5 ++++- src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py | 6 +++++- src/finn/custom_op/fpgadataflow/hlsbackend.py | 6 +++++- src/finn/custom_op/fpgadataflow/hwcustomop.py | 5 ++++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 764e330b66..e1f3897787 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -46,7 +46,10 @@ except ModuleNotFoundError: PyVerilator = None -import pyxsi_utils +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None def prep_rtlsim_io_dict(model, execution_context): diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index e13a7c02a4..f816a4aad9 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -28,12 +28,16 @@ import 
numpy as np import os -import pyxsi_utils from finn.custom_op.fpgadataflow.addstreams import AddStreams from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + class AddStreams_hls(AddStreams, HLSBackend): """Class that corresponds to finn-hlslib AddStreams_Batch function.""" diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index 5f2c156d88..ebf9c51d75 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -28,7 +28,6 @@ import numpy as np import os -import pyxsi_utils import subprocess from abc import ABC, abstractmethod from qonnx.core.datatype import DataType @@ -43,6 +42,11 @@ except ModuleNotFoundError: PyVerilator = None +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + class HLSBackend(ABC): """HLSBackend class all custom ops that correspond to a finn-hlslib diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 5bf52a7365..9675fff9cf 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -41,7 +41,10 @@ except ModuleNotFoundError: PyVerilator = None -import pyxsi_utils +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None class HWCustomOp(CustomOp): From 63a7ef6e2b91f7311f6359646b5c7ae73aa86e02 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 19 Nov 2024 15:33:03 +0000 Subject: [PATCH 057/102] [pyxsi] Enable both rtlsim backends in all HLS layers --- .../custom_op/fpgadataflow/hls/addstreams_hls.py | 15 +++------------ .../fpgadataflow/hls/channelwise_op_hls.py | 4 +++- .../custom_op/fpgadataflow/hls/checksum_hls.py | 4 +++- src/finn/custom_op/fpgadataflow/hls/concat_hls.py | 5 +++-- 
.../hls/convolutioninputgenerator_hls.py | 4 +++- .../custom_op/fpgadataflow/hls/downsampler_hls.py | 4 +++- .../fpgadataflow/hls/duplicatestreams_hls.py | 4 +++- .../custom_op/fpgadataflow/hls/fmpadding_hls.py | 4 +++- .../fpgadataflow/hls/fmpadding_pixel_hls.py | 4 +++- .../fpgadataflow/hls/globalaccpool_hls.py | 4 +++- .../custom_op/fpgadataflow/hls/labelselect_hls.py | 4 +++- src/finn/custom_op/fpgadataflow/hls/lookup_hls.py | 4 +++- .../hls/matrixvectoractivation_hls.py | 4 +++- src/finn/custom_op/fpgadataflow/hls/pool_hls.py | 4 +++- .../hls/streamingdatawidthconverter_hls.py | 4 +++- .../fpgadataflow/hls/streamingeltwise_hls.py | 4 +++- .../fpgadataflow/hls/streamingmaxpool_hls.py | 4 +++- .../fpgadataflow/hls/thresholding_hls.py | 4 +++- .../custom_op/fpgadataflow/hls/upsampler_hls.py | 4 +++- .../hls/vectorvectoractivation_hls.py | 4 +++- src/finn/custom_op/fpgadataflow/hwcustomop.py | 2 +- 21 files changed, 61 insertions(+), 33 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py index f816a4aad9..b713be14e5 100644 --- a/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/addstreams_hls.py @@ -33,11 +33,6 @@ from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -try: - import pyxsi_utils -except ModuleNotFoundError: - pyxsi_utils = None - class AddStreams_hls(AddStreams, HLSBackend): """Class that corresponds to finn-hlslib AddStreams_Batch function.""" @@ -122,7 +117,6 @@ def execute_node(self, context, graph): context[node.output[0]].shape == exp_oshape ), "cppsim did not produce expected output shape" elif mode == "rtlsim": - rtlsim_backend = self.get_nodeattr("rtlsim_backend") sim = self.get_rtlsim() nbits = self.get_instream_width() rtlsim_inp0 = npy_to_rtlsim_input( @@ -131,16 +125,13 @@ def execute_node(self, context, graph): rtlsim_inp1 = 
npy_to_rtlsim_input( "{}/input_1.npy".format(code_gen_dir), export_idt, nbits ) - if rtlsim_backend == "pyverilator": - super().reset_rtlsim(sim) + super().reset_rtlsim(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": super().toggle_clk(sim) - else: - pyxsi_utils.reset_rtlsim(sim) io_dict = {"inputs": {"in0": rtlsim_inp0, "in1": rtlsim_inp1}, "outputs": {"out": []}} self.rtlsim_multi_io(sim, io_dict) rtlsim_output = io_dict["outputs"]["out"] - if rtlsim_backend == "pyxsi": - pyxsi_utils.close_rtlsim(sim) + super().close_rtlsim(sim) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py b/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py index adb71c0ccb..c224cf64d4 100644 --- a/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/channelwise_op_hls.py @@ -284,12 +284,14 @@ def execute_node(self, context, graph): nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/checksum_hls.py b/src/finn/custom_op/fpgadataflow/hls/checksum_hls.py index 8a72ca3c6c..5bef15c66f 100644 --- a/src/finn/custom_op/fpgadataflow/hls/checksum_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/checksum_hls.py @@ -188,12 +188,14 @@ def execute_node(self, context, graph): nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if 
self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/concat_hls.py b/src/finn/custom_op/fpgadataflow/hls/concat_hls.py index 008fa9cee8..bf1f906b63 100644 --- a/src/finn/custom_op/fpgadataflow/hls/concat_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/concat_hls.py @@ -143,9 +143,10 @@ def execute_node(self, context, graph): ) io_dict["inputs"]["in%d" % i] = rtlsim_inp super().reset_rtlsim(sim) - super().toggle_clk(sim) - + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py b/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py index eeb7dd880d..0e45ea7ef5 100644 --- a/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/convolutioninputgenerator_hls.py @@ -387,12 +387,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py b/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py index 76364befde..df045583fc 100644 --- 
a/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/downsampler_hls.py @@ -138,12 +138,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/duplicatestreams_hls.py b/src/finn/custom_op/fpgadataflow/hls/duplicatestreams_hls.py index e19149435e..a9fbe3ddf0 100644 --- a/src/finn/custom_op/fpgadataflow/hls/duplicatestreams_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/duplicatestreams_hls.py @@ -148,7 +148,8 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) rtlsim_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {}, @@ -156,6 +157,7 @@ def execute_node(self, context, graph): for i in range(n_outputs): rtlsim_dict["outputs"]["out%d" % i] = [] self.rtlsim_multi_io(sim, rtlsim_dict) + super().close_rtlsim(sim) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py b/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py index a6eb9cab06..6355acba9b 100644 --- a/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/fmpadding_hls.py @@ -185,12 +185,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if 
self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py b/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py index 7e6bb80e3e..a39b7e5b03 100644 --- a/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/fmpadding_pixel_hls.py @@ -140,12 +140,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py b/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py index e195850663..0d2ba2ff0b 100644 --- a/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/globalaccpool_hls.py @@ -118,12 +118,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py b/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py index 
a79856f7ee..19e1318205 100644 --- a/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/labelselect_hls.py @@ -120,12 +120,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py b/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py index fbe12e51eb..98a04b0bc9 100644 --- a/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/lookup_hls.py @@ -297,12 +297,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() diff --git a/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py b/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py index 772057b7d8..a355445c48 100644 --- a/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/matrixvectoractivation_hls.py @@ -542,7 +542,8 @@ def execute_node(self, context, graph): nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) self.reset_rtlsim(sim) - self.toggle_clk(sim) + if 
self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) if mem_mode == "external" or mem_mode == "internal_decoupled": wnbits = self.get_weightstream_width() export_wdt = self.get_weight_datatype() @@ -562,6 +563,7 @@ def execute_node(self, context, graph): "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/pool_hls.py b/src/finn/custom_op/fpgadataflow/hls/pool_hls.py index 609c53fd68..2918f88a81 100644 --- a/src/finn/custom_op/fpgadataflow/hls/pool_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/pool_hls.py @@ -235,12 +235,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py index c58aabbdbe..fb8ee42f5a 100644 --- a/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingdatawidthconverter_hls.py @@ -177,12 +177,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt 
= export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py index 41ee72fe8c..efa98f2ea6 100644 --- a/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingeltwise_hls.py @@ -129,12 +129,14 @@ def execute_node(self, context, graph): "{}/input_1.npy".format(code_gen_dir), export_idt1, nbits1 ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp0, "in1": rtlsim_inp1}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py b/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py index f7546d7e1a..c03d9a0ece 100755 --- a/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/streamingmaxpool_hls.py @@ -190,12 +190,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py b/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py index 4c0da73ec9..a2a53a6689 100644 --- a/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py @@ -336,7 +336,8 @@ def execute_node(self, context, graph): nbits = 
self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) if self.get_nodeattr("mem_mode") == "internal_decoupled": wnbits = self.get_weightstream_width() export_wdt = self.get_weight_datatype() @@ -356,6 +357,7 @@ def execute_node(self, context, graph): else: raise Exception("Unrecognized mem_mode") self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py b/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py index c6a062a775..0dfe9096b0 100644 --- a/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/upsampler_hls.py @@ -148,12 +148,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py b/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py index 8f2419b694..455d477c88 100644 --- a/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/vectorvectoractivation_hls.py @@ -191,7 +191,8 @@ def execute_node(self, context, graph): nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if 
self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) if mem_mode == "external" or mem_mode == "internal_decoupled": wnbits = self.get_weightstream_width() @@ -214,6 +215,7 @@ def execute_node(self, context, graph): "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 9675fff9cf..492680791d 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -72,7 +72,7 @@ def get_nodeattr_types(self): "res_estimate": ("s", False, ""), "res_synth": ("s", False, ""), "rtlsim_so": ("s", False, ""), - "rtlsim_backend": ("s", False, "pyverilator", {"pyverilator", "pyxsi"}), + "rtlsim_backend": ("s", False, "pyxsi", {"pyverilator", "pyxsi"}), # partitioning info # ID of SLR to which the Op is attached in Vitis builds # Set to -1 as 'don't care' From 8812079d8776ea1858f79bc54aadca1964f1880b Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 7 Nov 2024 19:13:59 +0100 Subject: [PATCH 058/102] [RTL] make it possible to use pyxsi as rtlsim backend --- src/finn/custom_op/fpgadataflow/rtlbackend.py | 45 +++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index fe7ac51a13..9f2d28a3b7 100644 --- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -28,6 +28,7 @@ from abc import ABC, abstractmethod +from finn.util import pyxsi_rpcclient from finn.util.basic import get_rtlsim_trace_depth, make_build_dir try: @@ -61,18 +62,36 @@ def prepare_rtlsim(self): raise ImportError("Installation of PyVerilator is required.") verilog_paths = self.get_verilog_paths() - verilog_files = 
self.get_rtl_file_list() - - # build the Verilator emu library - sim = PyVerilator.build( - verilog_files, - build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), - verilog_path=verilog_paths, - trace_depth=get_rtlsim_trace_depth(), - top_module_name=self.get_nodeattr("gen_top_module"), - ) - # save generated lib filename in attribute - self.set_nodeattr("rtlsim_so", sim.lib._name) + rtlsim_backend = self.get_nodeattr("rtlsim_backend") + if rtlsim_backend == "pyverilator": + if PyVerilator is None: + raise ImportError("Installation of PyVerilator is required.") + verilog_files = self.get_rtl_file_list(abspath=False) + + # build the Verilator emu library + sim = PyVerilator.build( + verilog_files, + build_dir=make_build_dir("pyverilator_" + self.onnx_node.name + "_"), + verilog_path=verilog_paths, + trace_depth=get_rtlsim_trace_depth(), + top_module_name=self.get_nodeattr("gen_top_module"), + ) + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", sim.lib._name) + elif rtlsim_backend == "pyxsi": + verilog_files = self.get_rtl_file_list(abspath=True) + single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") + ret = pyxsi_rpcclient.compile_sim_obj( + self.get_verilog_top_module_name(), verilog_files, single_src_dir + ) + # save generated lib filename in attribute + self.set_nodeattr("rtlsim_so", ret[0] + "/" + ret[1]) + # TODO return val of this function is never used + # refactor s.t. it does not return anything at all, + # consistently between pyverilator and pyxsi + sim = None + else: + assert False, "Unknown rtlsim_backend" return sim def get_verilog_paths(self): @@ -82,7 +101,7 @@ def get_verilog_paths(self): return [code_gen_dir] @abstractmethod - def get_rtl_file_list(self): + def get_rtl_file_list(self, abspath=False): """Returns list of rtl files. 
Needs to be filled by each node.""" pass From 1115ddf23e88e1ba13276a08e6449c0a1fba76b6 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Thu, 7 Nov 2024 19:14:19 +0100 Subject: [PATCH 059/102] [RTLMVU] prototyping support for pyxsi rtlsim --- .../rtl/matrixvectoractivation_rtl.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index 88249f3673..eb6399648f 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -285,8 +285,21 @@ def prepare_codegen_default(self, fpgapart, clk): return template_path, code_gen_dict - def get_rtl_file_list(self): - verilog_files = [self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"] + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/mvu/") + else: + code_gen_dir = "" + rtllib_dir = "" + verilog_files = [ + code_gen_dir + self.get_nodeattr("gen_top_module") + "_wrapper.v", + rtllib_dir + "mvu_vvu_axi.sv", + rtllib_dir + "replay_buffer.sv", + rtllib_dir + "mvu_4sx4u.sv", + rtllib_dir + "mvu_vvu_8sx9_dsp58.sv", + rtllib_dir + "mvu_8sx8u_dsp48.sv", + ] return verilog_files def get_verilog_paths(self): From 28e482b8ba242c726b1fe3b8865cee4454176e8e Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 18 Nov 2024 14:13:23 +0000 Subject: [PATCH 060/102] [MVU] use rtlsim fxns from base class instead of pyverilator directly --- .../custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index eb6399648f..0bdd2a9c94 100644 --- 
a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -28,7 +28,6 @@ import numpy as np import os -from pyverilator.util.axi_utils import reset_rtlsim, toggle_clk from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend @@ -95,8 +94,8 @@ def execute_node(self, context, graph): sim = self.get_rtlsim() nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) - reset_rtlsim(sim) - toggle_clk(sim) + self.reset_rtlsim(sim) + self.toggle_clk(sim) if mem_mode in ["external", "internal_decoupled"]: wnbits = self.get_weightstream_width() export_wdt = self.get_weight_datatype() From 6a16fdf9edcb4f773b3f0ed8632e014c38a3e3dd Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 19 Nov 2024 15:55:49 +0000 Subject: [PATCH 061/102] [pyxsi] Remove references to rpc setup in cherry-picked commits --- .../fpgadataflow/rtl/matrixvectoractivation_rtl.py | 8 +++++--- src/finn/custom_op/fpgadataflow/rtlbackend.py | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index 0bdd2a9c94..61797dd2fd 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -94,8 +94,9 @@ def execute_node(self, context, graph): sim = self.get_rtlsim() nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) - self.reset_rtlsim(sim) - self.toggle_clk(sim) + super().reset_rtlsim(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) if mem_mode in ["external", "internal_decoupled"]: wnbits = self.get_weightstream_width() export_wdt = 
self.get_weight_datatype() @@ -113,6 +114,7 @@ def execute_node(self, context, graph): "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() @@ -292,7 +294,7 @@ def get_rtl_file_list(self, abspath=False): code_gen_dir = "" rtllib_dir = "" verilog_files = [ - code_gen_dir + self.get_nodeattr("gen_top_module") + "_wrapper.v", + code_gen_dir + self.get_nodeattr("gen_top_module") + "_wrapper_sim.v", rtllib_dir + "mvu_vvu_axi.sv", rtllib_dir + "replay_buffer.sv", rtllib_dir + "mvu_4sx4u.sv", diff --git a/src/finn/custom_op/fpgadataflow/rtlbackend.py b/src/finn/custom_op/fpgadataflow/rtlbackend.py index 9f2d28a3b7..5aae52ad4b 100644 --- a/src/finn/custom_op/fpgadataflow/rtlbackend.py +++ b/src/finn/custom_op/fpgadataflow/rtlbackend.py @@ -28,7 +28,6 @@ from abc import ABC, abstractmethod -from finn.util import pyxsi_rpcclient from finn.util.basic import get_rtlsim_trace_depth, make_build_dir try: @@ -36,6 +35,11 @@ except ModuleNotFoundError: PyVerilator = None +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + class RTLBackend(ABC): """RTLBackend class all custom ops that correspond to a module in finn-rtllib @@ -81,7 +85,7 @@ def prepare_rtlsim(self): elif rtlsim_backend == "pyxsi": verilog_files = self.get_rtl_file_list(abspath=True) single_src_dir = make_build_dir("rtlsim_" + self.onnx_node.name + "_") - ret = pyxsi_rpcclient.compile_sim_obj( + ret = pyxsi_utils.compile_sim_obj( self.get_verilog_top_module_name(), verilog_files, single_src_dir ) # save generated lib filename in attribute From 43b5437639507dfbe046b03336be01973e7295e8 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 19 Nov 2024 17:44:10 +0000 Subject: [PATCH 062/102] [pyxsi] Add support for pyxsi execution to rtl fmpadding, dwc, fifo --- .../fpgadataflow/rtl/fmpadding_rtl.py | 21 +++++++++++++------ .../rtl/streamingdatawidthconverter_rtl.py | 
19 ++++++++++++----- .../fpgadataflow/rtl/streamingfifo_rtl.py | 17 +++++++++++---- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py index 2fd589dd9b..6ee1e27e2d 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/fmpadding_rtl.py @@ -90,12 +90,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() @@ -205,12 +207,19 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - def get_rtl_file_list(self): + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/fmpadding/hdl/") + else: + code_gen_dir = "" + rtllib_dir = "" + verilog_files = [ - "fmpadding_axi.sv", - "fmpadding.sv", - "axi2we.sv", - self.get_nodeattr("gen_top_module") + ".v", + rtllib_dir + "fmpadding_axi.sv", + rtllib_dir + "fmpadding.sv", + rtllib_dir + "axi2we.sv", + code_gen_dir + self.get_nodeattr("gen_top_module") + ".v", ] return verilog_files diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py index ad9c8d4f06..496e38acfc 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingdatawidthconverter_rtl.py @@ -94,12 +94,14 @@ def 
execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() @@ -166,11 +168,18 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - def get_rtl_file_list(self): + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/dwc/hdl/") + else: + code_gen_dir = "" + rtllib_dir = "" + verilog_files = [ - "dwc_axi.sv", - "dwc.sv", - self.get_nodeattr("gen_top_module") + ".v", + rtllib_dir + "dwc_axi.sv", + rtllib_dir + "dwc.sv", + code_gen_dir + self.get_nodeattr("gen_top_module") + ".v", ] return verilog_files diff --git a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py index 1b1d632bda..05b45f9e4b 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/streamingfifo_rtl.py @@ -146,12 +146,14 @@ def execute_node(self, context, graph): nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = DataType[self.get_nodeattr("dataType")] target_bits = odt.bitwidth() @@ -253,10 +255,17 @@ def 
code_generation_ipi(self): "FIFO implementation style %s not supported, please use rtl or vivado" % impl_style ) - def get_rtl_file_list(self): + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/fifo/hdl/") + else: + code_gen_dir = "" + rtllib_dir = "" + verilog_files = [ - "Q_srl.v", - self.get_nodeattr("gen_top_module") + ".v", + rtllib_dir + "Q_srl.v", + code_gen_dir + self.get_nodeattr("gen_top_module") + ".v", ] return verilog_files From a44237bda81e4a22499710ace8994d5091f49a1d Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 19 Nov 2024 17:55:05 +0000 Subject: [PATCH 063/102] [pyxsi] Enable pyxsi for rtl vvau --- .../rtl/vectorvectoractivation_rtl.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py index 11990ebd91..23ba4f5fc9 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/vectorvectoractivation_rtl.py @@ -28,7 +28,6 @@ import numpy as np import os -from pyverilator.util.axi_utils import reset_rtlsim, toggle_clk from qonnx.core.datatype import DataType from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend @@ -95,8 +94,9 @@ def execute_node(self, context, graph): sim = self.get_rtlsim() nbits = self.get_instream_width() inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) - reset_rtlsim(sim) - toggle_clk(sim) + super().reset_rtlsim(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) if mem_mode in ["external", "internal_decoupled"]: wnbits = self.get_weightstream_width() @@ -121,6 +121,7 @@ def execute_node(self, context, graph): "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + 
super().close_rtlsim(sim) output = io_dict["outputs"]["out"] odt = self.get_output_datatype() target_bits = odt.bitwidth() @@ -277,8 +278,22 @@ def prepare_codegen_default(self, fpgapart, clk): return template_path, code_gen_dict - def get_rtl_file_list(self): - verilog_files = [self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"] + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/mvu/") + else: + code_gen_dir = "" + rtllib_dir = "" + + verilog_files = [ + code_gen_dir + self.get_nodeattr("gen_top_module") + "_wrapper_sim.v", + rtllib_dir + "mvu_vvu_axi.sv", + rtllib_dir + "replay_buffer.sv", + rtllib_dir + "mvu_4sx4u.sv", + rtllib_dir + "mvu_vvu_8sx9_dsp58.sv", + rtllib_dir + "mvu_8sx8u_dsp48.sv", + ] return verilog_files def get_verilog_paths(self): From c75678baa0c4ec5d09544cd6fd62f21a614f8ade Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 20 Nov 2024 15:21:48 +0000 Subject: [PATCH 064/102] [rtl thresh] Rename top module and fix intf names --- .../fpgadataflow/rtl/thresholding_rtl.py | 74 ++++++++----------- 1 file changed, 31 insertions(+), 43 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py index d47632dfad..0edc59d16c 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py @@ -30,17 +30,12 @@ import numpy as np import os import shutil -from pyverilator.util.axi_utils import reset_rtlsim, rtlsim_multi_io from qonnx.core.datatype import DataType from qonnx.util.basic import roundup_to_integer_multiple from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend from finn.custom_op.fpgadataflow.thresholding import Thresholding -from finn.util.basic import ( - get_memutil_alternatives, - mem_primitives_versal, - pyverilate_get_liveness_threshold_cycles, -) 
+from finn.util.basic import get_memutil_alternatives, mem_primitives_versal from finn.util.data_packing import ( npy_to_rtlsim_input, pack_innermost_dim_as_hex_string, @@ -243,9 +238,7 @@ def prepare_codegen_rtl_values(self, model): code_gen_dict["$THRESHOLDS_PATH$"] = ['"./%s_"' % self.onnx_node.name] # Identify the module name - code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [ - self.get_verilog_top_module_name() + "_axi_wrapper" - ] + code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] = [self.get_verilog_top_module_name()] # Set the top module name - AXI wrapper code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] @@ -287,14 +280,22 @@ def prepare_codegen_rtl_values(self, model): code_gen_dict["$DEEP_PIPELINE$"] = [str(deep_pipeline)] return code_gen_dict - def get_rtl_file_list(self): + def get_rtl_file_list(self, abspath=False): """Thresholding binary search RTL file list""" - return [ - "axilite_if.v", - "thresholding.sv", - "thresholding_axi.sv", - self.get_nodeattr("gen_top_module") + ".v", + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/thresholding/hdl/") + else: + code_gen_dir = "" + rtllib_dir = "" + + verilog_files = [ + rtllib_dir + "axilite_if.v", + rtllib_dir + "thresholding.sv", + rtllib_dir + "thresholding_axi.sv", + code_gen_dir + self.get_nodeattr("gen_top_module") + ".v", ] + return verilog_files def generate_hdl(self, model, fpgapart, clk): """Prepare HDL files from templates for synthesis""" @@ -373,38 +374,23 @@ def execute_node(self, context, graph): # Create a PyVerilator wrapper of the RTLSim .so sim = self.get_rtlsim() nbits = self.get_instream_width() - inp = npy_to_rtlsim_input("{}/input_0.npy".format(code_gen_dir), export_idt, nbits) - io_names = self.get_verilog_top_module_intf_names() - istream_name = io_names["s_axis"][0][0] - ostream_name = io_names["m_axis"][0][0] + rtlsim_inp = npy_to_rtlsim_input( + 
"{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) io_dict = { - "inputs": {istream_name: inp}, - "outputs": {ostream_name: []}, + "inputs": {"in0": rtlsim_inp}, + "outputs": {"out": []}, } - trace_file = self.get_nodeattr("rtlsim_trace") if trace_file == "default": trace_file = self.onnx_node.name + ".vcd" - sname = "_" - - # Change into so directory to ensure threshold files can be found - rtlsim_so = self.get_nodeattr("rtlsim_so") - so_dir = os.path.dirname(os.path.realpath(rtlsim_so)) - olcwd = os.getcwd() - os.chdir(so_dir) - num_out_values = self.get_number_output_values() - reset_rtlsim(sim) - total_cycle_count = rtlsim_multi_io( - sim, - io_dict, - num_out_values, - trace_file=trace_file, - sname=sname, - liveness_threshold=pyverilate_get_liveness_threshold_cycles(), - ) - self.set_nodeattr("cycles_rtlsim", total_cycle_count) - os.chdir(olcwd) - output = io_dict["outputs"][ostream_name] + + super().reset_rtlsim(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) + self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) + rtlsim_output = io_dict["outputs"]["out"] # Manage output data odt = self.get_output_datatype() @@ -413,7 +399,9 @@ def execute_node(self, context, graph): out_npy_path = "{}/output.npy".format(code_gen_dir) out_shape = self.get_folded_output_shape() - rtlsim_output_to_npy(output, out_npy_path, odt, out_shape, packed_bits, target_bits) + rtlsim_output_to_npy( + rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) # load and reshape output output = np.load(out_npy_path) From 60fdd4205c8121d7d4a774b336998e4def02918c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Fri, 22 Nov 2024 11:50:05 +0000 Subject: [PATCH 065/102] Driving AXI bus padding to zero. 
--- finn-rtllib/thresholding/hdl/thresholding_axi.sv | 5 ++++- finn-rtllib/thresholding/sim/thresholding_axi_tb.sv | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/finn-rtllib/thresholding/hdl/thresholding_axi.sv b/finn-rtllib/thresholding/hdl/thresholding_axi.sv index 39756e5c2b..04c13424c9 100644 --- a/finn-rtllib/thresholding/hdl/thresholding_axi.sv +++ b/finn-rtllib/thresholding/hdl/thresholding_axi.sv @@ -191,7 +191,10 @@ module thresholding_axi #( .cfg_rack, .cfg_q, .irdy(s_axis_tready), .ivld(s_axis_tvalid), .idat, - .ordy(m_axis_tready), .ovld(m_axis_tvalid), .odat(m_axis_tdata) + .ordy(m_axis_tready), .ovld(m_axis_tvalid), .odat(m_axis_tdata[PE*O_BITS-1:0]) ); + if($bits(m_axis_tdata) > PE*O_BITS) begin : genPadOut + assign m_axis_tdata[$left(m_axis_tdata):PE*O_BITS] = '0; + end : genPadOut endmodule : thresholding_axi diff --git a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv index cfd875f5c4..1a2b8402a0 100644 --- a/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv +++ b/finn-rtllib/thresholding/sim/thresholding_axi_tb.sv @@ -232,7 +232,7 @@ module thresholding_axi_tb #( end join_any done <= 1; - repeat(N+6) @(posedge clk); + repeat(2*N+8) @(posedge clk); assert(QW.size() == 0) else begin $error("Missing %0d outputs.", QW.size()); From 0a32c66557952dd86f849253f5e5f33bd71395c5 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 22 Nov 2024 13:30:12 +0000 Subject: [PATCH 066/102] [pyxsi] Enable pyxsi for rtl swg --- .../rtl/convolutioninputgenerator_rtl.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py index e6cfa204c5..3c063c00d9 100755 --- a/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/convolutioninputgenerator_rtl.py @@ -330,12 
+330,14 @@ def execute_node(self, context, graph): "{}/input_0.npy".format(code_gen_dir), export_idt, nbits ) super().reset_rtlsim(sim) - super().toggle_clk(sim) + if self.get_nodeattr("rtlsim_backend") == "pyverilator": + super().toggle_clk(sim) io_dict = { "inputs": {"in0": rtlsim_inp}, "outputs": {"out": []}, } self.rtlsim_multi_io(sim, io_dict) + super().close_rtlsim(sim) rtlsim_output = io_dict["outputs"]["out"] odt = export_idt target_bits = odt.bitwidth() @@ -931,15 +933,21 @@ def generate_hdl(self, model, fpgapart, clk): self.set_nodeattr("ipgen_path", code_gen_dir) self.set_nodeattr("ip_path", code_gen_dir) - def get_rtl_file_list(self): + def get_rtl_file_list(self, abspath=False): + if abspath: + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/" + rtllib_dir = os.path.join(os.environ["FINN_ROOT"], "finn-rtllib/swg/") + else: + code_gen_dir = "" + rtllib_dir = "" verilog_files = [ - "swg_pkg.sv", - self.get_nodeattr("gen_top_module") + "_wrapper.v", - self.get_nodeattr("gen_top_module") + "_impl.sv", - "swg_common.sv", + rtllib_dir + "swg_pkg.sv", + code_gen_dir + self.get_nodeattr("gen_top_module") + "_wrapper.v", + code_gen_dir + self.get_nodeattr("gen_top_module") + "_impl.sv", + rtllib_dir + "swg_common.sv", ] if self.get_nodeattr("dynamic_mode"): - verilog_files.append(self.get_nodeattr("gen_top_module") + "_axilite.v") + verilog_files.append(code_gen_dir + self.get_nodeattr("gen_top_module") + "_axilite.v") return verilog_files From 563fbb1bcdf1a4aa8a819ec63b3fa9ef34e3c9bc Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 22 Nov 2024 15:26:50 +0000 Subject: [PATCH 067/102] [SWG] Driving AXI bus padding to zero --- finn-rtllib/swg/swg_template_wrapper.v | 4 ++++ finn-rtllib/swg/swg_template_wrapper_dynamic.v | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/finn-rtllib/swg/swg_template_wrapper.v b/finn-rtllib/swg/swg_template_wrapper.v index 22dc6bd8cd..bb657a7478 100644 --- a/finn-rtllib/swg/swg_template_wrapper.v +++ 
b/finn-rtllib/swg/swg_template_wrapper.v @@ -71,4 +71,8 @@ $TOP_MODULE_NAME$_impl #( .out_V_V_TREADY(out_V_TREADY) ); +if (OUT_WIDTH_PADDED > BUF_OUT_WIDTH) begin + assign out_V_TDATA[OUT_WIDTH_PADDED-1:BUF_OUT_WIDTH] = {(OUT_WIDTH_PADDED-BUF_OUT_WIDTH){1'b0}}; +end + endmodule : $TOP_MODULE_NAME$ diff --git a/finn-rtllib/swg/swg_template_wrapper_dynamic.v b/finn-rtllib/swg/swg_template_wrapper_dynamic.v index 158f3132e3..7e49d3eafb 100644 --- a/finn-rtllib/swg/swg_template_wrapper_dynamic.v +++ b/finn-rtllib/swg/swg_template_wrapper_dynamic.v @@ -180,4 +180,8 @@ $TOP_MODULE_NAME$_impl #( .cfg_last_write(cfg_last_write) ); +if (OUT_WIDTH_PADDED > BUF_OUT_WIDTH) begin + assign out_V_TDATA[OUT_WIDTH_PADDED-1:BUF_OUT_WIDTH] = {(OUT_WIDTH_PADDED-BUF_OUT_WIDTH){1'b0}}; +end + endmodule : $TOP_MODULE_NAME$ From 7dc169e86fbbf3c50e019c978c88bb6e6f54cc4a Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 22 Nov 2024 17:23:07 +0000 Subject: [PATCH 068/102] [Tests] Default to pyxsi for testing --- .../bnn-pynq/tfc_end2end_verification.ipynb | 2 +- src/finn/builder/build_dataflow_steps.py | 6 ++-- tests/end2end/test_end2end_bnn_pynq.py | 2 +- tests/end2end/test_end2end_mobilenet_v1.py | 6 ++-- .../test_fpgadataflow_checksum.py | 4 ++- .../fpgadataflow/test_fpgadataflow_concat.py | 4 +-- ...dataflow_convinputgenerator_rtl_dynamic.py | 8 +++-- tests/fpgadataflow/test_fpgadataflow_dwc.py | 2 +- .../test_fpgadataflow_ipstitch.py | 36 ++----------------- tests/fpgadataflow/test_fpgadataflow_mvau.py | 32 +++++++++-------- .../test_fpgadataflow_thresholding_runtime.py | 8 +++-- tests/fpgadataflow/test_fpgadataflow_vvau.py | 2 +- tests/fpgadataflow/test_runtime_weights.py | 4 ++- 13 files changed, 50 insertions(+), 66 deletions(-) diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb index 0f7903a009..e914781b21 100644 --- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb +++ 
b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb @@ -404,7 +404,7 @@ "child_model = child_model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))\n", "child_model = child_model.transform(PrepareRTLSim())\n", "child_model.set_metadata_prop(\"exec_mode\",\"rtlsim\")\n", - "child_model.set_metadata_prop(\"rtlsim_backend\",\"pyverilator\")\n", + "child_model.set_metadata_prop(\"rtlsim_backend\",\"pyxsi\")\n", "child_model.save(build_dir + \"/tfc_w1_a1_dataflow_child.onnx\");" ] }, diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 176f0c5d8d..bddf4395ca 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -109,6 +109,7 @@ InsertAndSetFIFODepths, RemoveShallowFIFOs, SplitLargeFIFOs, + xsi_fifosim, ) from finn.transformation.fpgadataflow.set_folding import SetFolding from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers @@ -126,7 +127,6 @@ get_rtlsim_trace_depth, pyverilate_get_liveness_threshold_cycles, ) -from finn.util.pyverilator import verilator_fifosim from finn.util.test import execute_parent @@ -250,7 +250,7 @@ def prepare_for_stitched_ip_rtlsim(verify_model, cfg): # set top-level prop for stitched-ip rtlsim and launch verify_model.set_metadata_prop("exec_mode", "rtlsim") # TODO make configurable - verify_model.set_metadata_prop("rtlsim_backend", "pyverilator") + verify_model.set_metadata_prop("rtlsim_backend", "pyxsi") # TODO make configurable # verify_model.set_metadata_prop("rtlsim_trace", "trace.vcd") return verify_model @@ -721,7 +721,7 @@ def step_measure_rtlsim_performance(model: ModelWrapper, cfg: DataflowBuildConfi rtlsim_perf_dict = throughput_test_rtlsim(rtlsim_model, rtlsim_bs) rtlsim_perf_dict["latency_cycles"] = rtlsim_latency_dict["cycles"] else: - rtlsim_perf_dict = verilator_fifosim(model, rtlsim_bs) + rtlsim_perf_dict = xsi_fifosim(model, rtlsim_bs) # keep keys consistent between the Python 
and C++-styles cycles = rtlsim_perf_dict["cycles"] clk_ns = float(model.get_metadata_prop("clk_ns")) diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py index c6c56573ec..385bd66e3d 100644 --- a/tests/end2end/test_end2end_bnn_pynq.py +++ b/tests/end2end/test_end2end_bnn_pynq.py @@ -746,7 +746,7 @@ def test_ipstitch_rtlsim(self, topology, wbits, abits, board): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") os.environ["LIVENESS_THRESHOLD"] = str(int(latency * 1.1)) if rtlsim_trace: model.set_metadata_prop("rtlsim_trace", "%s_w%da%d.vcd" % (topology, wbits, abits)) diff --git a/tests/end2end/test_end2end_mobilenet_v1.py b/tests/end2end/test_end2end_mobilenet_v1.py index bdaa3490b3..9bf9be617b 100644 --- a/tests/end2end/test_end2end_mobilenet_v1.py +++ b/tests/end2end/test_end2end_mobilenet_v1.py @@ -60,6 +60,7 @@ import finn.transformation.streamline.reorder as reorder from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance from finn.core.onnx_exec import execute_onnx +from finn.core.throughput_test import throughput_test_rtlsim from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.create_dataflow_partition import ( @@ -89,7 +90,6 @@ from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds from finn.util.basic import get_finn_root from finn.util.pytorch import NormalizePreProc -from finn.util.pyverilator import verilator_fifosim from finn.util.test import ( crop_center, get_test_model_trained, @@ -502,7 +502,7 @@ def test_end2end_mobilenet_stitched_ip_rtlsim(): # set top-level prop for stitched-ip rtlsim and launch 
model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") ret_rtlsim_ip = execute_onnx(model, inp_dict, True) res_rtlsim_ip = ret_rtlsim_ip[out_name] np.save(build_dir + "/end2end_mobilenet_result_rtlsim_ip.npy", res_rtlsim_ip) @@ -528,7 +528,7 @@ def test_end2end_mobilenet_rtlsim_performance(): # multi-in/out streams currently not supported in our C++ verilator driver rtlsim_bs = 1 - rtlsim_perf_dict = verilator_fifosim(model, rtlsim_bs) + rtlsim_perf_dict = throughput_test_rtlsim(model, batchsize=rtlsim_bs) # keep keys consistent between the Python and C++-styles cycles = rtlsim_perf_dict["cycles"] clk_ns = float(model.get_metadata_prop("clk_ns")) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index 1cfb7f9ec9..fe00e2f8d9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -135,6 +135,8 @@ def create_two_fc_model(): return model +# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi +@pytest.mark.xfail @pytest.mark.vivado @pytest.mark.fpgadataflow def test_fpgadataflow_checksum(): @@ -182,7 +184,7 @@ def test_fpgadataflow_checksum(): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") # define function to read out the checksums from axilite checksums = [] diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py index e207b17a63..2a6a19e4a3 100644 --- a/tests/fpgadataflow/test_fpgadataflow_concat.py +++ b/tests/fpgadataflow/test_fpgadataflow_concat.py @@ -157,7 +157,7 @@ def test_fpgadataflow_concat_stitchedip(): ) ) 
model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") - model.set_metadata_prop("rtlsim_trace", "trace.vcd") + model.set_metadata_prop("rtlsim_backend", "pyxsi") + model.set_metadata_prop("rtlsim_trace", "trace.wdb") ret_sim = execute_onnx(model, inp_dict) assert (exp_out == ret_sim[oname]).all() diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py index a98cc1ab54..e69da49fa1 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ -205,6 +205,8 @@ def write_swg_config(sim): } +# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi +@pytest.mark.xfail @pytest.mark.parametrize("cfg", [cfg0, cfg1, cfg2]) @pytest.mark.slow @pytest.mark.vivado @@ -290,7 +292,7 @@ def test_fpgadataflow_conv_dynamic(cfg): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5, vitis=do_synth)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") # loop through experiment configurations for exp_cfg in exp_cfgs: @@ -451,6 +453,8 @@ def prepare_inputs(input_tensor): return {"inp": input_tensor} +# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi +@pytest.mark.xfail # input datatype @pytest.mark.parametrize("idt", [DataType["UINT4"]]) # kernel size @@ -536,7 +540,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic( model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") # Simulate 1 FM for each dimension in the series for 
i, ifm_dim in enumerate(ifm_dim_series): diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py index ccf5335395..6507bf6710 100644 --- a/tests/fpgadataflow/test_fpgadataflow_dwc.py +++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py @@ -165,7 +165,7 @@ def test_fpgadataflow_dwc_stitched_rtlsim(config, impl_style): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") y = oxe.execute_onnx(model, input_dict)["outp"] assert ( diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index 2d3e76f238..84c9f7f362 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -53,7 +53,6 @@ from finn.transformation.fpgadataflow.synth_ooc import SynthOutOfContext from finn.transformation.fpgadataflow.vitis_build import VitisBuild from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map -from finn.util.pyverilator import pyverilate_stitched_ip from finn.util.test import load_test_checkpoint_or_skip test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") @@ -239,40 +238,9 @@ def test_fpgadataflow_ipstitch_rtlsim(mem_mode): model = load_test_checkpoint_or_skip( ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch_%s.onnx" % mem_mode ) - model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd") - sim = pyverilate_stitched_ip(model) - exp_io = [ - "ap_clk", - "ap_rst_n", - "s_axis_0_tdata", - "s_axis_0_tready", - "s_axis_0_tvalid", - "m_axis_0_tdata", - "m_axis_0_tkeep", - "m_axis_0_tlast", - "m_axis_0_tready", - "m_axis_0_tvalid", - "s_axi_control_0_araddr", - "s_axi_control_0_arready", - "s_axi_control_0_arvalid", - "s_axi_control_0_awaddr", - "s_axi_control_0_awready", - 
"s_axi_control_0_awvalid", - "s_axi_control_0_bready", - "s_axi_control_0_bresp", - "s_axi_control_0_bvalid", - "s_axi_control_0_rdata", - "s_axi_control_0_rready", - "s_axi_control_0_rresp", - "s_axi_control_0_rvalid", - "s_axi_control_0_wdata", - "s_axi_control_0_wready", - "s_axi_control_0_wstrb", - "s_axi_control_0_wvalid", - ] - assert sorted(dir(sim.io)) == sorted(exp_io) + model.set_metadata_prop("rtlsim_trace", "whole_trace.wdb") model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") idt = model.get_tensor_datatype("inp") ishape = model.get_tensor_shape("inp") x = gen_finn_dt_tensor(idt, ishape) diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py index 2589c997e2..5d72389704 100644 --- a/tests/fpgadataflow/test_fpgadataflow_mvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -52,7 +52,8 @@ from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_estimation from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim -from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP + +# from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.derive_characteristic import DeriveCharacteristic from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.minimize_accumulator_width import ( @@ -65,7 +66,8 @@ from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode -from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths + +# from finn.transformation.fpgadataflow.set_fifo_depths import 
InsertAndSetFIFODepths from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers @@ -717,16 +719,18 @@ def test_fpgadataflow_rtl_mvau(mh, mw, pe, simd, idt, wdt, part, clk_ns): output_matmul == output_mvau_rtl ).all(), "Output of ONNX model not matching output of node-by-node RTLsim!" - # Run stitched-ip RTLsim - model = model.transform(InsertAndSetFIFODepths(part, clk_ns)) - model = model.transform(PrepareIP(part, clk_ns)) - model = model.transform(HLSSynthIP()) - model = model.transform(CreateStitchedIP(part, clk_ns)) - - model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") - output_mvau_rtl_stitch = oxe.execute_onnx(model, input_dict)["global_out"] + # Temporarily set to xfail because axilite read and write not enabled yet for pyxsi - assert ( - output_matmul == output_mvau_rtl_stitch - ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" + # Run stitched-ip RTLsim + # model = model.transform(InsertAndSetFIFODepths(part, clk_ns)) + # model = model.transform(PrepareIP(part, clk_ns)) + # model = model.transform(HLSSynthIP()) + # model = model.transform(CreateStitchedIP(part, clk_ns)) + + # model.set_metadata_prop("exec_mode", "rtlsim") + # model.set_metadata_prop("rtlsim_backend", "pyxsi") + # output_mvau_rtl_stitch = oxe.execute_onnx(model, input_dict)["global_out"] + + # assert ( + # output_matmul == output_mvau_rtl_stitch + # ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py index c7685a4b09..8ffe751753 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py @@ -121,6 +121,8 @@ def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp return model +# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi +@pytest.mark.xfail @pytest.mark.parametrize("impl_style", ["rtl", "hls"]) @pytest.mark.parametrize( "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])] @@ -186,7 +188,7 @@ def test_runtime_thresholds_read(impl_style, idt_act_cfg, cfg, narrow, per_tenso model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model = model.transform(PrepareRTLSim()) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") # add two copies of the input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while # we read/write new ones and reads seem to cause a disturbance too) @@ -222,6 +224,8 @@ def read_weights(sim): assert (y == expected).all() +# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi +@pytest.mark.xfail @pytest.mark.parametrize("impl_style", ["rtl", "hls"]) @pytest.mark.parametrize( "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])] @@ -300,7 +304,7 @@ def test_runtime_thresholds_write(impl_style, idt_act_cfg, cfg, narrow, per_tens model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model = model.transform(PrepareRTLSim()) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + 
model.set_metadata_prop("rtlsim_backend", "pyxsi") # add two copies of the input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while # we read/write new ones and reads seem to cause a disturbance too) diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py index ef3a120543..d16226010e 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -457,7 +457,7 @@ def test_fpgadataflow_vvau_rtl(kernel_size, in_feature_dim, in_chn, idt, wdt, pa partitioned_model = partitioned_model.transform(CreateStitchedIP(part, 5)) # set top-level prop for stitched-ip rtlsim and launch partitioned_model.set_metadata_prop("exec_mode", "rtlsim") - partitioned_model.set_metadata_prop("rtlsim_backend", "pyverilator") + partitioned_model.set_metadata_prop("rtlsim_backend", "pyxsi") # transpose input since we're now simulating HW layers (NCHW --> NHWC) input_dict["global_in"] = np.transpose(input_dict["global_in"], (0, 2, 3, 1)) output_vvau_stitched = oxe.execute_onnx( diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py index aaf30a052b..dcb50a11c7 100644 --- a/tests/fpgadataflow/test_runtime_weights.py +++ b/tests/fpgadataflow/test_runtime_weights.py @@ -49,6 +49,8 @@ target_clk_ns = 5 +# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi +@pytest.mark.xfail @pytest.mark.fpgadataflow @pytest.mark.vivado def test_runtime_weights_single_layer(): @@ -89,7 +91,7 @@ def test_runtime_weights_single_layer(): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") in_tensor = np.asarray(range(mw), dtype=np.float32) # add two copies of the 
input tensor as the first one is just used to # "flush out" the pipeline (as mvau already starts receiving old weights while From 3b8d0960f65a24d3fd12bc8bcc925ff05f98a23b Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 10:55:54 +0100 Subject: [PATCH 069/102] [FIFO] default to xsi instead of pyverilator for FIFO depth setting --- src/finn/transformation/fpgadataflow/set_fifo_depths.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index d8d8d9afa6..5e0902d64d 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -363,8 +363,6 @@ def apply(self, model): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(self.fpgapart, self.clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") - # TODO: needs to check if pyxsi necessary - model.set_metadata_prop("rtlsim_backend", "pyverilator") if self.force_python_sim: # do rtlsim in Python for FIFO sizing @@ -435,9 +433,9 @@ def apply(self, model): # layer pipeline due to overlaps n_inferences = 2 - if backend is None or backend in ["verilator", "pyverilator"]: + if backend in ["verilator", "pyverilator"]: sim = verilator_fifosim(model, n_inferences) - elif backend in ["xsi", "pyxsi"]: + elif backend is None or backend in ["xsi", "pyxsi"]: sim = xsi_fifosim(model, n_inferences) else: assert False, f"Unrecognized backend for InsertAndSetFIFODepths: {backend}" From e45ffb560c5a402863aaead14b412d4a9220d5b0 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 21:59:15 +0100 Subject: [PATCH 070/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index de3a95ee4f..e50626ff4b 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ 
XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="8c5abd8546bfc6e31292a52b5374116381239651" +PYXSI_COMMIT="a39195b2d46520b7bd947dffec7aa704fabd9734" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From af603f5f7f30a9e70ffee17787b482063b6118a5 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 21:59:28 +0100 Subject: [PATCH 071/102] [Test] use pyxsi for checksum test, including pre/post-hooks --- tests/fpgadataflow/test_fpgadataflow_checksum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index 1cfb7f9ec9..7581e4ec23 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -31,7 +31,7 @@ import numpy as np from onnx import TensorProto, helper -from pyverilator.util.axi_utils import axilite_read, axilite_write +from pyxsi_utils import axilite_read, axilite_write from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp @@ -182,7 +182,7 @@ def test_fpgadataflow_checksum(): model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) model.set_metadata_prop("exec_mode", "rtlsim") - model.set_metadata_prop("rtlsim_backend", "pyverilator") + model.set_metadata_prop("rtlsim_backend", "pyxsi") # define function to read out the checksums from axilite checksums = [] From 42b92c089d3b6dc1cef163a84ee4fee6c63a3e28 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 22:04:20 +0100 Subject: [PATCH 072/102] [Test] remove xfail from checksum test, works with 
pyxsi now --- tests/fpgadataflow/test_fpgadataflow_checksum.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index eced4bfed4..7581e4ec23 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -135,8 +135,6 @@ def create_two_fc_model(): return model -# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi -@pytest.mark.xfail @pytest.mark.vivado @pytest.mark.fpgadataflow def test_fpgadataflow_checksum(): From 338592761dbeba86c111ab15015a2843888787eb Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 25 Nov 2024 10:44:17 +0000 Subject: [PATCH 073/102] [Tests] Switch axilite read and write to pyxsi --- .../test_fpgadataflow_checksum.py | 12 ++++++++---- ...dataflow_convinputgenerator_rtl_dynamic.py | 16 ++++++++-------- .../test_fpgadataflow_thresholding_runtime.py | 19 +++++++++++-------- tests/fpgadataflow/test_runtime_weights.py | 15 ++++++++++----- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_checksum.py b/tests/fpgadataflow/test_fpgadataflow_checksum.py index 7581e4ec23..8198990512 100644 --- a/tests/fpgadataflow/test_fpgadataflow_checksum.py +++ b/tests/fpgadataflow/test_fpgadataflow_checksum.py @@ -31,7 +31,6 @@ import numpy as np from onnx import TensorProto, helper -from pyxsi_utils import axilite_read, axilite_write from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp @@ -51,6 +50,11 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -193,8 +197,8 @@ def read_checksum_and_drain(sim): 
drain_addr = 32 for i in range(len(model.get_nodes_by_op_type("CheckSum_hls"))): axi_name = "s_axi_checksum_{}_".format(i) - checksums.append(axilite_read(sim, chk_addr, basename=axi_name)) - drain.append(axilite_read(sim, drain_addr, basename=axi_name)) + checksums.append(pyxsi_utils.axilite_read(sim, chk_addr, basename=axi_name)) + drain.append(pyxsi_utils.axilite_read(sim, drain_addr, basename=axi_name)) drain_value = False @@ -202,7 +206,7 @@ def write_drain(sim): addr = 32 for i in range(len(model.get_nodes_by_op_type("CheckSum_hls"))): axi_name = "s_axi_checksum_{}_".format(i) - axilite_write(sim, addr, drain_value, basename=axi_name) + pyxsi_utils.axilite_write(sim, addr, drain_value, basename=axi_name) rtlsim_exec(model, inp, pre_hook=write_drain, post_hook=read_checksum_and_drain) checksum0_rtlsim = int(checksums[0]) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py index e69da49fa1..b16a6b13f3 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ -33,7 +33,6 @@ import onnx.parser as oprs import os from onnx import TensorProto, helper -from pyverilator.util.axi_utils import axilite_write, reset_rtlsim from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.im2col import compute_conv_output_dim @@ -65,6 +64,11 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import pyverilate_get_liveness_threshold_cycles +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + def create_conv_model(idim_h, idim_w, ifm, k, stride, ofm, idt, wdt, pad_mode, depthwise): np.random.seed(0) @@ -159,13 +163,13 @@ def config_hook(configs): return None def write_swg_config(sim): - reset_rtlsim(sim) + pyxsi_utils.reset_rtlsim(sim) 
for axi_name, config in configs: # Write config registers to the SWG/FMPadding dict # defines (addr, value) tuples for config_entry in config.values(): - axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name) - reset_rtlsim(sim) + pyxsi_utils.axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name) + pyxsi_utils.reset_rtlsim(sim) return write_swg_config @@ -205,8 +209,6 @@ def write_swg_config(sim): } -# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi -@pytest.mark.xfail @pytest.mark.parametrize("cfg", [cfg0, cfg1, cfg2]) @pytest.mark.slow @pytest.mark.vivado @@ -453,8 +455,6 @@ def prepare_inputs(input_tensor): return {"inp": input_tensor} -# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi -@pytest.mark.xfail # input datatype @pytest.mark.parametrize("idt", [DataType["UINT4"]]) # kernel size diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py index 8ffe751753..cd5bda6c27 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py @@ -31,7 +31,6 @@ import numpy as np import os from onnx import TensorProto, helper -from pyverilator.util.axi_utils import axilite_read, axilite_write from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.multithreshold import multithreshold @@ -47,6 +46,12 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + + test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -121,8 +126,6 @@ def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp return model -# Temporarily set to xfail because axilite read and 
write not enabled yet for pyxsi -@pytest.mark.xfail @pytest.mark.parametrize("impl_style", ["rtl", "hls"]) @pytest.mark.parametrize( "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])] @@ -202,7 +205,9 @@ def test_runtime_thresholds_read(impl_style, idt_act_cfg, cfg, narrow, per_tenso def read_weights(sim): addr = 0 for i in range(len(old_weight_stream)): - extracted_weight_stream.append(axilite_read(sim, addr, basename="s_axilite_0_")) + extracted_weight_stream.append( + pyxsi_utils.axilite_read(sim, addr, basename="s_axilite_0_") + ) addr += 4 rtlsim_exec(model, exec_ctx, pre_hook=read_weights) @@ -224,8 +229,6 @@ def read_weights(sim): assert (y == expected).all() -# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi -@pytest.mark.xfail @pytest.mark.parametrize("impl_style", ["rtl", "hls"]) @pytest.mark.parametrize( "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])] @@ -317,7 +320,7 @@ def test_runtime_thresholds_write(impl_style, idt_act_cfg, cfg, narrow, per_tens def write_weights(sim): addr = 0 for nw in T_write_stream: - axilite_write(sim, addr, nw, basename="s_axilite_0_") + pyxsi_utils.axilite_write(sim, addr, nw, basename="s_axilite_0_") addr += 4 T_read_stream = [] @@ -325,7 +328,7 @@ def write_weights(sim): def read_weights(sim): addr = 0 for i in range(len(T_write_stream)): - T_read_stream.append(axilite_read(sim, addr, basename="s_axilite_0_")) + T_read_stream.append(pyxsi_utils.axilite_read(sim, addr, basename="s_axilite_0_")) addr += 4 rtlsim_exec(model, exec_ctx_write, pre_hook=write_weights, post_hook=read_weights) diff --git a/tests/fpgadataflow/test_runtime_weights.py b/tests/fpgadataflow/test_runtime_weights.py index dcb50a11c7..b63b531ff7 100644 --- a/tests/fpgadataflow/test_runtime_weights.py +++ b/tests/fpgadataflow/test_runtime_weights.py @@ -31,7 +31,6 @@ import numpy as np import os -from pyverilator.util.axi_utils import 
axilite_read, axilite_write from qonnx.core.datatype import DataType from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.general import GiveUniqueNodeNames @@ -45,12 +44,16 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.create import hls_random_mlp_maker +try: + import pyxsi_utils +except ModuleNotFoundError: + pyxsi_utils = None + + test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 -# Temporarily set to xfail because axilite read and write not enabled yet for pyxsi -@pytest.mark.xfail @pytest.mark.fpgadataflow @pytest.mark.vivado def test_runtime_weights_single_layer(): @@ -103,7 +106,9 @@ def test_runtime_weights_single_layer(): def read_weights(sim): addr = 0 for i in range(len(old_weight_stream)): - extracted_weight_stream.append(axilite_read(sim, addr, basename="s_axilite_0_")) + extracted_weight_stream.append( + pyxsi_utils.axilite_read(sim, addr, basename="s_axilite_0_") + ) addr += 4 rtlsim_exec(model, exec_ctx, pre_hook=read_weights) @@ -124,7 +129,7 @@ def read_weights(sim): def write_weights(sim): addr = 0 for nw in new_weight_stream: - axilite_write(sim, addr, nw, basename="s_axilite_0_") + pyxsi_utils.axilite_write(sim, addr, nw, basename="s_axilite_0_") addr += 4 rtlsim_exec(model, exec_ctx, pre_hook=write_weights) From 68ce7931427a14109f59fabab9084704f75c8d5a Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 26 Nov 2024 22:18:50 +0100 Subject: [PATCH 074/102] [Test] convert negative ctrl reg vals to 2s complement in dynamic SWG test --- .../test_fpgadataflow_convinputgenerator_rtl_dynamic.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py index b16a6b13f3..110c479a56 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py +++ 
b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py @@ -32,6 +32,7 @@ import numpy as np import onnx.parser as oprs import os +from bitstring import BitArray from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper @@ -168,7 +169,12 @@ def write_swg_config(sim): # Write config registers to the SWG/FMPadding dict # defines (addr, value) tuples for config_entry in config.values(): - pyxsi_utils.axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name) + addr, val = config_entry + if val < 0: + # ensure any negative vals are expressed as two's complement, + # SWG control regs are currently always 32 bits + val = BitArray(int=val, length=32).uint + pyxsi_utils.axilite_write(sim, addr, val, basename=axi_name) pyxsi_utils.reset_rtlsim(sim) return write_swg_config From d0109a576fa756f4e85a4daba3beb4a40225c01b Mon Sep 17 00:00:00 2001 From: Shane Fleming Date: Mon, 9 Dec 2024 14:51:33 +0000 Subject: [PATCH 075/102] Adding liveness_threshold to the xsi call --- src/finn/custom_op/fpgadataflow/hwcustomop.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 492680791d..8b5c75a9d3 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -268,7 +268,8 @@ def rtlsim_multi_io(self, sim, io_dict): ) elif rtlsim_backend == "pyxsi": total_cycle_count = pyxsi_utils.rtlsim_multi_io( - sim, io_dict, num_out_values, sname=sname + sim, io_dict, num_out_values, sname=sname, + liveness_threshold=pyverilate_get_liveness_threshold_cycles(), ) else: assert False, f"Unknown rtlsim_backend {rtlsim_backend}" From 1abe1d92376415299e356a36717f51631f69aa29 Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 9 Dec 2024 15:16:51 +0000 Subject: [PATCH 076/102] [HWCustomOp] Run linting --- 
src/finn/custom_op/fpgadataflow/hwcustomop.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 8b5c75a9d3..ad3e9cc514 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -268,7 +268,10 @@ def rtlsim_multi_io(self, sim, io_dict): ) elif rtlsim_backend == "pyxsi": total_cycle_count = pyxsi_utils.rtlsim_multi_io( - sim, io_dict, num_out_values, sname=sname, + sim, + io_dict, + num_out_values, + sname=sname, liveness_threshold=pyverilate_get_liveness_threshold_cycles(), ) else: From fb14363a2d093ad2f77eee7e658f10cef597ef67 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 26 Nov 2024 21:41:25 +0100 Subject: [PATCH 077/102] [HLS] disable HLSFPO lib usage - may impact float cppsim! --- src/finn/custom_op/fpgadataflow/templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..88188a1472 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -30,6 +30,7 @@ # template for single node execution docompute_template = """ #define AP_INT_MAX_W $AP_INT_MAX_W$ +#define HLS_NO_XIL_FPO_LIB #include "cnpy.h" #include "npy2apintstream.hpp" #include From 9ac4e12e9812e593cdb10730dcb020f09565486c Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 26 Nov 2024 21:42:05 +0100 Subject: [PATCH 078/102] [HLS] includes have moved to under Vitis path for 2024.2, specify both --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index ebf9c51d75..9749a507b2 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -262,6 +262,7 @@ def 
compile_singlenode_code(self): builder.append_includes("-I$FINN_ROOT/deps/finn-hlslib") builder.append_includes("-I$FINN_ROOT/custom_hls") builder.append_includes("-I{}/include".format(os.environ["HLS_PATH"])) + builder.append_includes("-I{}/include".format(os.environ["VITIS_PATH"])) builder.append_includes("--std=c++14") builder.append_includes("-O3") builder.append_sources(code_gen_dir + "/*.cpp") From a7ed8673aade6cb97a967e50fb04d49ce841c9dd Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 7 Jan 2025 09:56:48 +0100 Subject: [PATCH 079/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index e50626ff4b..0e0b059dd4 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="a39195b2d46520b7bd947dffec7aa704fabd9734" +PYXSI_COMMIT="4cd84bed76ca066cfc4a4222ba8a57a510fc590c" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 0634323ce9d7e34a0ed567ddefef1a89c71f5e71 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 7 Jan 2025 16:20:03 +0000 Subject: [PATCH 080/102] [Tests] Re-enable stitched ip rtlsim tests for MVAUs --- tests/fpgadataflow/test_fpgadataflow_mvau.py | 32 +++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py index 5d72389704..6237c97782 100644 --- a/tests/fpgadataflow/test_fpgadataflow_mvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -52,8 +52,7 @@ from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.analysis.fpgadataflow.hls_synth_res_estimation import 
hls_synth_res_estimation from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim - -# from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.derive_characteristic import DeriveCharacteristic from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.minimize_accumulator_width import ( @@ -66,8 +65,7 @@ from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode - -# from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths +from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers @@ -719,18 +717,16 @@ def test_fpgadataflow_rtl_mvau(mh, mw, pe, simd, idt, wdt, part, clk_ns): output_matmul == output_mvau_rtl ).all(), "Output of ONNX model not matching output of node-by-node RTLsim!" - # Temporarily set to xfail because axilite read and write not enabled yet for pyxsi - # Run stitched-ip RTLsim - # model = model.transform(InsertAndSetFIFODepths(part, clk_ns)) - # model = model.transform(PrepareIP(part, clk_ns)) - # model = model.transform(HLSSynthIP()) - # model = model.transform(CreateStitchedIP(part, clk_ns)) - - # model.set_metadata_prop("exec_mode", "rtlsim") - # model.set_metadata_prop("rtlsim_backend", "pyxsi") - # output_mvau_rtl_stitch = oxe.execute_onnx(model, input_dict)["global_out"] - - # assert ( - # output_matmul == output_mvau_rtl_stitch - # ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
+ model = model.transform(InsertAndSetFIFODepths(part, clk_ns)) + model = model.transform(PrepareIP(part, clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(CreateStitchedIP(part, clk_ns)) + + model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_backend", "pyxsi") + output_mvau_rtl_stitch = oxe.execute_onnx(model, input_dict)["global_out"] + + assert ( + output_matmul == output_mvau_rtl_stitch + ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" From afda943fec088678a1124cf2963aa6ca02f31799 Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 8 Jan 2025 10:06:07 +0000 Subject: [PATCH 081/102] [Tests] Add vitis path to compilation in data packing tests --- tests/util/test_data_packing.py | 4 ++-- tests/util/test_hls_vector.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/util/test_data_packing.py b/tests/util/test_data_packing.py index a718f171e2..e821a2c6fb 100644 --- a/tests/util/test_data_packing.py +++ b/tests/util/test_data_packing.py @@ -99,9 +99,9 @@ def test_npy2apintstream(test_shape, dtype): f.write("\n".join(test_app_string)) cmd_compile = """ g++ -o test_npy2apintstream test.cpp $FINN_ROOT/deps/cnpy/cnpy.cpp \ --I$FINN_ROOT/deps/cnpy/ -I{}/include -I$FINN_ROOT/src/finn/qnn-data/cpp \ +-I$FINN_ROOT/deps/cnpy/ -I{}/include -I{}/include -I$FINN_ROOT/src/finn/qnn-data/cpp \ --std=c++11 -lz""".format( - os.environ["HLS_PATH"] + os.environ["HLS_PATH"], os.environ["VITIS_PATH"] ) with open(test_dir + "/compile.sh", "w") as f: f.write(cmd_compile) diff --git a/tests/util/test_hls_vector.py b/tests/util/test_hls_vector.py index 35d9b1b2fc..20fa0bf072 100644 --- a/tests/util/test_hls_vector.py +++ b/tests/util/test_hls_vector.py @@ -95,9 +95,9 @@ def test_npy2vectorstream(test_shape, dtype): f.write("\n".join(test_app_string)) cmd_compile = """ g++ -o test_npy2vectorstream test.cpp $FINN_ROOT/deps/cnpy/cnpy.cpp \ --I$FINN_ROOT/deps/cnpy/ -I{}/include 
-I$FINN_ROOT/src/finn/qnn-data/cpp \ +-I$FINN_ROOT/deps/cnpy/ -I{}/include -I{}/include -I$FINN_ROOT/src/finn/qnn-data/cpp \ --std=c++14 -lz """.format( - os.environ["HLS_PATH"] + os.environ["HLS_PATH"], os.environ["VITIS_PATH"] ) with open(test_dir + "/compile.sh", "w") as f: f.write(cmd_compile) From 6522c4994a67b9c902a7c27ad8db64aa73c13601 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 29 Nov 2024 10:13:41 +0100 Subject: [PATCH 082/102] [FIFO] call get_simkernel_so for xsicpp FIFO sizing too --- src/finn/core/rtlsim_exec.py | 2 ++ src/finn/qnn-data/cpp/xsi_simdriver.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index e1f3897787..f95322a1b4 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -218,6 +218,8 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc "TRACE_CMD": "" if trace_file is None else "top->trace_all();", # code to post-process final sim status to extract more data "POSTPROC_CPP": postproc_cpp, + # sim kernel .so to use (depends on Vivado version) + "SIMKERNEL_SO": pyxsi_utils.get_simkernel_so(), } for key, val in template_dict.items(): fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val)) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 5cefc6ce6b..d30f53ccc8 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -218,7 +218,7 @@ void reset() { int main(int argc, char *argv[]) { // load pre-compiled rtl simulation - std::string simengine_libname = "librdi_simulator_kernel.so"; + std::string simengine_libname = "@SIMKERNEL_SO@"; std::string design_libname = "xsim.dir/@TOP_MODULE_NAME@/xsimk.so"; top = new Xsi::Loader(design_libname, simengine_libname); s_xsi_setup_info info; From 55a3ff3764c504e8fbf5a6fe5ec9a5e228d25afa Mon Sep 17 00:00:00 2001 From: auphelia Date: Thu, 9 Jan 
2025 14:48:00 +0000 Subject: [PATCH 083/102] [HWCustomOp] Remove unused function for characterization based fifo sizing --- src/finn/custom_op/fpgadataflow/hwcustomop.py | 30 +++---------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index ad3e9cc514..09a60dc5dd 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -30,7 +30,7 @@ import os import warnings from abc import abstractmethod -from pyverilator.util.axi_utils import _read_signal, reset_rtlsim, rtlsim_multi_io +from pyverilator.util.axi_utils import rtlsim_multi_io from qonnx.custom_op.base import CustomOp from qonnx.util.basic import roundup_to_integer_multiple @@ -357,8 +357,6 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): exp_cycles, ) sim = self.get_rtlsim() - # signal name - sname = "_" + self.hls_sname() + "_" if override_rtlsim_dict is not None: io_dict = override_rtlsim_dict else: @@ -374,32 +372,12 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - def monitor_txns(sim_obj): - for inp in txns_in: - in_ready = _read_signal(sim, inp + sname + "TREADY") == 1 - in_valid = _read_signal(sim, inp + sname + "TVALID") == 1 - if in_ready and in_valid: - txns_in[inp].append(1) - else: - txns_in[inp].append(0) - for outp in txns_out: - if ( - _read_signal(sim, outp + sname + "TREADY") == 1 - and _read_signal(sim, outp + sname + "TVALID") == 1 - ): - txns_out[outp].append(1) - else: - txns_out[outp].append(0) - - reset_rtlsim(sim) - total_cycle_count = rtlsim_multi_io( + self.reset_rtlsim(sim) + self.rtlsim_multi_io( sim, io_dict, - n_outs, - sname=sname, - liveness_threshold=period, - hook_preclk=monitor_txns, ) + 
total_cycle_count = self.get_nodeattr("cycles_rtlsim") assert ( total_cycle_count <= period ), """Total cycle count from rtl simulation is higher than From 6abbc53ce93d7a92e2b6e9ab126c888eaf907a32 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 10 Jan 2025 11:07:23 +0100 Subject: [PATCH 084/102] [Deps] update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 0e0b059dd4..1b2b7cfb06 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="4cd84bed76ca066cfc4a4222ba8a57a510fc590c" +PYXSI_COMMIT="9bd8c4b6e6f3af9f676d48033f5cafea9d00633c" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 7b3e13a9acb15027784c7df815c03fe02c3f2008 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 11:01:50 +0100 Subject: [PATCH 085/102] [FIFO] lay groundwork for 2x-clkd xsicpp FIFO sizing --- src/finn/core/rtlsim_exec.py | 5 ++- .../rtl/matrixvectoractivation_rtl.py | 2 +- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 33 +++++++++++++++---- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index f95322a1b4..624b0c2221 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -194,7 +194,8 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc oshape_folded = lnode_inst.get_folded_output_shape() # TODO: retrieve the number of inputs from execution_context n_inputs = 1 - + # TODO determine according to presence of clk2x + clknames = "clk" # fill in the template arguments for sim driver template_dict = { # number of input transactions 
per inference @@ -212,6 +213,8 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc "INSTREAM_NAME": "s_axis_0", "OUTSTREAM_NAME": "m_axis_0", "CLK_NAME": "ap_clk", + "CLK2X_NAME": "ap_clk2x", + "CLKNAMES": clknames, "NRST_NAME": "ap_rst_n", # control tracing and trace filename "TRACE_FILE": "NULL" if trace_file is None else f'"{trace_file}"', diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index 61797dd2fd..604ab92ff3 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -253,7 +253,7 @@ def generate_hdl(self, model, fpgapart, clk): os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_wrapper.v"), "w", ) as f: - f.write(template_wrapper.replace("$FORCE_BEHAVIORAL$", str(0))) + f.write(template_wrapper.replace("$FORCE_BEHAVIORAL$", str(1))) with open( os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"), "w", diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index d30f53ccc8..b0af7eb990 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -193,27 +193,48 @@ inline void toggle_clk_1() { top->run(5); } +inline void toggle_clk_and_clk2x_1() { + set_bool("@CLK_NAME@"); + set_bool("@CLK2X_NAME@"); + top->run(5); + clear_bool("@CLK2X_NAME@"); + top->run(5); +} + // falling clock edge + low clock inline void toggle_clk_0() { clear_bool("@CLK_NAME@"); top->run(5); } +inline void toggle_clk_and_clk2x_0() { + clear_bool("@CLK_NAME@"); + set_bool("@CLK2X_NAME@"); + top->run(5); + clear_bool("@CLK2X_NAME@"); + top->run(5); +} + // drive simulation for 1 clock period inline void toggle_clk() { toggle_clk_0(); toggle_clk_1(); } +inline void toggle_clk_and_clk2x() { + toggle_clk_and_clk2x_0(); + 
toggle_clk_and_clk2x_1(); +} + // apply reset to the simulation void reset() { clear_bool("@CLK_NAME@"); clear_bool("@NRST_NAME@"); - toggle_clk(); - toggle_clk(); + toggle_@CLKNAMES@(); + toggle_@CLKNAMES@(); set_bool("@NRST_NAME@"); - toggle_clk(); - toggle_clk(); + toggle_@CLKNAMES@(); + toggle_@CLKNAMES@(); } int main(int argc, char *argv[]) { @@ -264,7 +285,7 @@ int main(int argc, char *argv[]) { // TODO needs to be extended to non-bool signals for actual input data map signals_to_write; // toggle falling clock edge and drive low clock - toggle_clk_0(); + toggle_@CLKNAMES@_0(); // check for transactions on the input stream if(chk_bool("@INSTREAM_NAME@_tready") && chk_bool("@INSTREAM_NAME@_tvalid")) { n_in_txns++; @@ -290,7 +311,7 @@ int main(int argc, char *argv[]) { signals_to_write["@INSTREAM_NAME@_tvalid"] = false; } // toggle rising clock edge and drive high clock - toggle_clk_1(); + toggle_@CLKNAMES@_1(); // actually write the desired signals from the map for (auto const& x : signals_to_write) { From f997dd888fd4760c61d22c5e3262f2d2a3617f90 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 13:05:49 +0100 Subject: [PATCH 086/102] [FIFO] check for 2x clocked stitched-IP for xsicpp fifosim --- src/finn/core/rtlsim_exec.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 624b0c2221..5e3cb10cc3 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -194,8 +194,9 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc oshape_folded = lnode_inst.get_folded_output_shape() # TODO: retrieve the number of inputs from execution_context n_inputs = 1 - # TODO determine according to presence of clk2x - clknames = "clk" + # determine according to presence of clk2x + is_double_pumped = eval(model.get_metadata_prop("vivado_stitch_ifnames"))["clk2x"] != [] + clknames = "clk_and_clk2x" if is_double_pumped else "clk" # 
fill in the template arguments for sim driver template_dict = { # number of input transactions per inference From faa5d51b168f8e9b5102c32adcce5fb017d933a3 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 13:25:40 +0100 Subject: [PATCH 087/102] [FIFO] handle clk2x key not existing in vivado_stitch_ifnames --- src/finn/core/rtlsim_exec.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 5e3cb10cc3..530185468f 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -195,7 +195,15 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc # TODO: retrieve the number of inputs from execution_context n_inputs = 1 # determine according to presence of clk2x - is_double_pumped = eval(model.get_metadata_prop("vivado_stitch_ifnames"))["clk2x"] != [] + ifnames = model.get_metadata_prop("vivado_stitch_ifnames") + assert not ( + ifnames is None + ), "Couldn't find stitched-IP interface names, did you run IP stitching first?" 
+ ifnames = eval(ifnames) + if "clk2x" in ifnames.keys(): + is_double_pumped = ifnames["clk2x"] != [] + else: + is_double_pumped = False clknames = "clk_and_clk2x" if is_double_pumped else "clk" # fill in the template arguments for sim driver template_dict = { From 6c896e48855acdf332ed7865ea397ec69bf86d0f Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 22 Nov 2024 15:26:39 +0100 Subject: [PATCH 088/102] [FIFO] support multiple I/O streams for xsicpp fifosim --- src/finn/core/rtlsim_exec.py | 52 ++++++----- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 109 ++++++++++++++++-------- 2 files changed, 106 insertions(+), 55 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 530185468f..ec2a4a4269 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -181,19 +181,28 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc fifosim_cpp_fname = get_finn_root() + "/src/finn/qnn-data/cpp/xsi_simdriver.cpp" with open(fifosim_cpp_fname, "r") as f: fifosim_cpp_template = f.read() - assert len(model.graph.input) == 1, "Only a single input stream is supported" - assert len(model.graph.output) == 1, "Only a single output stream is supported" - iname = model.graph.input[0].name - first_node = model.find_consumer(iname) - oname = model.graph.output[0].name - last_node = model.find_producer(oname) - assert (first_node is not None) and (last_node is not None), "Failed to find first/last nodes" - fnode_inst = getCustomOp(first_node) - lnode_inst = getCustomOp(last_node) - ishape_folded = fnode_inst.get_folded_input_shape() - oshape_folded = lnode_inst.get_folded_output_shape() - # TODO: retrieve the number of inputs from execution_context - n_inputs = 1 + + instream_iters = [] + outstream_iters = [] + for top_inp in model.graph.input: + iname = top_inp.name + first_node = model.find_consumer(iname) + assert first_node is not None, "Failed to find consumer for " + iname + fnode_inst = 
getCustomOp(first_node) + top_ind = list(first_node.input).index(iname) + ishape_folded = fnode_inst.get_folded_input_shape(ind=top_ind) + instream_iters.append(np.prod(ishape_folded[:-1])) + for top_out in model.graph.output: + oname = top_out.name + last_node = model.find_producer(oname) + assert last_node is not None, "Failed to find producer for " + oname + lnode_inst = getCustomOp(last_node) + top_ind = list(last_node.output).index(oname) + oshape_folded = lnode_inst.get_folded_output_shape(ind=top_ind) + outstream_iters.append(np.prod(oshape_folded[:-1])) + + # retrieve the number of inputs from execution_context + n_inferences = execution_context[model.graph.input[0].name] # determine according to presence of clk2x ifnames = model.get_metadata_prop("vivado_stitch_ifnames") assert not ( @@ -205,22 +214,27 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc else: is_double_pumped = False clknames = "clk_and_clk2x" if is_double_pumped else "clk" + instream_names = [x[0] for x in ifnames["s_axis"]] + instream_names_str = "{" + ", ".join(['"' + x + '"' for x in instream_names]) + "}" + outstream_names = [x[0] for x in ifnames["m_axis"]] + outstream_names_str = "{" + ", ".join(['"' + x + '"' for x in outstream_names]) + "}" + instream_iters_str = "{" + ", ".join([str(x) for x in instream_iters]) + "}" + outstream_iters_str = "{" + ", ".join([str(x) for x in outstream_iters]) + "}" # fill in the template arguments for sim driver template_dict = { # number of input transactions per inference - "ITERS_PER_INPUT": np.prod(ishape_folded[:-1]), + "ITERS_PER_INPUT": instream_iters_str, # number of output transactions per inference - "ITERS_PER_OUTPUT": np.prod(oshape_folded[:-1]), + "ITERS_PER_OUTPUT": outstream_iters_str, # number of inferences - "N_INPUTS": n_inputs, + "N_INFERENCES": n_inferences, # max number of cycles to wait for output activity before timeout "MAX_ITERS": timeout_cycles, # name of the top-level HDL module 
"TOP_MODULE_NAME": top_module_name, # names of the top-level AXI streams and signals - # TODO retrieve stream and signal names from model - "INSTREAM_NAME": "s_axis_0", - "OUTSTREAM_NAME": "m_axis_0", + "INSTREAM_NAME": instream_names_str, + "OUTSTREAM_NAME": outstream_names_str, "CLK_NAME": "ap_clk", "CLK2X_NAME": "ap_clk2x", "CLKNAMES": clknames, diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index b0af7eb990..0a9aeded21 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -251,23 +251,28 @@ int main(int argc, char *argv[]) { populate_port_map(); + vector instream_names = @INSTREAM_NAME@; + vector outstream_names = @OUTSTREAM_NAME@; // how much data to push into/pull out of sim - unsigned n_iters_per_input = @ITERS_PER_INPUT@; - unsigned n_iters_per_output = @ITERS_PER_OUTPUT@; - unsigned n_inputs = @N_INPUTS@; + vector n_iters_per_input = @ITERS_PER_INPUT@; + vector n_iters_per_output = @ITERS_PER_OUTPUT@; + unsigned n_inferences = @N_INFERENCES@; unsigned max_iters = @MAX_ITERS@; reset(); - unsigned n_in_txns = 0, n_out_txns = 0, iters = 0, last_output_at = 0; + vector n_in_txns(instream_names.size(), 0), n_out_txns(outstream_names.size(), 0); + size_t total_n_in_txns = 0, total_n_out_txns = 0; + unsigned iters = 0, last_output_at = 0; unsigned latency = 0; unsigned cycles_since_last_output = 0; + size_t n_finished_instreams = 0, n_finished_outstreams = 0; bool exit_criterion = false; cout << "Simulation starting" << endl; - cout << "Number of inputs to write " << n_iters_per_input * n_inputs << endl; - cout << "Number of outputs to expect " << n_iters_per_output * n_inputs << endl; + //cout << "Number of inputs to write " << n_iters_per_input * n_inputs << endl; + //cout << "Number of outputs to expect " << n_iters_per_output * n_inputs << endl; cout << "No-output timeout clock cycles " << max_iters << endl; chrono::steady_clock::time_point begin = 
chrono::steady_clock::now(); @@ -276,8 +281,10 @@ int main(int argc, char *argv[]) { bool output_done = false; bool timeout = false; - // enable reception on the output stream - set_bool("@OUTSTREAM_NAME@_tready"); + // enable reception on the output streams + for (auto & outstream_name : outstream_names) { + set_bool(outstream_name + "_tready"); + } while(!exit_criterion) { // keep track of which signals to write @@ -286,30 +293,60 @@ int main(int argc, char *argv[]) { map signals_to_write; // toggle falling clock edge and drive low clock toggle_@CLKNAMES@_0(); - // check for transactions on the input stream - if(chk_bool("@INSTREAM_NAME@_tready") && chk_bool("@INSTREAM_NAME@_tvalid")) { - n_in_txns++; + // check for transactions on the input streams + for(size_t i = 0; i < instream_names.size(); i++) { + string instream_name = instream_names[i]; + if(chk_bool(instream_name+"_tready") && chk_bool(instream_name + "_tvalid")) { + n_in_txns[i]++; + total_n_in_txns++; + // determine whether we have more inputs to feed + if(n_in_txns[i] == n_iters_per_input[i] * n_inferences) { + signals_to_write[instream_name + "_tvalid"] = false; + n_finished_instreams++; + } + } + + if(n_in_txns[i] < n_iters_per_input[i] * n_inferences) { + signals_to_write[instream_name + "_tvalid"] = true; + } else if(n_in_txns[i] > n_iters_per_input[i] * n_inferences) { + // more input transactions than specified, should never happen + // most likely a bug in the C++ driver code if this happens + cout << "WARNING: Unknown stream condition for input " << instream_name << endl; + signals_to_write[instream_name + "_tvalid"] = false; + } + } + + // check for transactions on the output streams + size_t n_outstreams_with_no_txn = 0; + for(size_t i = 0; i < outstream_names.size(); i++) { + string outstream_name = outstream_names[i]; + if(chk_bool(outstream_name+"_tready") && chk_bool(outstream_name + "_tvalid")) { + // TODO add output data capture to file here + // (unless we are in dummy data mode) + 
n_out_txns[i]++; + total_n_out_txns++; + // determine whether we have more outputs to consume + if(n_out_txns[i] == n_iters_per_output[i] * n_inferences) { + signals_to_write[outstream_name + "_tready"] = false; + n_finished_outstreams++; + } + } else { + n_outstreams_with_no_txn++; + } + if(n_out_txns[i] < n_iters_per_output[i] * n_inferences) { + signals_to_write[outstream_name + "_tready"] = true; + } else if(n_out_txns[i] > n_iters_per_output[i] * n_inferences) { + // more output transactions than specified + cout << "WARNING: Unknown stream condition for output " << outstream_name << endl; + signals_to_write[outstream_name + "_tready"] = false; + } } - // check for transactions on the output stream - if(chk_bool("@OUTSTREAM_NAME@_tready") && chk_bool("@OUTSTREAM_NAME@_tvalid")) { - n_out_txns++; - // TODO add output data capture to file here - // (unless we are in dummy data mode) - } else { + if(n_outstreams_with_no_txn == outstream_names.size()) { + // if none of the output streams had any activity: // keep track of no-activity cycles for timeout cycles_since_last_output++; } - // determine whether we have more inputs to feed - if(n_in_txns == n_iters_per_input * n_inputs) { - signals_to_write["@INSTREAM_NAME@_tvalid"] = false; - } else if(n_in_txns < n_iters_per_input * n_inputs) { - signals_to_write["@INSTREAM_NAME@_tvalid"] = true; - } else { - // more input transactions than specified, should never happen - // most likely a bug in the C++ driver code if this happens - cout << "Unknown stream condition for input!" 
<< endl; - signals_to_write["@INSTREAM_NAME@_tvalid"] = false; - } + // toggle rising clock edge and drive high clock toggle_@CLKNAMES@_1(); // actually write the desired signals from the map @@ -322,22 +359,22 @@ int main(int argc, char *argv[]) { iters++; // show a progress message once in a while if(iters % 1000 == 0) { - cout << "Elapsed iters " << iters << " inps " << n_in_txns << " outs " << n_out_txns << endl; + cout << "Elapsed iters " << iters << " inps " << total_n_in_txns << " outs " << total_n_out_txns << endl; chrono::steady_clock::time_point end = chrono::steady_clock::now(); cout << "Elapsed since last report = " << chrono::duration_cast(end - begin).count() << "[s]" << endl; begin = end; } // check whether the exit criteria are reached - input_done = (n_in_txns >= n_iters_per_input * n_inputs); - output_done = (n_out_txns >= n_iters_per_output * n_inputs); + input_done = (n_finished_instreams == instream_names.size()); + output_done = (n_finished_outstreams == outstream_names.size()); timeout = (cycles_since_last_output > max_iters); exit_criterion = (input_done && output_done) || timeout; } // dump final simulation statistics to stdout and file cout << "Simulation finished" << endl; - cout << "Number of inputs consumed " << n_in_txns << endl; - cout << "Number of outputs produced " << n_out_txns << endl; + cout << "Number of inputs consumed " << total_n_in_txns << endl; + cout << "Number of outputs produced " << total_n_out_txns << endl; cout << "Number of clock cycles " << iters << endl; cout << "Input done? " << input_done << endl; cout << "Output done? 
" << output_done << endl; @@ -345,10 +382,10 @@ int main(int argc, char *argv[]) { ofstream results_file; results_file.open("results.txt", ios::out | ios::trunc); - results_file << "N_IN_TXNS" << "\t" << n_in_txns << endl; - results_file << "N_OUT_TXNS" << "\t" << n_out_txns << endl; + results_file << "N_IN_TXNS" << "\t" << total_n_in_txns << endl; + results_file << "N_OUT_TXNS" << "\t" << total_n_out_txns << endl; results_file << "cycles" << "\t" << iters << endl; - results_file << "N" << "\t" << n_inputs << endl; + results_file << "N" << "\t" << n_inferences << endl; results_file << "latency_cycles" << "\t" << latency << endl; // optionally, extract more data from final status @POSTPROC_CPP@ From ec36907308f1fe772503e24317e3dade0b979c33 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 10 Jan 2025 11:11:28 +0100 Subject: [PATCH 089/102] [RTLMVU] FORCE_BEHAVIORAL=0 for synth again (came from cherry picking) --- .../custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py index 604ab92ff3..61797dd2fd 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/matrixvectoractivation_rtl.py @@ -253,7 +253,7 @@ def generate_hdl(self, model, fpgapart, clk): os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_wrapper.v"), "w", ) as f: - f.write(template_wrapper.replace("$FORCE_BEHAVIORAL$", str(1))) + f.write(template_wrapper.replace("$FORCE_BEHAVIORAL$", str(0))) with open( os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_wrapper_sim.v"), "w", From 1868a11beac3b65f52f7dfb2c81461207fd094f6 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 10 Jan 2025 12:06:24 +0100 Subject: [PATCH 090/102] [XSICPP] report latency correctly --- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 
8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 0a9aeded21..c8018b3504 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -369,6 +369,14 @@ int main(int argc, char *argv[]) { output_done = (n_finished_outstreams == outstream_names.size()); timeout = (cycles_since_last_output > max_iters); exit_criterion = (input_done && output_done) || timeout; + // latency computation: when all outputs have generated 1 full sample + if(latency == 0) { + size_t n_outputs_with_one_completion = 0; + for(size_t i = 0; i < outstream_names.size(); i++) { + if(n_out_txns[i] == n_iters_per_output[i]) n_outputs_with_one_completion++; + } + if(n_outputs_with_one_completion == outstream_names.size() latency = iters; + } } // dump final simulation statistics to stdout and file From 407e00f0cc499195f89d676192adb9c2e695d3a2 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 10 Jan 2025 12:08:09 +0100 Subject: [PATCH 091/102] [XSICPP] typo fix --- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index c8018b3504..6d3e38fffe 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -375,7 +375,7 @@ int main(int argc, char *argv[]) { for(size_t i = 0; i < outstream_names.size(); i++) { if(n_out_txns[i] == n_iters_per_output[i]) n_outputs_with_one_completion++; } - if(n_outputs_with_one_completion == outstream_names.size() latency = iters; + if(n_outputs_with_one_completion == outstream_names.size()) latency = iters; } } From 5003647c177cd5e09f80673b94d8df5d9dc2ae1a Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 10 Jan 2025 12:14:25 +0100 Subject: [PATCH 092/102] [XSICPP] assert timeout --- src/finn/core/rtlsim_exec.py | 5 ++++- 
src/finn/qnn-data/cpp/xsi_simdriver.cpp | 8 +++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index ec2a4a4269..e497bac94e 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -284,12 +284,15 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc launch_process_helper(runsim_cmd, proc_env=runsim_env, cwd=sim_base) # parse results file and return dict - with open(sim_base + "/results.txt", "r") as f: + results_filename = sim_base + "/results.txt" + with open(results_filename, "r") as f: results = f.read().strip().split("\n") ret_dict = {} for result_line in results: key, val = result_line.split("\t") ret_dict[key] = int(val) + if "TIMEOUT" in ret_dict.keys(): + assert ret_dict["TIMEOUT"] == 0, f"XSI C++ simulation timed out, see {results_filename}" return ret_dict diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 6d3e38fffe..94537f6e9e 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -375,7 +375,10 @@ int main(int argc, char *argv[]) { for(size_t i = 0; i < outstream_names.size(); i++) { if(n_out_txns[i] == n_iters_per_output[i]) n_outputs_with_one_completion++; } - if(n_outputs_with_one_completion == outstream_names.size()) latency = iters; + if(n_outputs_with_one_completion == outstream_names.size()) { + cout << "All outputs have now produced a sample, latency = " << iters << " cycles" << endl; + latency = iters; + } } } @@ -395,6 +398,9 @@ int main(int argc, char *argv[]) { results_file << "cycles" << "\t" << iters << endl; results_file << "N" << "\t" << n_inferences << endl; results_file << "latency_cycles" << "\t" << latency << endl; + results_file << "TIMEOUT" << "\t" << timeout ? 1 : 0<< endl; + results_file << "INPUT_DONE" << "\t" << input_done ? 1 : 0 << endl; + results_file << "OUTPUT_DONE" << "\t" << output_done ? 
1 : 0 << endl; // optionally, extract more data from final status @POSTPROC_CPP@ results_file.close(); From 8e7311ade47929686193a73f49f93430e9be8054 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Fri, 10 Jan 2025 12:17:26 +0100 Subject: [PATCH 093/102] [XSICPP] typo fix --- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 94537f6e9e..c972613401 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -398,9 +398,9 @@ int main(int argc, char *argv[]) { results_file << "cycles" << "\t" << iters << endl; results_file << "N" << "\t" << n_inferences << endl; results_file << "latency_cycles" << "\t" << latency << endl; - results_file << "TIMEOUT" << "\t" << timeout ? 1 : 0<< endl; - results_file << "INPUT_DONE" << "\t" << input_done ? 1 : 0 << endl; - results_file << "OUTPUT_DONE" << "\t" << output_done ? 1 : 0 << endl; + results_file << "TIMEOUT" << "\t" << (timeout ? 1 : 0) << endl; + results_file << "INPUT_DONE" << "\t" << (input_done ? 1 : 0) << endl; + results_file << "OUTPUT_DONE" << "\t" << (output_done ? 
1 : 0) << endl; // optionally, extract more data from final status @POSTPROC_CPP@ results_file.close(); From 946ed8255b9a2e3ed927bab0c5f4e59617870c02 Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 10 Jan 2025 16:49:14 +0000 Subject: [PATCH 094/102] [Util] Set default liveness threshold to 1M --- src/finn/util/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index ffef82bd5a..dbbfc2b31f 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -142,7 +142,7 @@ def pyverilate_get_liveness_threshold_cycles(): """Return the number of no-output cycles rtlsim will wait before assuming the simulation is not finishing and throwing an exception.""" - return int(os.getenv("LIVENESS_THRESHOLD", 10000)) + return int(os.getenv("LIVENESS_THRESHOLD", 1000000)) def make_build_dir(prefix=""): From 967e5a124d1ab067a39391e60480ace760734ab3 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 13 Jan 2025 13:16:36 +0100 Subject: [PATCH 095/102] [XSICPP] settable liveness threshold & get from latency est for FIFO sizing --- src/finn/core/rtlsim_exec.py | 9 +++++++-- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 2 ++ .../fpgadataflow/set_fifo_depths.py | 16 +++++++++++----- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index e497bac94e..37c0443c0f 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -115,7 +115,9 @@ def file_to_basename(x): return os.path.basename(os.path.realpath(x)) -def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc_cpp=""): +def rtlsim_exec_cppxsi( + model, execution_context, dummy_data_mode=False, postproc_cpp="", timeout_cycles=None +): """Use XSI C++ rtl simulation to execute given model with stitched IP. The dummy_data_mode flag controls whether the simulation is driven by dummy data or real data. 
The execution_context parameter must be formatted @@ -133,12 +135,15 @@ def rtlsim_exec_cppxsi(model, execution_context, dummy_data_mode=False, postproc The postproc_cpp optional argument can be used to inject C++ code to retrieve extra data when the simulation is finished. See the @POSTPROC_CPP@ template argument in the xsi_simdriver.cpp file to see what context and functions are available. + If timeout_cycles is not None, the default value from pyverilate_get_liveness_threshold_cycles + will be used. """ # TODO: support running functional rtlsim with real I/O data # TODO: support running with multiple inputs/outputs # TODO: rename utility fxn to remove "pyverilate", used for other backends too - timeout_cycles = pyverilate_get_liveness_threshold_cycles() + if timeout_cycles is None: + timeout_cycles = pyverilate_get_liveness_threshold_cycles() assert dummy_data_mode, "Only dummy_data_mode=True is supported for now" diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index c972613401..3a92bc123a 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -321,6 +321,8 @@ int main(int argc, char *argv[]) { for(size_t i = 0; i < outstream_names.size(); i++) { string outstream_name = outstream_names[i]; if(chk_bool(outstream_name+"_tready") && chk_bool(outstream_name + "_tvalid")) { + // reset the no-output timeout counter + cycles_since_last_output = 0; // TODO add output data capture to file here // (unless we are in dummy data mode) n_out_txns[i]++; diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 5e0902d64d..c05c488129 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -197,10 +197,11 @@ def apply(self, model): return (model, False) -def xsi_fifosim(model, n_inferences, max_iters=100000000): +def xsi_fifosim(model, 
n_inferences, max_iters=None): """Create a XSI model of stitched IP and use a simple C++ driver to drive the input stream. Useful for FIFO sizing, latency - and throughput measurement.""" + and throughput measurement. If max_iters is None, use the default + liveness threshold instead.""" assert len(model.graph.input) == 1, "Only a single input stream is supported" assert len(model.graph.output) == 1, "Only a single output stream is supported" @@ -230,7 +231,9 @@ def xsi_fifosim(model, n_inferences, max_iters=100000000): fifo_ind += 1 fifo_log = "\n".join(fifo_log) # run XSI sim with postproc - ret_dict = rtlsim_exec_cppxsi(model, ctx, dummy_data_mode=True, postproc_cpp=fifo_log) + ret_dict = rtlsim_exec_cppxsi( + model, ctx, dummy_data_mode=True, postproc_cpp=fifo_log, timeout_cycles=max_iters + ) return ret_dict @@ -433,10 +436,13 @@ def apply(self, model): # layer pipeline due to overlaps n_inferences = 2 + # use the critical_path_cycles estimate to set the timeout limit for FIFO sim + max_iters = latency + if backend in ["verilator", "pyverilator"]: - sim = verilator_fifosim(model, n_inferences) + sim = verilator_fifosim(model, n_inferences, max_iters=max_iters) elif backend is None or backend in ["xsi", "pyxsi"]: - sim = xsi_fifosim(model, n_inferences) + sim = xsi_fifosim(model, n_inferences, max_iters=max_iters) else: assert False, f"Unrecognized backend for InsertAndSetFIFODepths: {backend}" From f99c5ca6485e064eb06b4f4e5c0cca712ad72797 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Mon, 13 Jan 2025 17:41:44 +0100 Subject: [PATCH 096/102] [HWCustomOp] bring back characterization with pyxsi + postclk hook --- src/finn/custom_op/fpgadataflow/hwcustomop.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 09a60dc5dd..491b31c482 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ 
b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -248,7 +248,7 @@ def toggle_clk(self, sim): else: assert False, f"Unknown rtlsim_backend {rtlsim_backend}" - def rtlsim_multi_io(self, sim, io_dict): + def rtlsim_multi_io(self, sim, io_dict, hook_postclk=None): "Run rtlsim for this node, supports multiple i/o streams." # signal name suffix sname = "_" + self.hls_sname() + "_" @@ -273,6 +273,7 @@ def rtlsim_multi_io(self, sim, io_dict): num_out_values, sname=sname, liveness_threshold=pyverilate_get_liveness_threshold_cycles(), + hook_postclk=hook_postclk, ) else: assert False, f"Unknown rtlsim_backend {rtlsim_backend}" @@ -371,11 +372,31 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): # note that we restrict key names to filter out weight streams etc txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} + # signal name + sname = "_" + self.hls_sname() + "_" + + def monitor_txns(sim_obj): + for inp in txns_in: + in_ready = pyxsi_utils._read_signal(sim_obj, inp + sname + "TREADY") == 1 + in_valid = pyxsi_utils._read_signal(sim_obj, inp + sname + "TVALID") == 1 + if in_ready and in_valid: + txns_in[inp].append(1) + else: + txns_in[inp].append(0) + for outp in txns_out: + if ( + pyxsi_utils._read_signal(sim_obj, outp + sname + "TREADY") == 1 + and pyxsi_utils._read_signal(sim_obj, outp + sname + "TVALID") == 1 + ): + txns_out[outp].append(1) + else: + txns_out[outp].append(0) self.reset_rtlsim(sim) self.rtlsim_multi_io( sim, io_dict, + hook_postclk=monitor_txns, ) total_cycle_count = self.get_nodeattr("cycles_rtlsim") assert ( From 87bf4b54eb2821082b7d3d431dad064452ae8025 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 14 Jan 2025 10:30:44 +0000 Subject: [PATCH 097/102] [XSICPP] add input rate limit support --- src/finn/core/rtlsim_exec.py | 9 ++++++++- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 3 ++- 2 files changed, 10 
insertions(+), 2 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 37c0443c0f..a3239c4d22 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -116,7 +116,12 @@ def file_to_basename(x): def rtlsim_exec_cppxsi( - model, execution_context, dummy_data_mode=False, postproc_cpp="", timeout_cycles=None + model, + execution_context, + dummy_data_mode=False, + postproc_cpp="", + timeout_cycles=None, + rate_limit=1.0, ): """Use XSI C++ rtl simulation to execute given model with stitched IP. The dummy_data_mode flag controls whether the simulation is driven by @@ -251,6 +256,8 @@ def rtlsim_exec_cppxsi( "POSTPROC_CPP": postproc_cpp, # sim kernel .so to use (depends on Vivado version) "SIMKERNEL_SO": pyxsi_utils.get_simkernel_so(), + # rate limit for input throttling + "RATE_LIMIT": rate_limit, } for key, val in template_dict.items(): fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val)) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 3a92bc123a..3ffde51243 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -307,7 +307,8 @@ int main(int argc, char *argv[]) { } if(n_in_txns[i] < n_iters_per_input[i] * n_inferences) { - signals_to_write[instream_name + "_tvalid"] = true; + bool enable_throttled_input = ((float)n_in_txns[i] / (float)(iters+1)) <= @RATE_LIMIT@; + signals_to_write[instream_name + "_tvalid"] = enable_throttled_input; } else if(n_in_txns[i] > n_iters_per_input[i] * n_inferences) { // more input transactions than specified, should never happen // most likely a bug in the C++ driver code if this happens From aa420e9d0dabf9a59867b65a146c0e0137a28dd7 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 14 Jan 2025 10:56:00 +0000 Subject: [PATCH 098/102] [FIFO] wire up input rate limit support in XSICPP FIFO sizing --- .../fpgadataflow/set_fifo_depths.py | 26 
++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index c05c488129..75511da972 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -197,11 +197,12 @@ def apply(self, model): return (model, False) -def xsi_fifosim(model, n_inferences, max_iters=None): +def xsi_fifosim(model, n_inferences, max_iters=None, rate_limit=1.0): """Create a XSI model of stitched IP and use a simple C++ driver to drive the input stream. Useful for FIFO sizing, latency and throughput measurement. If max_iters is None, use the default - liveness threshold instead.""" + liveness threshold instead. rate_limit can be used for throttling + the input stream.""" assert len(model.graph.input) == 1, "Only a single input stream is supported" assert len(model.graph.output) == 1, "Only a single output stream is supported" @@ -232,7 +233,12 @@ def xsi_fifosim(model, n_inferences, max_iters=None): fifo_log = "\n".join(fifo_log) # run XSI sim with postproc ret_dict = rtlsim_exec_cppxsi( - model, ctx, dummy_data_mode=True, postproc_cpp=fifo_log, timeout_cycles=max_iters + model, + ctx, + dummy_data_mode=True, + postproc_cpp=fifo_log, + timeout_cycles=max_iters, + rate_limit=rate_limit, ) return ret_dict @@ -253,6 +259,8 @@ class InsertAndSetFIFODepths(Transformation): smaller where appropriate :parameter vivado_ram_style: the StreamingFIFO.ram_style attribute to be used for large FIFOs implemented by Vivado afterwards + :parameter fifosim_input_throttle: use input throttling based on dataflow analysis + while doing simulation-based FIFO sizing Assumed input graph properties: @@ -288,6 +296,7 @@ def __init__( swg_exception=False, vivado_ram_style="auto", force_python_sim=False, + fifosim_input_throttle=True, ): super().__init__() self.fpgapart = fpgapart @@ -297,6 +306,7 @@ def 
__init__( self.swg_exception = swg_exception self.vivado_ram_style = vivado_ram_style self.force_python_sim = force_python_sim + self.fifosim_input_throttle = fifosim_input_throttle def apply(self, model): # these optypes may potentially use external weights @@ -439,10 +449,18 @@ def apply(self, model): # use the critical_path_cycles estimate to set the timeout limit for FIFO sim max_iters = latency + # set up rate limit for input throttling + if self.fifosim_input_throttle: + first_node = getCustomOp(model.graph.node[0]) + inp_fold = np.prod(first_node.get_folded_input_shape()[:-1]) + rate_limit = min(1.0, inp_fold / perf["max_cycles"]) + else: + rate_limit = 1.0 + if backend in ["verilator", "pyverilator"]: sim = verilator_fifosim(model, n_inferences, max_iters=max_iters) elif backend is None or backend in ["xsi", "pyxsi"]: - sim = xsi_fifosim(model, n_inferences, max_iters=max_iters) + sim = xsi_fifosim(model, n_inferences, max_iters=max_iters, rate_limit=rate_limit) else: assert False, f"Unrecognized backend for InsertAndSetFIFODepths: {backend}" From 0bb9f1ddf7c3f5ec872ae9cbc0658f031345ea7f Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 14 Jan 2025 10:56:28 +0000 Subject: [PATCH 099/102] [Build] add cfg options for input throttling and FIFO sim trace --- src/finn/builder/build_dataflow_config.py | 8 ++++++++ src/finn/builder/build_dataflow_steps.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 5d69802337..c2bf4e279a 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -281,6 +281,14 @@ class DataflowBuildConfig: #: Only relevant when `auto_fifo_depths = True` large_fifo_mem_style: Optional[LargeFIFOMemStyle] = LargeFIFOMemStyle.AUTO + #: Enable input throttling for simulation-based FIFO sizing + #: Only relevant if auto_fifo_strategy = LARGEFIFO_RTLSIM + fifosim_input_throttle: Optional[bool] = 
True + + #: Enable saving waveforms from simulation-based FIFO sizing + #: Only relevant if auto_fifo_strategy = LARGEFIFO_RTLSIM + fifosim_save_waveform: Optional[bool] = False + #: Target clock frequency (in nanoseconds) for Vitis HLS synthesis. #: e.g. `hls_clk_period_ns=5.0` will target a 200 MHz clock. #: If not specified it will default to synth_clk_period_ns diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index bddf4395ca..edac225cbb 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -585,6 +585,8 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): "Multi-in/out streams currently not supported " + "in FINN C++ verilator driver, falling back to Python" ) + if cfg.fifosim_save_waveform: + model.set_metadata_prop("rtlsim_trace", "fifosim_trace.wdb") model = model.transform( InsertAndSetFIFODepths( cfg._resolve_fpga_part(), @@ -592,8 +594,12 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): swg_exception=cfg.default_swg_exception, vivado_ram_style=cfg.large_fifo_mem_style, force_python_sim=force_python_sim, + fifosim_input_throttle=cfg.fifosim_input_throttle, ) ) + if cfg.fifosim_save_waveform: + # un-set rtlsim_trace to remove unwanted traces in later steps + model.set_metadata_prop("rtlsim_trace", "") # InsertAndSetFIFODepths internally removes any shallow FIFOs # so no need to call RemoveShallowFIFOs here else: From 47641a1d874a721b52e3bd9c71a77ead9b7090f5 Mon Sep 17 00:00:00 2001 From: Yaman Umuroglu Date: Tue, 14 Jan 2025 13:00:07 +0000 Subject: [PATCH 100/102] [FIFO,XSICPP] switch from rate limit to throttle cycles spec --- src/finn/core/rtlsim_exec.py | 8 ++++---- src/finn/qnn-data/cpp/xsi_simdriver.cpp | 6 +++++- .../fpgadataflow/set_fifo_depths.py | 16 +++++++++------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 
a3239c4d22..920b29aa80 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -121,7 +121,7 @@ def rtlsim_exec_cppxsi( dummy_data_mode=False, postproc_cpp="", timeout_cycles=None, - rate_limit=1.0, + throttle_cycles=0, ): """Use XSI C++ rtl simulation to execute given model with stitched IP. The dummy_data_mode flag controls whether the simulation is driven by @@ -142,7 +142,7 @@ def rtlsim_exec_cppxsi( in the xsi_simdriver.cpp file to see what context and functions are available. If timeout_cycles is not None, the default value from pyverilate_get_liveness_threshold_cycles will be used. - + throttle_cycles will be used to pause the input stream every time an input frame is finished. """ # TODO: support running functional rtlsim with real I/O data # TODO: support running with multiple inputs/outputs @@ -256,8 +256,8 @@ def rtlsim_exec_cppxsi( "POSTPROC_CPP": postproc_cpp, # sim kernel .so to use (depends on Vivado version) "SIMKERNEL_SO": pyxsi_utils.get_simkernel_so(), - # rate limit for input throttling - "RATE_LIMIT": rate_limit, + # input throttling for rate limit + "THROTTLE_CYCLES": throttle_cycles, } for key, val in template_dict.items(): fifosim_cpp_template = fifosim_cpp_template.replace(f"@{key}@", str(val)) diff --git a/src/finn/qnn-data/cpp/xsi_simdriver.cpp b/src/finn/qnn-data/cpp/xsi_simdriver.cpp index 3ffde51243..61ec0f0af8 100644 --- a/src/finn/qnn-data/cpp/xsi_simdriver.cpp +++ b/src/finn/qnn-data/cpp/xsi_simdriver.cpp @@ -262,6 +262,7 @@ int main(int argc, char *argv[]) { reset(); vector n_in_txns(instream_names.size(), 0), n_out_txns(outstream_names.size(), 0); + vector throttle_input_until_time(instream_names.size(), 0); size_t total_n_in_txns = 0, total_n_out_txns = 0; unsigned iters = 0, last_output_at = 0; unsigned latency = 0; @@ -298,6 +299,9 @@ int main(int argc, char *argv[]) { string instream_name = instream_names[i]; if(chk_bool(instream_name+"_tready") && chk_bool(instream_name + "_tvalid")) { n_in_txns[i]++; 
+ // determine whether this input will be throttled for rate-limiting + // every time an input frame is finished, we throttle for @THROTTLE_CYCLES@ cycles + if(n_in_txns[i] % n_iters_per_input[i] == 0) throttle_input_until_time[i] = iters + @THROTTLE_CYCLES@; total_n_in_txns++; // determine whether we have more inputs to feed if(n_in_txns[i] == n_iters_per_input[i] * n_inferences) { @@ -307,7 +311,7 @@ int main(int argc, char *argv[]) { } if(n_in_txns[i] < n_iters_per_input[i] * n_inferences) { - bool enable_throttled_input = ((float)n_in_txns[i] / (float)(iters+1)) <= @RATE_LIMIT@; + bool enable_throttled_input = (iters >= throttle_input_until_time[i]); signals_to_write[instream_name + "_tvalid"] = enable_throttled_input; } else if(n_in_txns[i] > n_iters_per_input[i] * n_inferences) { // more input transactions than specified, should never happen diff --git a/src/finn/transformation/fpgadataflow/set_fifo_depths.py b/src/finn/transformation/fpgadataflow/set_fifo_depths.py index 75511da972..276bb0b968 100644 --- a/src/finn/transformation/fpgadataflow/set_fifo_depths.py +++ b/src/finn/transformation/fpgadataflow/set_fifo_depths.py @@ -197,12 +197,12 @@ def apply(self, model): return (model, False) -def xsi_fifosim(model, n_inferences, max_iters=None, rate_limit=1.0): +def xsi_fifosim(model, n_inferences, max_iters=None, throttle_cycles=0): """Create a XSI model of stitched IP and use a simple C++ driver to drive the input stream. Useful for FIFO sizing, latency and throughput measurement. If max_iters is None, use the default - liveness threshold instead. rate_limit can be used for throttling - the input stream.""" + liveness threshold instead. 
throttle_cycles can be used for throttling + the input stream every time a frame is finished.""" assert len(model.graph.input) == 1, "Only a single input stream is supported" assert len(model.graph.output) == 1, "Only a single output stream is supported" @@ -238,7 +238,7 @@ def xsi_fifosim(model, n_inferences, max_iters=None, rate_limit=1.0): dummy_data_mode=True, postproc_cpp=fifo_log, timeout_cycles=max_iters, - rate_limit=rate_limit, + throttle_cycles=throttle_cycles, ) return ret_dict @@ -453,14 +453,16 @@ def apply(self, model): if self.fifosim_input_throttle: first_node = getCustomOp(model.graph.node[0]) inp_fold = np.prod(first_node.get_folded_input_shape()[:-1]) - rate_limit = min(1.0, inp_fold / perf["max_cycles"]) + throttle_cycles = max(0, perf["max_cycles"] - inp_fold) else: - rate_limit = 1.0 + throttle_cycles = 0 if backend in ["verilator", "pyverilator"]: sim = verilator_fifosim(model, n_inferences, max_iters=max_iters) elif backend is None or backend in ["xsi", "pyxsi"]: - sim = xsi_fifosim(model, n_inferences, max_iters=max_iters, rate_limit=rate_limit) + sim = xsi_fifosim( + model, n_inferences, max_iters=max_iters, throttle_cycles=throttle_cycles + ) else: assert False, f"Unrecognized backend for InsertAndSetFIFODepths: {backend}" From 31dc2dc007346749d9b804a52f3932357025a49d Mon Sep 17 00:00:00 2001 From: auphelia Date: Wed, 15 Jan 2025 15:23:10 +0000 Subject: [PATCH 101/102] [Deps] Update pyxsi --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 1b2b7cfb06..0cd4cd6eb4 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -39,7 +39,7 @@ XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" RFSOC4x2_BDF_COMMIT="13fb6f6c02c7dfd7e4b336b18b959ad5115db696" KV260_BDF_COMMIT="98e0d3efc901f0b974006bc4370c2a7ad8856c79" EXP_BOARD_FILES_MD5="226ca927a16ea4ce579f1332675e9e9a" -PYXSI_COMMIT="9bd8c4b6e6f3af9f676d48033f5cafea9d00633c" 
+PYXSI_COMMIT="4f4ec10a3631c4c44b5bc0ede698d41c924d2b86" QONNX_URL="https://github.com/fastmachinelearning/qonnx.git" FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git" From 99aa799494476ab2b2fa51243b200afe5ba3313c Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 20 Jan 2025 15:41:20 +0000 Subject: [PATCH 102/102] [Docker/CI] Add pytest-forked to dependencies and update Jenkins commands --- docker/jenkins/Jenkinsfile | 4 ++-- requirements.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/jenkins/Jenkinsfile b/docker/jenkins/Jenkinsfile index 6d51fffd64..0b869b80a5 100644 --- a/docker/jenkins/Jenkinsfile +++ b/docker/jenkins/Jenkinsfile @@ -313,7 +313,7 @@ void createMultiMarkerScript(String markers, String testResultsFilename, String // Passing multiple markers when running ./run-docker.sh does not work with bash. // Therefore, create a script to maintain the single quotes that surround the markers sh """echo "#!/bin/bash -python -m pytest -m \'${markers}\' --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}" >> run-tests.sh +python -m pytest -m \'${markers}\' --forked --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}" >> run-tests.sh """ // Give permissions to script @@ -321,7 +321,7 @@ python -m pytest -m \'${markers}\' --junitxml=${testResultsFilename}.xml --html= } void runDockerPytestWithMarker(String marker, String testResultsFilename, String additionalOptions) { - sh """./run-docker.sh python -m pytest -m ${marker} --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}""" + sh """./run-docker.sh python -m pytest -m ${marker} --forked --junitxml=${testResultsFilename}.xml --html=${testResultsFilename}.html --self-contained-html ${additionalOptions}""" } def findBoardBuildFiles(String searchDir, String dirToFind) { diff --git 
a/requirements.txt b/requirements.txt index 1683695576..29d9e45b66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ pre-commit==3.3.2 protobuf==3.20.3 psutil==5.9.4 pyscaffold==4.4 +pytest-forked==1.6.0 scipy==1.10.1 setupext-janitor>=1.1.2 sigtools==4.0.1