diff --git a/software/glasgow/gateware/iostream.py b/software/glasgow/gateware/iostream.py
index 2130c3d3c..5081a8241 100644
--- a/software/glasgow/gateware/iostream.py
+++ b/software/glasgow/gateware/iostream.py
@@ -162,15 +162,21 @@ def elaborate(self, platform):
             m.d.comb += buffer_parts.oe.eq(latch_parts.oe)
 
         def delay(value, name):
+            # Collect the output of every register stage, not only the last one, so that the caller
+            # can also see values that are still in flight through the delay line.
+            delayed_values = []
             for stage in range(latency):
                 next_value = Signal.like(value, name=f"{name}_{stage}")
                 m.d.sync += next_value.eq(value)
                 value = next_value
-            return value
+                delayed_values.append(next_value)
+            return delayed_values
 
-        i_en = delay(self.o_stream.valid & self.o_stream.ready &
-                     self.o_stream.p.i_en, name="i_en")
-        meta = delay(self.o_stream.p.meta, name="meta")
+        # NOTE(review): indexing with `[-1]` below assumes `latency >= 1` -- confirm.
+        i_en_delays = delay(self.o_stream.valid & self.o_stream.ready &
+                            self.o_stream.p.i_en, name="i_en")
+        i_en = i_en_delays[-1]
+        meta = delay(self.o_stream.p.meta, name="meta")[-1]
 
         # This skid buffer is organized as a shift register to avoid any uncertainties associated
         # with the use of an async read memory. On platforms that have LUTRAM, this implementation
@@ -195,7 +201,10 @@ def delay(value, name):
 
         m.d.comb += self.i_stream.payload.eq(skid[skid_at])
         m.d.comb += self.i_stream.valid.eq(i_en | (skid_at != 0))
-        m.d.comb += self.o_stream.ready.eq(self.i_stream.ready & (skid_at == 0))
+        # `o_stream` is ready when `i_stream` is ready, or when the skid buffer is empty and no
+        # sample enable is still in flight through the `i_en` delay stages.
+        m.d.comb += self.o_stream.ready.eq(
+            self.i_stream.ready | ((skid_at == 0) & ~Cat(*i_en_delays).any()))
 
         return m
 
diff --git a/software/tests/gateware/test_iostream.py b/software/tests/gateware/test_iostream.py
index 15ec4b85a..6f45e4be1 100644
--- a/software/tests/gateware/test_iostream.py
+++ b/software/tests/gateware/test_iostream.py
@@ -299,6 +299,28 @@ def _subtest_sdr_and_ddr_input_sampled_correctly(self, o_valid_bits, i_en_bits,
         self._subtest_sdr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks, iready_comb_path)
         self._subtest_ddr_input_sampled_correctly(o_valid_bits, i_en_bits, i_ready_bits, timeout_clocks, iready_comb_path)
 
+    def test_i_ready_low_doesnt_block_when_not_sampling_inputs(self):
+        """
+        This testcase ensures that if i_stream is blocked, we can still perform
+        non-sampling updates, as long as we never try to sample.
+        """
+        self._subtest_sdr_and_ddr_input_sampled_correctly(
+            o_valid_bits = "010101010",
+            i_en_bits = "000000000",
+            i_ready_bits = "")
+
+    def test_i_ready_low_doesnt_block_when_iready_combinatorial(self):
+        """
+        This testcase ensures that an i_stream consumer that follows the stream
+        rules, and is therefore allowed to generate ready like this:
+        ```m.d.comb += ready.eq(valid)```, will not cause the system to lock up.
+        """
+        self._subtest_sdr_and_ddr_input_sampled_correctly(
+            o_valid_bits = "0111100000" + (("0" * MAX_SKIDBUFFER_SIZE) + "0") * 4,
+            i_en_bits = "0111100000" + (("0" * MAX_SKIDBUFFER_SIZE) + "0") * 4,
+            i_ready_bits = "0000000000" + (("0" * MAX_SKIDBUFFER_SIZE) + "1") * 4,
+            iready_comb_path = True)
+
     def _subtest_fill_skid_buffer_to_level_static(self, level):
         """
         This testcase fills up the skid buffer up to a given level