So, my Verilog is very weak, but I made a VHDL design that has been verified enough that I would more or less trust it. I then bashed out some really rough Verilog, and just worked around my lack of skill (e.g. not knowing how to set up the kernel ROM correctly).
I then test-benched it against the original and debugged it until the two were in agreement. The resource count is identical (1xDSP, 2xBRAM, 80 registers...) and the timing is identical, so I am sure they are bug-for-bug compatible. Here it is.
Timing summary:
---------------
Timing errors: 0 Score: 0 (Setup/Max: 0, Hold: 0)
Constraints cover 640 paths, 0 nets, and 290 connections
Design statistics:
Minimum period: 3.570ns{1} (Maximum frequency: 280.112MHz)
`timescale 1ns / 1ps
/*
 * bandpass
 *
 * Time-multiplexed multiply-accumulate filter. Each sample accepted on `din`
 * (qualified by `din_enable`) is written into a circular sample buffer and
 * restarts a pass in which buffered samples are multiplied by the filter
 * kernel coefficients and summed in a 48-bit accumulator. The finished sum
 * is presented on `dout`, with `dout_enable` high for one clock.
 *
 * Ports:
 *   clk          - clock; all state updates on the rising edge
 *   din          - 18-bit input sample (stored into a signed buffer)
 *   din_enable   - high for one clock when `din` holds a new sample
 *   dout         - 48-bit accumulated result
 *   dout_enable  - high for one clock when `dout` has been updated
 */
module bandpass(
    input         clk,
    input  [17:0] din,
    input         din_enable,
    output [47:0] dout,
    output        dout_enable
);

    /* Number of filter taps. Was declared as a 1-bit reg, which silently
     * truncated 255 to 1; widened to match max_count. Not referenced by the
     * visible logic, which uses max_count instead. */
    reg [9:0] num_taps = 255;

    reg signed [17:0] buffer[0:1023]; /* Data in, sized for a block RAM */

    /* Pipelining for the multiplier */
    reg signed [17:0] a1, a2, a3;     /* sample operand stages     */
    reg signed [17:0] b1, b2, b3;     /* coefficient operand stages */

    /* Result of the multiplication */
    reg signed [35:0] product;

    /* Accumulate the products */
    reg signed [47:0] accumulator;

    /* Registered outputs */
    reg [47:0] result;
    reg        result_enable;

    /* Coefficient index at which a pass stops advancing */
    reg [9:0] max_count = 255;

    reg [9:0] data_index;   /* read address into buffer          */
    reg [9:0] coeff_index;  /* selects the kernel coefficient    */
    reg [9:0] write_index;  /* write address for the next sample */

    /* Shift registers for scheduling things: they delay the "restart the
     * accumulator" and "eject the result" events by five clocks. */
    reg [4:0] reset_accum_sr;
    reg [4:0] eject_result_sr;

    assign dout        = result;
    assign dout_enable = result_enable;

    integer i; /* loop variable for the buffer initialisation */
//////////////////////////////////////////
// Power-up / simulation start values.
// An initial block is a legitimate way to give registers and memories
// their start values for simulation; FPGA synthesis tools generally honour
// it for register and block-RAM initialisation as well (check your tool).
//////////////////////////////////////////
initial
begin
    /* Clear the whole sample buffer (full 18-bit zero, not a 2-bit literal) */
    for (i = 0; i < 1024; i = i + 1)
        buffer[i] = 18'b0;

    data_index  = 255;
    max_count   = 255;
    write_index = 254;
    coeff_index = 254;

    /* These control registers were previously left uninitialised. In
     * simulation they started as X: the X in reset_accum_sr[0] ended up
     * latched in the accumulator, and dout_enable was X for the first
     * clocks. Zero matches the usual FPGA register power-up default. */
    reset_accum_sr  = 5'b00000;
    eject_result_sr = 5'b00000;
    result_enable   = 1'b0;
    result          = 48'b0;
end
/*
 * Main clocked process: multiply-accumulate datapath, coefficient lookup,
 * result ejection and index sequencing.
 *
 * Every register is updated with a non-blocking assignment, so each
 * right-hand side sees the value from before the clock edge. The original
 * code mixed blocking and non-blocking assignments; because each affected
 * register was read before it was written in statement order, the behaviour
 * is unchanged.
 */
always @(posedge clk) begin
    /****************************
     * The inferred DSP block   *
     ****************************/
    /* The accumulator: keep summing while reset_accum_sr[0] is 1, otherwise
     * restart from zero. The 36-bit product is sign-extended to 48 bits. */
    accumulator <= (reset_accum_sr[0] == 1 ? accumulator : 48'b0)
                 + { {12{product[35]}}, product[35:0] };

    /* The multiply operation */
    product <= a3 * b3;

    /* The input pipeline (sample side) */
    a3 <= a2;
    a2 <= a1;
    a1 <= buffer[data_index];

    /* The input pipeline (coefficient side) */
    b3 <= b2;
    b2 <= b1;

    /////////////////////////////////////////////////////////////////
    // Filter kernel, written as a case statement acting as a ROM.
    // NOTE(review): a pre-initialised ROM is normally inferred from
    //   reg signed [17:0] coeff [0:1023];
    //   initial $readmemb("<file>", coeff);
    //   ... b1 <= coeff[coeff_index];
    // Left as a case statement here because the full table is not
    // reproduced in this listing.
    /////////////////////////////////////////////////////////////////
    case(coeff_index)
10'b0000000000: b1 <= 18'b111111110111111110; 10'b0000000001: b1 <= 18'b111111101000001011;
10'b0000000010: b1 <= 18'b111111011001100110; 10'b0000000011: b1 <= 18'b111111001101000111;
10'b0000000100: b1 <= 18'b111111000011100001; 10'b0000000101: b1 <= 18'b111110111101011101;
10'b0000000110: b1 <= 18'b111110111011010110; 10'b0000000111: b1 <= 18'b111110111101011001;
... 496 more lines ....
10'b1111111100: b1 <= 18'b000000000000000000; 10'b1111111101: b1 <= 18'b000000000000000000;
10'b1111111110: b1 <= 18'b000000000000000000; 10'b1111111111: b1 <= 18'b000000000000000000;
    endcase

    /************************************
     * Ejecting the result of the filter *
     ************************************/
    if (eject_result_sr[0] == 1)
    begin
        result        <= accumulator;
        result_enable <= 1;
    end else begin
        result_enable <= 0;
    end

    /*********************************
     * When we need to trigger the   *
     * ejecting of the result: shift *
     * in a 1 on the cycle where     *
     * coeff_index == max_count - 1  *
     *********************************/
    if (coeff_index == max_count-1)
    begin
        eject_result_sr <= {1'b1, eject_result_sr[4:1]};
    end else begin
        eject_result_sr <= {1'b0, eject_result_sr[4:1]};
    end

    /*********************************
     * Restarting the filter when new *
     * data arrives                   *
     *********************************/
    if (din_enable == 1)
    begin
        /* A 0 shifted in here reaches bit 0 five clocks later and makes
         * the accumulator restart from zero. */
        reset_accum_sr <= {1'b0, reset_accum_sr[4:1]};
        coeff_index    <= 0;
        /* Uses write_index as it was before this edge (10-bit wrap-around). */
        data_index     <= write_index - max_count + 1;
    end else begin
        reset_accum_sr <= {1'b1, reset_accum_sr[4:1]};
        /* Advance through the kernel until coeff_index reaches max_count. */
        if (coeff_index != max_count)
        begin
            coeff_index <= coeff_index + 1;
            data_index  <= data_index + 1;
        end
    end

    /*********************************
     * Storing new data in the buffer *
     *********************************/
    if (din_enable == 1)
    begin
        buffer[write_index] <= din;
        write_index         <= write_index+1;
    end
end
endmodule
=