Greatest Common Divisor (GCD) |
|
while ( x != y ) {
if ( x < y ) y = y - x;
else x = x - y;
}
Input (x, y) and output signals (xo), and control signals (rst - reset, rdy - ready). The I/O protocol is based on the following - when rst=='0', inputs are latched into internal registers and rdy<='0' indicates that the algorithm has started. The test bench sets rst back to '1' and waits until rdy gets '1' - the algorithm has finished and the result is on the output. Of course, the polarity if control signals can be changed but the the test bench must be modified accordingly.
| Control Data Flow Graph | VHDL | SystemVerilog |
|
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
entity gcd is
port (xi, yi : in unsigned(15 downto 0);
rst : in bit;
xo : out unsigned(15 downto 0);
rdy : out bit := '1';
clk : in bit);
end gcd;
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
architecture experiments of gcd is
begin
process
variable x, y: unsigned(15 downto 0);
begin
-- Wait for the new input data
wait on clk until clk='1' and rst='0';
x := xi; y := yi; rdy <= '0';
wait on clk until clk='1';
-- Calculate
while x /= y loop
if x < y then y := y - x;
else x := x - y; end if;
end loop;
-- Ready
xo <= x; rdy <= '1';
wait on clk until clk='1';
end process;
end experiments; |
`timescale 1 ns / 1 ns
module gcd (
input logic unsigned [15:0] xi, yi,
input logic rst,
output logic unsigned [15:0] xo,
output logic rdy,
input logic clk );
logic unsigned [15:0] x, y;
initial rdy = 1;
always begin
// Wait for the new input data
while ( rst == 1 ) @(posedge clk);
x = xi; y = yi; rdy = 0;
@(posedge clk);
// Calculate
while ( x != y )
if ( x < y ) y = y - x;
else x = x - y;
// Ready
xo = x; rdy = 1;
@(posedge clk);
end
endmodule
|
Simulation results of the behavioral model (the test bench is below):
| Test bench | VHDL | SystemVerilog |
|
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
entity test is end test;
architecture bench of test is
signal clk, rst, rdy, hlt: bit := '1';
signal x, y, res: unsigned(15 downto 0);
component gcd
port (xi, yi : in unsigned(15 downto 0);
rst : in bit;
xo : out unsigned(15 downto 0);
rdy : out bit;
clk : in bit);
end component;
begin
clk <= not clk after 10 ns when hlt='1';
U1: gcd port map (x, y, rst, res, rdy, clk);
process
type int_array is array (0 to 7) of integer;
constant a: int_array := (27, 33, 245, 121, 52, 333, 125, 422);
constant b: int_array := (18, 256, 45, 11, 452, 121, 625, 312);
-- expected results (GCD) 9, 1, 5, 11, 4, 1, 125, 2
begin
wait on clk until clk='0';
for i in a'range loop
x <= conv_unsigned(a(i),16);
y <= conv_unsigned(b(i),16);
rst <= '0';
wait on clk until clk='0';
rst <= '1';
--wait on clk until clk='0';
while rdy = '0' loop
wait on clk until clk='0';
end loop;
wait on clk until clk='0';
end loop;
wait on clk until clk='0';
hlt <= '0'; wait;
end process;
end bench; |
`timescale 1 ns / 1 ns
module testbench;
logic clk, rst, rdy;
logic unsigned [15:0] x, y, res;
// expected results (GCD)
// 9, 1, 5, 11, 4, 1, 125, 2
parameter logic unsigned [15:0] a [8] =
{ 27, 33, 245, 121, 52, 333, 125, 422 };
parameter logic unsigned [15:0] b [8] =
{ 18, 256, 45, 11, 452, 121, 625, 312 };
initial clk = 1; always #10 clk = !clk;
gcd u1 (x, y, rst, res, rdy, clk);
initial begin
rst = 1;
@(negedge clk);
for ( int i=0; i<8; i++ ) begin
x = a[i]; y = b[i]; rst = 0;
@(negedge clk) rst = 1;
// @(negedge clk);
while ( rdy == 0 ) @(negedge clk);
@(negedge clk);
end
@(negedge clk) $stop;
end
endmodule
|
| Clocked behavioral model | VHDL | SystemVerilog |
|
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
architecture experiments of gcd is
signal x, y: unsigned(15 downto 0);
begin
process begin
-- Wait for the new input data
while rst = '1' loop
wait on clk until clk='1';
end loop;
x <= xi; y <= yi; rdy <= '0';
wait on clk until clk='1';
-- Calculate
while x /= y loop
if x < y then y <= y - x;
else x <= x - y; end if;
wait on clk until clk='1';
end loop;
-- Ready
xo <= x; rdy <= '1';
wait on clk until clk='1';
end process;
end experiments; |
`timescale 1 ns / 1 ns
module gcd (
input logic unsigned [15:0] xi, yi,
input logic rst,
output logic unsigned [15:0] xo,
output logic rdy,
input logic clk );
logic unsigned [15:0] x, y;
always begin
// Wait for the new input data
while ( rst == 1 ) @(posedge clk);
x = xi; y = yi; rdy = 0;
@(posedge clk);
// Calculate
while ( x != y ) begin
if ( x < y ) y = y - x;
else x = x - y;
@(posedge clk);
end
// Ready
xo = x; rdy = 1;
@(posedge clk);
end
endmodule
|
Simulation results of the clocked behavioral model:
| Behavioral state machine | VHDL | SystemVerilog |
|
library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;
architecture experiments of gcd is
type state_type is (S_wait, S_start, S_ready);
signal state: state_type;
signal x, y: unsigned(15 downto 0);
begin
process begin
wait on clk until clk='1';
case state is
-- Wait for the new input data
when S_wait =>
if rst='0' then
x <= xi; y <= yi; rdy <= '0'; state <= S_start;
end if;
-- Calculate
when S_start =>
if x /= y then
if x < y then y <= y - x;
else x <= x - y; end if;
state <= S_start;
else
xo <= x; rdy <= '1'; state <= S_ready;
end if;
-- Ready
when S_ready => state <= S_wait;
end case;
end process;
end experiments; |
`timescale 1 ns / 1 ns
module gcd (
input logic unsigned [15:0] xi, yi,
input logic rst,
output logic unsigned [15:0] xo,
output logic rdy,
input logic clk );
// State - type & signals
// Will be interpreted as 32-bit integer in syhtesis and 'default'
// must be used in case statements to avoid latches. Synthesizer
// will remove extra 30 bits.
enum {S_wait, S_start, S_comp, S_ready} state, next_state;
logic unsigned [15:0] x, y;
always @(posedge clk)
case (state)
// Wait for the new input data
S_wait :
if (rst==0) begin
x <= xi; y <= yi; rdy <= 0; state <= S_start;
end
// Calculate
S_start :
if ( x != y ) begin
if ( x < y ) y <= y - x;
else x <= x - y;
state <= S_start;
end
else begin
xo <= x; rdy <= 1; state <= S_ready;
end
// Ready
default : state <= S_wait;
endcase
endmodule
|
Simulation results of the behavioral state machine model:
Beginning of the same simulation - changing of states is visible:
The next five models fully match the register-transfer level (RTL) abstractions - there is a clear state machine and the data-part consists of components (register, multiplexers, subtracters, etc). Also, internal control signals exist. All models are synthesizable.
RTL #1 (VHDL/SystemVerilog) - single ALU, 3 clock cycles per iteration (see structure).
RTL #2 (VHDL/SystemVerilog) - single ALU, 2 clock cycles per iteration (see structure).
RTL #3 (VHDL/SystemVerilog) - comparator (less than) controls subtraction, 1 clock cycle per iteration – small but slow (sequential) data-part (see structure).
RTL #4 (VHDL/SystemVerilog) and RTL #5 (VHDL/SystemVerilog) - out-of-order execution – both subtractions are calculated first then the decision is made. The difference is how deciders/comparators are implemented (see structure #4 and structure #5).
Synthesis results and RTL schematics (VHDL/SystemVerilog).
Hand-outs about the algorithm description and different implementations (VHDL).
HDL files to compare components - subtracter and two comparators (VHDL/SystemVerilog), universal ALU (VHDL/SystemVerilog), and test bench for checking (VHDL/SystemVerilog).