------------------------------------------------------------------------
-- 16x16->32-bit unsigned sequential radix-4 multiplier
--   2 bits at a time calculation; behavioral-RTL style
-- Clock, start & stop signals added to use with
--   the adapted GCD testbench
-- A bug fixed: logical shift of 'c' replaced with arithmetic shift
-- L(R)V - 2025
------------------------------------------------------------------------

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;

-- Port interface of the sequential 16x16 -> 32-bit unsigned multiplier.
-- Port order is significant for positional association and is kept as
-- xi, yi, rst, xo, rdy, clk.
entity multiplier is
  port (
    xi  : in  unsigned(15 downto 0);   -- operand, latched as 'a' by architecture bhv_rtl_4
    yi  : in  unsigned(15 downto 0);   -- operand, latched as 'b' and consumed 2 bits per step
    rst : in  std_logic;               -- a new calculation starts on a clk='1' event while rst='0'
    xo  : out unsigned(31 downto 0);   -- result, driven together with rdy='1'
    rdy : out std_logic;               -- '0' once operands are latched, '1' when xo is driven
    clk : in  std_logic                -- clock; the architecture waits for clk='1' events
  );
end entity multiplier;

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;

-- Behavioral-RTL radix-4 multiplier: the multiplier operand is consumed
-- two bits per clock step.  Each 2-bit group plus the carry from the
-- previous group is recoded to a digit in {-1, 0, +1, +2}; digit values
-- 3 and 4 are expressed as -1 / 0 with a carry of one into the next group.
--
-- Fixes relative to the previous revision:
--   * The carry left over after the last 2-bit group (weight 2^16) is now
--     added to the result.  It used to be dropped, so e.g. 1 * 0xFFFF
--     produced 0xFFFFFFFF instead of 0x0000FFFF.
--   * The accumulator is kept as a 35-bit two's-complement value.  With a
--     32-bit accumulator the "+2a" term lost a(15) and large positive
--     partial sums were sign-extended as if negative, so e.g. 0xFFFF * 2
--     produced 0xFFFFFFFE instead of 0x0001FFFE.
--
-- Timing is unchanged: one clock to latch the operands, eight clocks for
-- the eight steps, then xo/rdy are driven and held for one more clock.
architecture bhv_rtl_4 of multiplier is
  -- Debug signals: snapshots of the working variables after every step.
  -- 'c' shows the low 32 bits of the internal 35-bit accumulator.
  signal a, b: unsigned(15 downto 0);
  signal c: unsigned(31 downto 0);
begin
  process
    -- Number of 2-bit groups in the 16-bit multiplier operand.
    constant STEPS: positive := 8;

    variable a_bf, b_bf: unsigned(15 downto 0);
    -- Accumulator, interpreted as two's complement.  Before the per-step
    -- shift the partial sum lies in (-2^33, 2^34), hence 35 bits.
    variable c_bf: unsigned(34 downto 0);
    -- a_bf * 2^16, zero-extended; used for the final carry correction.
    variable a_hi: unsigned(34 downto 0);
    -- Carry from the previously processed 2-bit group.
    variable prev: std_logic;

    -- One radix-4 step.
    --   a   : multiplicand (unchanged)
    --   b   : multiplier; its two low bits are consumed and b is shifted
    --         right by two (zero fill)
    --   c   : two's-complement accumulator; receives digit * a * 2^16 and
    --         is then arithmetically shifted right by two
    --   prv : carry in from the previous group / carry out to the next
    procedure mul_2bits ( variable a: in unsigned(15 downto 0);
                          variable b: inout unsigned(15 downto 0);
                          variable c: inout unsigned(34 downto 0);
                          variable prv: inout std_logic ) is
      variable b_bits: std_logic_vector(2 downto 0);
      variable a_x1, a_x2: unsigned(34 downto 0);
    begin
      -- Addends aligned to bit 16 of the accumulator, without truncation.
      a_x1 := "000" & a & "0000000000000000";   -- 1 * a * 2^16
      a_x2 := a_x1(33 downto 0) & '0';          -- 2 * a * 2^16

      -- Group value = b(1 downto 0) + prv, range 0..4.
      b_bits := prv & std_logic_vector(b(1 downto 0));
      case b_bits is
      when "000" =>                 -- 0
        prv := '0';
      when "001" | "100" =>         -- 1
        prv := '0';  c := c + a_x1;
      when "010" | "101" =>         -- 2
        prv := '0';  c := c + a_x2;
      when "011" | "110" =>         -- 3 = 4 - 1: subtract now, carry one
        prv := '1';  c := c - a_x1;
      when others =>                -- "111": 4 = 4 + 0: carry only
        prv := '1';
      end case;

      b := "00" & b(15 downto 2);
      -- Arithmetic shift: replicate the sign bit of the accumulator.
      c := c(34) & c(34) & c(34 downto 2);
    end procedure mul_2bits;
  begin
    -- Wait for the new input data
    wait on clk until clk='1' and rst='0';

    a_bf := xi;  b_bf := yi;  c_bf := (others=>'0');
    a <= a_bf;   b <= b_bf;   c <= c_bf(31 downto 0);
    prev := '0';  rdy <= '0';
    wait on clk until clk='1';

    -- Calculate: one 2-bit group per clock
    for step in 1 to STEPS loop
      mul_2bits(a_bf,b_bf,c_bf,prev);
      a <= a_bf;   b <= b_bf;   c <= c_bf(31 downto 0);
      wait on clk until clk='1';
    end loop;

    -- A carry still pending after the top group has weight 4^STEPS = 2^16
    -- relative to the (fully shifted) accumulator; fold it in so that the
    -- result is the true unsigned product.
    if prev = '1' then
      a_hi := "000" & a_bf & "0000000000000000";
      c_bf := c_bf + a_hi;
    end if;

    -- Ready: the product is non-negative and below 2^32, so the low
    -- 32 bits of the accumulator are the complete result.
    xo <= c_bf(31 downto 0);    rdy <= '1';
    wait on clk until clk='1';
  end process;

end architecture bhv_rtl_4;
