数字系统设计复习笔记：第六篇

逻辑模块

感觉超前进位加法器和乘法器是难点。不知道考试会怎么考。还是需要理解才行。

全加器

数据流描述

// 1-bit full adder, dataflow description.
//   a, b : operand bits
//   cin  : carry-in
//   sum  : low bit of a + b + cin
//   cout : carry-out, i.e. the majority of (a, b, cin)
module full_adder (
    input  wire a,
    input  wire b,
    input  wire cin,
    output wire sum,
    output wire cout
);
    // XOR of the two operand bits, reused by the sum equation.
    wire a_xor_b = a ^ b;

    assign sum = a_xor_b ^ cin;

    // Majority function written as a sum of products.
    // (An equivalent form for known inputs is: cin & (a ^ b) | (a & b).)
    assign cout = (a & b) | (a & cin) | (b & cin);
endmodule

行为级描述

// 4-bit adder, behavioural description.
//   A, B : 4-bit operands
//   sum  : low 4 bits of A + B
//   C    : carry-out (bit 4 of A + B)
module add_4 (
    input  wire [3:0] A,
    input  wire [3:0] B,
    output wire [3:0] sum,
    output wire       C
);
    // Zero-extend both operands by one bit so the carry is kept.
    wire [4:0] total = {1'b0, A} + {1'b0, B};

    assign sum = total[3:0];
    assign C   = total[4];
endmodule

我还是更喜欢这个。

加法器链的弊端：太慢！因为进位在整个1-bit加法器链上激起了波动。

超前进位加法器

更详细的解析可以参考超大规模集成电路设计笔记：第一篇

参考PPT P12

// 8-bit carry-lookahead adder, dataflow description.
//   a, b : 8-bit operands
//   cin  : carry-in
//   sum  : low 8 bits of a + b + cin
//   cout : carry-out of bit 7
//
// The carries are computed in two 4-bit lookahead groups; the upper group
// takes c[3] (carry-out of bit 3) as its carry-in.
module adder_cla8 (
    input  wire [7:0] a,
    input  wire [7:0] b,
    input  wire       cin,
    output wire [7:0] sum,
    output wire       cout
);
    // Per-bit generate and propagate terms.
    wire [7:0] g = a & b;
    wire [7:0] p = a ^ b;

    // c[i] is the carry out of bit position i.
    wire [7:0] c;

    // Lookahead carries for the low-order group (bits 0..3), expanded in terms of cin.
    assign c[0] = g[0] | (p[0] & cin);
    assign c[1] = g[1] | (p[1] & g[0]) | (p[1] & p[0] & cin);
    assign c[2] = g[2] | (p[2] & g[1]) | (p[2] & p[1] & g[0]) |
                  (p[2] & p[1] & p[0] & cin);
    assign c[3] = g[3] | (p[3] & g[2]) | (p[3] & p[2] & g[1]) |
                  (p[3] & p[2] & p[1] & g[0]) |
                  (p[3] & p[2] & p[1] & p[0] & cin);

    // Lookahead carries for the high-order group (bits 4..7), expanded in terms of c[3].
    assign c[4] = g[4] | (p[4] & c[3]);
    assign c[5] = g[5] | (p[5] & g[4]) | (p[5] & p[4] & c[3]);
    assign c[6] = g[6] | (p[6] & g[5]) | (p[6] & p[5] & g[4]) |
                  (p[6] & p[5] & p[4] & c[3]);
    assign c[7] = g[7] | (p[7] & g[6]) | (p[7] & p[6] & g[5]) |
                  (p[7] & p[6] & p[5] & g[4]) |
                  (p[7] & p[6] & p[5] & p[4] & c[3]);

    // Each sum bit is its propagate term XOR the carry entering that bit.
    assign sum = p ^ {c[6:0], cin};

    assign cout = c[7];
endmodule

同理，16 位的二进制超前进位加法电路可用 4 个四位二进制超前进位加法电路再加上超前进位形成逻辑来构成，依次类推可以设计出 32 位和64 位的加法电路。
如果设计的是 32 位或 64 位的加法器，延迟就会更大。为了加快计算的节拍，可以在运算电路的组合逻辑层中加入多个寄存器组来暂存中间结果。也就是采用数字逻辑设计中常用的流水线（pipeline)办法，来提高运算速度，以便更有效地利用该运算电路。

乘法器

数据流描述

被乘数的每一位都要与乘数的低阶位b0相乘产生一行部分积。
然后，被乘数的每一位都被乘数的b1位所乘，产生的部分积向左移动一个bit位。这个过程对b2和b3重复进行。
然后，部分积被加在一起，形成乘积。任何一列的进位输出都被加到相应的下一个高阶列（不在同一行内横向传播），这称为Wallace Tree（华莱士树）。~~我和它谈笑风生（指数据流）~~

放代码也没啥意义。理解最重要。

行为级描述

// 4-bit x 4-bit unsigned multiplier, behavioural description.
//   X, Y    : 4-bit unsigned operands
//   Product : full 8-bit product X * Y
module mult_4 (
    input  wire [3:0] X,
    input  wire [3:0] Y,
    output wire [7:0] Product
);
    // Zero-extend the operands to the result width before multiplying.
    wire [7:0] x_ext = {4'b0000, X};
    wire [7:0] y_ext = {4'b0000, Y};

    assign Product = x_ext * y_ext;
endmodule

简单吧？

比较器

// Parameterised unsigned magnitude comparator.
//   width : operand width in bits (default 8)
//   X, Y  : operands to compare
//   XGY   : 1 when X >  Y, else 0
//   XSY   : 1 when X <  Y, else 0
//   XEY   : 1 when X == Y, else 0
//
// The parameter is declared in the parameter port list so that it is
// visible before the port ranges that use it; the original declared it
// after its first use, which strict tools reject.
module compare_n #(
    parameter width = 8
) (
    input  wire [width-1:0] X,
    input  wire [width-1:0] Y,
    output reg              XGY,
    output reg              XSY,
    output reg              XEY
);

    // Purely combinational: re-evaluated whenever X or Y changes.
    // Every output is assigned on every path, so no latch is inferred.
    always @* begin
        // Equality flag
        if (X == Y)
            XEY = 1'b1;
        else
            XEY = 1'b0;

        // Greater-than flag
        if (X > Y)
            XGY = 1'b1;
        else
            XGY = 1'b0;

        // Less-than flag
        if (X < Y)
            XSY = 1'b1;
        else
            XSY = 1'b0;
    end

endmodule

多路器

实在没啥好讲的。要知道怎么用always写输出随着输入改变而改变的多路器。注意敏感列表，避免锁存器。~~case啊嗯~~

// Eight-way data multiplexer with an active-low enable (nCS) and a
// user-definable data width (default 8 bits).
//   width      : data width in bits
//   addr       : 3-bit select; 3'b000 selects in1 ... 3'b111 selects in8
//   in1 .. in8 : data inputs
//   nCS        : active-low enable; when high, Mout is forced to 0
//   Mout       : selected data
//
// Changes from the original: Mout is declared as reg (it is assigned in
// an always block, which is illegal for a net), and the parameter is
// declared before the port ranges that use it.
module Mux_8 #(
    parameter width = 8
) (
    input  wire [2:0]       addr,
    input  wire [width-1:0] in1,
    input  wire [width-1:0] in2,
    input  wire [width-1:0] in3,
    input  wire [width-1:0] in4,
    input  wire [width-1:0] in5,
    input  wire [width-1:0] in6,
    input  wire [width-1:0] in7,
    input  wire [width-1:0] in8,
    output reg  [width-1:0] Mout,
    input  wire             nCS
);

    // Combinational: every path assigns Mout, so no latch is inferred.
    always @* begin
        if (!nCS) begin
            // nCS low: the multiplexer is enabled.
            case (addr)
                3'b000: Mout = in1;
                3'b001: Mout = in2;
                3'b010: Mout = in3;
                3'b011: Mout = in4;
                3'b100: Mout = in5;
                3'b101: Mout = in6;
                3'b110: Mout = in7;
                3'b111: Mout = in8;
                // Only reachable in simulation when addr contains X/Z;
                // drive X instead of silently holding the old value.
                default: Mout = {width{1'bx}};
            endcase
        end else begin
            // nCS high: the multiplexer is disabled.
            Mout = {width{1'b0}};
        end
    end

endmodule

总线操作、流水线

总线操作

适当的总线位宽，配合适当并行度的运算逻辑和步骤就能显著地提高专用信号处理逻辑电路的运算能力。

拼起来就完事了

流水线

流水线(pipeline)的设计方法在高性能、需要经常进行大规模运算的系统中得到广泛的应用，如CPU 、GPU等。高性能的DSP(数字信号处理)系统也使用了流水线设计技术。
所谓流水线设计实际上是把规模较大、层次较多的组合逻辑电路分为几个级，在每一级插入寄存器组并暂存中间数据。K 级的流水线就是从组合逻辑的输入到输出恰好有 K个寄存器组(分为 K 级，每一级都有一个寄存器组)，上一级的输出是下一级的输入而又无反馈的电路。

嵌入式课程里也有相同的知识点，如加速比等。

例1

建立一个流水线电路的Verilog模型，计算三个输入流（a、b和c）中相应数值的平均值。流水线包括三个阶段：第一阶段对a和b的数值进行求和，并保存c的数值；第二阶段把第一阶段保存的和（a+b）与保存的c的数值相加；第三阶段除以3。输入和输出都是有符号的定点数字，索引从5到-8。

// Three-stage pipeline computing the (approximate) average of a, b and c.
//   a, b, c : signed fixed-point inputs, 6 integer bits [5:0] and
//             8 fractional bits [-1:-8]
//   clk     : clock; every register updates on the rising edge
//   avg     : signed fixed-point result in the same format, available
//             three clock edges after the corresponding inputs
//
// Stage 1 registers a + b and c, stage 2 registers (a + b) + c, and
// stage 3 registers the sum multiplied by an approximation of 1/3.
//
// Changes from the original:
//   * The intermediate sums are widened by one bit per addition so that
//     a + b + c cannot overflow even though the average itself fits.
//   * The multiplication is done signed and at full width, and the result
//     is taken from the product bits aligned with the binary point.
//     The original kept only the low 14 bits of the integer product,
//     which is not the scaled result.
module average_pipeline (
    output reg signed [5:-8] avg,
    input  signed     [5:-8] a,
    b,
    c,
    input                    clk
);

    wire signed [6:-8]   a_plus_b;        // one extra integer bit for a + b
    wire signed [7:-8]   sum;             // two extra integer bits for a + b + c
    wire signed [13:-16] product;         // full-width product, 16 fractional bits
    wire signed [5:-8]   sum_div_3;
    reg  signed [6:-8]   saved_a_plus_b;
    reg  signed [5:-8]   saved_c;
    reg  signed [7:-8]   saved_sum;

    assign a_plus_b = a + b;

    always @(posedge clk) begin  // Pipeline register 1
        saved_a_plus_b <= a_plus_b;
        saved_c        <= c;
    end

    assign sum = saved_a_plus_b + saved_c;

    always @(posedge clk)  // Pipeline register 2
        saved_sum <= sum;

    /*
    14'sb00000001010101 is 85 as a raw integer, i.e. 85/256 (about 0.332)
    when read with 8 fractional bits, which approximates 1/3.
    Multiplying by it therefore approximates a division by 3.
    The literal is signed so that the whole expression is evaluated as
    signed; an unsigned literal would make the multiplication unsigned.
    */
    assign product = saved_sum * 14'sb00000001010101;

    // The product has 16 fractional bits; bits [5:-8] are the ones aligned
    // with the output format (the low 8 fractional bits are truncated).
    assign sum_div_3 = product[5:-8];

    always @(posedge clk)  // Pipeline register 3
        avg <= sum_div_3;

endmodule

为什么除法可以这样子写？

因为这是一个定点数：小数点在索引0与索引-1之间。

索引[5:0]为整数部分，[-1:-8]为小数部分，而14'b00000001010101的后八位正是约0.333。（为什么？因为后八位01010101作为小数是0.01010101₂ = 1/4 + 1/16 + 1/64 + 1/256 = 85/256 ≈ 0.332，近似等于1/3。）

例2

4 级流水线方式的8位全加器

// 8-bit adder pipelined into four 2-bit addition stages.
//   ina, inb : 8-bit operands
//   cin      : carry-in
//   clk      : clock; every register updates on the rising edge
//   sum      : low 8 bits of ina + inb + cin
//   cout     : carry-out
//
// The inputs are registered first, then each of the four stages adds one
// 2-bit slice (starting from the least significant) while carrying along
// the partial sum and the operand bits not yet consumed. The result is
// therefore available five clock edges after the inputs are applied.
//
// Fix relative to the original: stages 2-4 wrote
//     {carry, partial} <= {x[1:0] + y[1:0] + c, lower_bits};
// An addition inside a concatenation is self-determined, so it was
// evaluated at 2 bits and its carry was discarded (the stage carry was
// always 0). The slice addition is now assigned directly to a 3-bit
// target so that the carry is kept.
module pipeline (
    cout,
    sum,
    ina,
    inb,
    cin,
    clk
);
    output [7:0] sum;
    output cout;
    input [7:0] ina, inb;
    input cin, clk;

    reg [7:0] tempa, tempb, sum;
    reg tempci, firstco, secondco, thirdco, cout;
    reg [1:0] firsts, thirda, thirdb;
    reg [3:0] seconda, secondb, seconds;
    reg [5:0] firsta, firstb, thirds;

    // Input registers.
    always @(posedge clk) begin
        tempa  <= ina;
        tempb  <= inb;
        tempci <= cin;
    end

    // Stage 1: add bits [1:0]; the 3-bit target keeps the carry.
    always @(posedge clk) begin
        {firstco, firsts} <= tempa[1:0] + tempb[1:0] + tempci;
        firsta <= tempa[7:2];  // operand bits not yet added
        firstb <= tempb[7:2];
    end

    // Stage 2: add bits [3:2] together with the stage-1 carry.
    always @(posedge clk) begin
        {secondco, seconds[3:2]} <= firsta[1:0] + firstb[1:0] + firstco;
        seconds[1:0] <= firsts;  // pass the stage-1 sum bits along
        seconda <= firsta[5:2];
        secondb <= firstb[5:2];
    end

    // Stage 3: add bits [5:4] together with the stage-2 carry.
    always @(posedge clk) begin
        {thirdco, thirds[5:4]} <= seconda[1:0] + secondb[1:0] + secondco;
        thirds[3:0] <= seconds;
        thirda <= seconda[3:2];
        thirdb <= secondb[3:2];
    end

    // Stage 4: add bits [7:6] together with the stage-3 carry.
    always @(posedge clk) begin
        {cout, sum[7:6]} <= thirda[1:0] + thirdb[1:0] + thirdco;
        sum[5:0] <= thirds;
    end

endmodule

看的有点头大。理解最重要。

数据流控制

// Datapath excerpt of a multiplier producing a real part (p_r) and an
// imaginary part (p_i) from operands a_r/a_i and b_r/b_i.
// A single multiplier is shared: a_sel/b_sel pick which operand pair is
// multiplied, the product can be captured into pp1 or pp2, and the
// sum or difference of pp1 and pp2 can be captured into p_r or p_i.
// The logic that drives the control signals is elided in this excerpt
// (the "... ..." lines), and reset / input_rdy are not used by any
// statement shown here.
module multiplier
    ( output reg signed [7:-24] p_r, p_i,
     input signed [3:-12] a_r, a_i, b_r, b_i,
     input clk, reset, input_rdy);
    
    // Control signals: operand selects, register clock enables and the
    // add/subtract select. Their drivers are not visible in this excerpt.
    reg a_sel, b_sel, pp1_ce, pp2_ce, sub, p_r_ce, p_i_ce;
    wire signed [3:-12] a_operand, b_operand;
    wire signed [7:-24] pp, sum;
    reg signed [7:-24] pp1, pp2;
    ... ...
    
    // Select low -> real operand, select high -> imaginary operand.
    assign a_operand = ~a_sel ? a_r : a_i;
    assign b_operand = ~b_sel ? b_r : b_i;
    // Each operand is sign-extended by 4 bits and padded with 12 zero bits
    // to 32 bits before the multiplication.
    // NOTE(review): Verilog multiplies these as plain 32-bit integers and a
    // concatenation is unsigned, so the raw product is scaled by 2^24
    // relative to a_operand * b_operand and only its low 32 bits are kept.
    // Confirm this is the intended fixed-point alignment.
    assign pp = {{4{a_operand[3]}}, a_operand, 12'b0} *
    {{4{b_operand[3]}}, b_operand, 12'b0};
    
    always @(posedge clk) // Partial product 1 register
        if (pp1_ce)
            pp1 <= pp;
    
    always @(posedge clk) // Partial product 2 register
        if (pp2_ce)
            pp2 <= pp;
    
    // sub low -> pp1 + pp2, sub high -> pp1 - pp2.
    assign sum = ~sub ? pp1 + pp2 : pp1 - pp2;
    
    always @(posedge clk) // Product real-part register
        if (p_r_ce)
            p_r <= sum;
    
    always @(posedge clk) // Product imaginary-part register
        if (p_i_ce)
            p_i <= sum;
    
    ......
    
endmodule

对于同步时序逻辑， $t_{co}+t_{pd}+t_{su}\lt t_c$ 。