//-----------------------------------------------------------------------------
// The confidential and proprietary information contained in this file may
// only be used by a person authorised under and to the extent permitted
// by a subsisting licensing agreement from ARM Limited.
//
//            (C) COPYRIGHT 2013-2014 ARM Limited.
//                ALL RIGHTS RESERVED
//
// This entire notice must be reproduced on all copies of this file
// and copies of this file may only be made by a person if such person is
// permitted to do so under the terms of a subsisting license agreement
// from ARM Limited.
//
//  Filename            : $RCSfile: maia_cx_crypt2.v $
//  Checked In          : $Date: 2014-08-29 00:16:46 -0500 (Fri, 29 Aug 2014) $
//  Revision            : $Revision: 70482 $
//  Release Information : Cortex-A72-r1p0-00rel0
//
//-----------------------------------------------------------------------------
// Verilog-2001 (IEEE Std 1364-2001)
//-----------------------------------------------------------------------------

//#
//# Overview
//# ========
//#
// This block does the following operations:
//  - AES encrypt and decrypt operations: aesd, aese, aesmc, aesimc
//    (and the aesemc / aesdimc superops)
//  - Polynomial multiplication: pmull
//  - SHA single-cycle operations: sha1h, sha1su1, sha256su0

//#
//# Module Declaration
//# ==================
//#
`include "maia_header.v"

module maia_cx_crypt2 (

//#
//# Interface Signals
//# =================
//#

// Global inputs
  ck_gclkcx_crypt,
  cx_reset3,

// Control inputs
  ival_e1_q,
  aesd_e1_q,
  aese_e1_q,
  aesmc_e1_q,
  aesimc_e1_q,
  aesdimc_e1_q,
  aesemc_e1_q,
  pmull_e1_q,
  sha1h_e1_q,
  sha1su1_e1_q,
  sha256su0_e1_q,

// Data inputs
  qd,
  qn,

// Outputs
  crypt2_out_e3_q,
  crypt2_active
);

//#
//# Interface Signals
//# =================
//#

// Global inputs
input          ck_gclkcx_crypt;
input          cx_reset3;

// Control inputs
input          ival_e1_q;
input          aesd_e1_q;      // aes decode
input          aese_e1_q;      // aes encode
input          aesmc_e1_q;     // aes mix columns
input          aesimc_e1_q;    // aes inverse mix columns
input          aesdimc_e1_q;   // aes decode superop
input          aesemc_e1_q;    // aes encode superop
input          pmull_e1_q;     // polynomial multiplication
input          sha1h_e1_q;     // sha1 fixed rotate
input          sha1su1_e1_q;   // sha1 schedule update 1
input          sha256su0_e1_q; // sha256 schedule update 0

// Data inputs
input  [127:0] qd;
input  [127:0] qn;

// Outputs
output [127:0] crypt2_out_e3_q;
output         crypt2_active;

//#
//# Internal Signals - Automatic Declarations
//# =========================================
//#

wire [ 15: 0] aes_shf_e1;
reg  [ 15: 0] aes_shf_e2_q;
wire [127: 0] aesd_e1;
reg           aesd_e2_q;
wire          aesd_or_e_e1;
wire [127: 0] aesd_out;
wire [ 15: 0] aesd_shf_e1;
reg           aesdimc_e2_q;
wire [127: 0] aesdimc_out;
wire [127: 0] aese_e1;
reg           aese_e2_q;
wire [127: 0] aese_out;
wire [ 15: 0] aese_shf_e1;
reg           aesemc_e2_q;
wire [127: 0] aesemc_out;
reg           aesimc_e2_q;
wire [127: 0] aesimc_in;
wire [127: 0] aesimc_out;
reg           aesmc_e2_q;
wire [127: 0] aesmc_in;
wire [127: 0] aesmc_out;
wire [127: 0] crypt2_d_e1;
reg  [127: 0] crypt2_d_e2_q;
wire [127: 0] crypt2_out_e2;
reg  [127: 0] crypt2_out_e3_q;
reg           ival_e2_q;
reg           pmull_e2_q;
wire [127: 0] pmull_out;
wire [127: 0] qx_e1;
wire [ 31: 0] sha1h_in_e1;
wire [ 31: 0] sha1h_out_e1;
wire [127: 0] sha1su1_out_e1;
wire [127: 0] sha1su1_qdin_e1;
wire [127: 0] sha1su1_qnin_e1;
wire [127: 0] sha256su0_out_e1;
wire          sha_inst_e1;
reg           sha_inst_e2_q;

//#
//# Main Code
//# =========
//#

//
// aes functions are all in the same block because of limited result bus bandwidth.
// Maia CX has 3x64-bit result buses, and each of these instructions produces
// a 128-bit result.  Two instructions could be issued in a cycle, but there is
// no value in doing this because they could not both write results.
//
// The single-cycle 2-input SHA instructions are in the same block because they
// have the same inputs and latency as the aes instructions.
//
// Originally, all functions in this block had single-cycle latency, but CX is
// unable to make use of single-cycle latency. To reduce area, functionality is
// spread across E1 and E2.
// In particular, the AES SBOX and ISBOX functions are split into
// LUT(mult inverse) -> affine transform & affine inverse transform -> LUT(mult inverse),
// so that they can share the same LUT.
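//
// Background (standard AES algebra, noted here for reference; the exact
// gate-level partitioning lives in the maia_cx_aese1 / maia_cx_aesd1 /
// maia_cx_aesed2 sub-blocks):
//
//   sbox(x)  = affine( gf256_inv(x) )        // encrypt direction
//   isbox(x) = gf256_inv( inv_affine(x) )    // decrypt direction
//
// Each direction needs exactly one GF(2^8) inversion per byte, so a single
// inversion LUT can be shared between AESE and AESD as described above; only
// the direction-specific affine / inverse-affine XOR networks (and the
// ShiftRows / InvShiftRows byte routing) differ between the two paths.
//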
// E1
// 38% of this cycle is used up driving qd and qn from the issq block. Therefore,
// the relatively shallow SHA operations are performed in this cycle, along with
// some preliminary processing for AESE and AESD.

assign qx_e1[127:0] = {128{aesd_or_e_e1}} & (qd[127:0] ^ qn[127:0]);

maia_cx_aese1 uaese1(
  .q        (qx_e1[127:0]),
  .aese_out (aese_e1[127:0]),
  .aese_shf (aese_shf_e1[15:0])
);

maia_cx_aesd1 uaesd1(
  .q        (qx_e1[127:0]),
  .aesd_out (aesd_e1[127:0]),
  .aesd_shf (aesd_shf_e1[15:0])
);

assign aesd_or_e_e1 = aesd_e1_q | aese_e1_q;

// Perform sha functions in E1 to save pipeline flops
// and reduce complexity of multiplexer in E2
assign sha1h_in_e1[31:0] = {32{sha1h_e1_q}} & qn[31:0];

maia_cx_sha1h usha1h(
  .qn (sha1h_in_e1[31:0]),
  .d  (sha1h_out_e1[31:0])
);

assign sha1su1_qdin_e1[127:0] = {128{sha1su1_e1_q}} & qd[127:0];
assign sha1su1_qnin_e1[127:0] = {128{sha1su1_e1_q}} & qn[127:0];

maia_cx_sha1su1 usha1su1(
  .qd (sha1su1_qdin_e1[127:0]),
  .qn (sha1su1_qnin_e1[127:0]),
  .d  (sha1su1_out_e1[127:0])
);

maia_cx_sha256su0 usha256su0(
  .qd (qd[127:0]),
  .qn (qn[127:0]),
  .d  (sha256su0_out_e1[127:0])
);

assign sha_inst_e1 = sha1h_e1_q | sha1su1_e1_q | sha256su0_e1_q;

assign crypt2_d_e1[127:0] = ({128{sha1h_e1_q}}     & {{96{1'b0}}, sha1h_out_e1[31:0]})
                          | ({128{sha1su1_e1_q}}   & sha1su1_out_e1[127:0])
                          | ({128{sha256su0_e1_q}} & sha256su0_out_e1[127:0])
                          | ({128{aese_e1_q}}      & aese_e1[127:0])
                          | ({128{aesd_e1_q}}      & aesd_e1[127:0])
                          | ({128{~(aesd_or_e_e1 | sha_inst_e1)}} & qn[127:0]);

assign aes_shf_e1[15:0] = {16{aese_e1_q}} & aese_shf_e1[15:0]
                        | {16{aesd_e1_q}} & aesd_shf_e1[15:0];

// reset flop(s) since feeds into active signal used for RCG
// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt or posedge cx_reset3)
begin: uival_e2_q
  if (cx_reset3 == 1'b1)
    ival_e2_q <= `MAIA_DFF_DELAY {1{1'b0}};
`ifdef MAIA_XPROP_FLOP
  else if (cx_reset3==1'b0)
    ival_e2_q <= `MAIA_DFF_DELAY ival_e1_q;
  else
    ival_e2_q <= `MAIA_DFF_DELAY {1{1'bx}};
`else
  else
    ival_e2_q <= `MAIA_DFF_DELAY ival_e1_q;
`endif
end
// verilint flop_checks on
// end of Macro DFF

// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: ucrypt2_e2
  if (ival_e1_q==1'b1) begin
    crypt2_d_e2_q[127:0] <= `MAIA_DFF_DELAY crypt2_d_e1[127:0];
    aes_shf_e2_q[15:0]   <= `MAIA_DFF_DELAY aes_shf_e1[15:0];
    aesd_e2_q            <= `MAIA_DFF_DELAY aesd_e1_q;
    aese_e2_q            <= `MAIA_DFF_DELAY aese_e1_q;
    aesmc_e2_q           <= `MAIA_DFF_DELAY aesmc_e1_q;
    aesimc_e2_q          <= `MAIA_DFF_DELAY aesimc_e1_q;
    aesemc_e2_q          <= `MAIA_DFF_DELAY aesemc_e1_q;
    aesdimc_e2_q         <= `MAIA_DFF_DELAY aesdimc_e1_q;
    pmull_e2_q           <= `MAIA_DFF_DELAY pmull_e1_q;
    sha_inst_e2_q        <= `MAIA_DFF_DELAY sha_inst_e1;
  end
`ifdef MAIA_XPROP_FLOP
  else if ((ival_e1_q==1'b0));
  else begin
    crypt2_d_e2_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
    aes_shf_e2_q[15:0]   <= `MAIA_DFF_DELAY {16{1'bx}};
    aesd_e2_q            <= `MAIA_DFF_DELAY {1{1'bx}};
    aese_e2_q            <= `MAIA_DFF_DELAY {1{1'bx}};
    aesmc_e2_q           <= `MAIA_DFF_DELAY {1{1'bx}};
    aesimc_e2_q          <= `MAIA_DFF_DELAY {1{1'bx}};
    aesemc_e2_q          <= `MAIA_DFF_DELAY {1{1'bx}};
    aesdimc_e2_q         <= `MAIA_DFF_DELAY {1{1'bx}};
    pmull_e2_q           <= `MAIA_DFF_DELAY {1{1'bx}};
    sha_inst_e2_q        <= `MAIA_DFF_DELAY {1{1'bx}};
  end
`endif
end
// verilint flop_checks on
// end of Macro DFF
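// Note on the selection idiom used below (and for crypt2_d_e1 in E1): data
// steering uses replicated-select AND-OR networks rather than priority muxes.
// A two-input sketch of the pattern:
//
//   assign y[127:0] = ({128{sel_a}} & a[127:0])
//                   | ({128{sel_b}} & b[127:0]);
//
// This assumes the decoded op controls are mutually exclusive (at most one is
// set for a valid op), and it doubles as operand gating: a datapath whose
// select is low is held at all-zeros, which is the glitch suppression
// referred to for aesmc_in / aesimc_in below.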
// Enable data inputs for selected operation (glitch suppression in unused datapaths)
assign aesmc_in[127:0]  = {128{aesmc_e2_q }} & crypt2_d_e2_q[127:0];
assign aesimc_in[127:0] = {128{aesimc_e2_q}} & crypt2_d_e2_q[127:0];

maia_cx_aesed2 uaesed2(
  .aes_din     (crypt2_d_e2_q[127:0]),
  .aes_shf     (aes_shf_e2_q[15:0]),
  .aesd_out    (aesd_out[127:0]),
  .aese_out    (aese_out[127:0]),
  .aesemc_out  (aesemc_out[127:0]),
  .aesdimc_out (aesdimc_out[127:0])
);

maia_cx_aesmc uaesmc(
  .d_in (aesmc_in[127:0]),
  .mc   (aesmc_out[127:0])
);

maia_cx_aesimc uaesimc(
  .d_in (aesimc_in[127:0]),
  .imc  (aesimc_out[127:0])
);

maia_cx_pmull upmull(
  .a_in  (crypt2_d_e2_q[63:0]),
  .b_in  (crypt2_d_e2_q[127:64]),
  .p_out (pmull_out[127:0])
);

assign crypt2_out_e2[127:0] = ({128{aesd_e2_q & ~aesdimc_e2_q}} & aesd_out[127:0])
                            | ({128{aese_e2_q & ~aesemc_e2_q}}  & aese_out[127:0])
                            | ({128{aesmc_e2_q}}                & aesmc_out[127:0])
                            | ({128{aesemc_e2_q}}               & aesemc_out[127:0])
                            | ({128{aesimc_e2_q}}               & aesimc_out[127:0])
                            | ({128{aesdimc_e2_q}}              & aesdimc_out[127:0])
                            | ({128{sha_inst_e2_q}}             & crypt2_d_e2_q[127:0])
                            | ({128{pmull_e2_q}}                & pmull_out[127:0]);

// Macro DFF called
// verilint flop_checks off
always @(posedge ck_gclkcx_crypt)
begin: ucrypt2_e3
  if (ival_e2_q==1'b1) begin
    crypt2_out_e3_q[127:0] <= `MAIA_DFF_DELAY crypt2_out_e2[127:0];
  end
`ifdef MAIA_XPROP_FLOP
  else if ((ival_e2_q==1'b0));
  else begin
    crypt2_out_e3_q[127:0] <= `MAIA_DFF_DELAY {128{1'bx}};
  end
`endif
end
// verilint flop_checks on
// end of Macro DFF

//-----------------------------------------------------------------------------
// regional clock gating (RCG) terms
//-----------------------------------------------------------------------------
assign crypt2_active = (ival_e1_q | ival_e2_q);

endmodule

//ARMAUTO UNDEF START
`define MAIA_UNDEFINE
`include "maia_header.v"
`undef MAIA_UNDEFINE
//ARMAUTO UNDEF END
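
//-----------------------------------------------------------------------------
// Illustrative reference sketch -- not part of the design database.
// PMULL performs a 64x64 -> 128-bit carry-less (polynomial over GF(2))
// multiply on its doubleword operands; a behavioural equivalent is sketched
// below for documentation only.  The module and port names are invented for
// illustration and do not describe the internal structure of maia_cx_pmull.
//
//   module pmull_ref (input [63:0] a, input [63:0] b, output reg [127:0] p);
//     integer i;
//     always @* begin
//       p = 128'b0;
//       for (i = 0; i < 64; i = i + 1)
//         if (b[i]) p = p ^ ({64'b0, a} << i);  // XOR-accumulate shifted copies of a
//     end
//   endmodule
//-----------------------------------------------------------------------------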