LCOV - code coverage report
Current view: top level - cipher - serpent.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 321 417 77.0 %
Date: 2017-03-02 16:44:37 Functions: 17 17 100.0 %

          Line data    Source code
       1             : /* serpent.c - Implementation of the Serpent encryption algorithm.
       2             :  *      Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
       3             :  *
       4             :  * This file is part of Libgcrypt.
       5             :  *
       6             :  * Libgcrypt is free software; you can redistribute it and/or modify
       7             :  * it under the terms of the GNU Lesser general Public License as
       8             :  * published by the Free Software Foundation; either version 2.1 of
       9             :  * the License, or (at your option) any later version.
      10             :  *
      11             :  * Libgcrypt is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14             :  * GNU Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with this program; if not, write to the Free Software
      18             :  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
      19             :  * 02111-1307, USA.
      20             :  */
      21             : 
      22             : #include <config.h>
      23             : 
      24             : #include <string.h>
      25             : #include <stdio.h>
      26             : 
      27             : #include "types.h"
      28             : #include "g10lib.h"
      29             : #include "cipher.h"
      30             : #include "bithelp.h"
      31             : #include "bufhelp.h"
      32             : #include "cipher-internal.h"
      33             : #include "cipher-selftest.h"
      34             : 
      35             : 
      36             : /* USE_SSE2: build the AMD64 SSE2 code (x86-64 with a compatible assembler). */
      37             : #undef USE_SSE2
      38             : #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      39             :     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
      40             : # define USE_SSE2 1
      41             : #endif
      42             : 
      43             : /* USE_AVX2: build the AMD64 AVX2 code; also requires ENABLE_AVX2_SUPPORT. */
      44             : #undef USE_AVX2
      45             : #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      46             :     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
      47             : # if defined(ENABLE_AVX2_SUPPORT)
      48             : #  define USE_AVX2 1
      49             : # endif
      50             : #endif
      51             : 
      52             : /* USE_NEON: enable the ARM NEON assembly code when all checks below pass. */
      53             : #undef USE_NEON
      54             : #ifdef ENABLE_NEON_SUPPORT
      55             : # if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
      56             :      && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
      57             :      && defined(HAVE_GCC_INLINE_ASM_NEON)
      58             : #  define USE_NEON 1
      59             : # endif
      60             : #endif /*ENABLE_NEON_SUPPORT*/
      61             : 
      62             : /* Number of rounds per Serpent encrypt/decrypt operation.  */
      63             : #define ROUNDS 32
      64             : 
      65             : /* Magic constant used while generating the subkeys.  */
      66             : #define PHI 0x9E3779B9
      67             : 
      68             : /* Serpent works on 128 bit blocks, held here as four u32 words.  */
      69             : typedef u32 serpent_block_t[4];
      70             : 
      71             : /* Serpent key, provided by the user.  If the original key is shorter
      72             :    than 256 bits, it is padded.  Held here as eight u32 words.  */
      73             : typedef u32 serpent_key_t[8];
      74             : 
      75             : /* The key schedule consists of 33 (ROUNDS + 1) 128 bit subkeys.  */
      76             : typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
      77             : 
      78             : /* A Serpent context: generated subkeys plus optional implementation flags.  */
      79             : typedef struct serpent_context
      80             : {
      81             :   serpent_subkeys_t keys;       /* Generated subkeys.  */
      82             : 
      83             : #ifdef USE_AVX2
      84             :   int use_avx2;                 /* Presumably selects the AVX2 routines; set elsewhere -- confirm.  */
      85             : #endif
      86             : #ifdef USE_NEON
      87             :   int use_neon;                 /* Presumably selects the NEON routines; set elsewhere -- confirm.  */
      88             : #endif
      89             : } serpent_context_t;
      90             : 
      91             : 
      92             : /* The assembly implementations use the System V ABI.  On Win64 this needs ABI
      93             :  * conversion and extra stack to store XMM6-XMM15, hence the sysv_abi attribute. */
      94             : #undef ASM_FUNC_ABI
      95             : #if defined(USE_SSE2) || defined(USE_AVX2)
      96             : # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
      97             : #  define ASM_FUNC_ABI __attribute__((sysv_abi))
      98             : # else
      99             : #  define ASM_FUNC_ABI
     100             : # endif
     101             : #endif
     102             : 
     103             : 
     104             : #ifdef USE_SSE2
     105             : /* Assembler implementations of Serpent using SSE2, declared extern here.
     106             :    They process 8 blocks in parallel.
     107             :  */
     108             : extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
     109             :                                        unsigned char *out,
     110             :                                        const unsigned char *in,
     111             :                                        unsigned char *ctr) ASM_FUNC_ABI;
     112             : 
     113             : extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
     114             :                                        unsigned char *out,
     115             :                                        const unsigned char *in,
     116             :                                        unsigned char *iv) ASM_FUNC_ABI;
     117             : 
     118             : extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
     119             :                                        unsigned char *out,
     120             :                                        const unsigned char *in,
     121             :                                        unsigned char *iv) ASM_FUNC_ABI;
     122             : 
     123             : extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
     124             :                                        unsigned char *out,
     125             :                                        const unsigned char *in,
     126             :                                        unsigned char *offset,
     127             :                                        unsigned char *checksum,
     128             :                                        const u64 Ls[8]) ASM_FUNC_ABI;
     129             : 
     130             : extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
     131             :                                        unsigned char *out,
     132             :                                        const unsigned char *in,
     133             :                                        unsigned char *offset,
     134             :                                        unsigned char *checksum,
     135             :                                        const u64 Ls[8]) ASM_FUNC_ABI;
     136             : 
     137             : extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
     138             :                                         const unsigned char *abuf,
     139             :                                         unsigned char *offset,
     140             :                                         unsigned char *checksum,
     141             :                                         const u64 Ls[8]) ASM_FUNC_ABI;
     142             : #endif
     143             : 
     144             : #ifdef USE_AVX2
     145             : /* Assembler implementations of Serpent using AVX2, declared extern here.
     146             :    They process 16 blocks in parallel.
     147             :  */
     148             : extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
     149             :                                        unsigned char *out,
     150             :                                        const unsigned char *in,
     151             :                                        unsigned char *ctr) ASM_FUNC_ABI;
     152             : 
     153             : extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
     154             :                                        unsigned char *out,
     155             :                                        const unsigned char *in,
     156             :                                        unsigned char *iv) ASM_FUNC_ABI;
     157             : 
     158             : extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
     159             :                                        unsigned char *out,
     160             :                                        const unsigned char *in,
     161             :                                        unsigned char *iv) ASM_FUNC_ABI;
     162             : 
     163             : extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
     164             :                                        unsigned char *out,
     165             :                                        const unsigned char *in,
     166             :                                        unsigned char *offset,
     167             :                                        unsigned char *checksum,
     168             :                                        const u64 Ls[16]) ASM_FUNC_ABI;
     169             : 
     170             : extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
     171             :                                        unsigned char *out,
     172             :                                        const unsigned char *in,
     173             :                                        unsigned char *offset,
     174             :                                        unsigned char *checksum,
     175             :                                        const u64 Ls[16]) ASM_FUNC_ABI;
     176             : 
     177             : extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
     178             :                                         const unsigned char *abuf,
     179             :                                         unsigned char *offset,
     180             :                                         unsigned char *checksum,
     181             :                                         const u64 Ls[16]) ASM_FUNC_ABI;
     182             : #endif
     183             : 
     184             : #ifdef USE_NEON
     185             : /* Assembler implementations of Serpent using ARM NEON, declared extern here.
     186             :    They process 8 blocks in parallel.
     187             :  */
     188             : extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
     189             :                                        unsigned char *out,
     190             :                                        const unsigned char *in,
     191             :                                        unsigned char *ctr);
     192             : 
     193             : extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
     194             :                                        unsigned char *out,
     195             :                                        const unsigned char *in,
     196             :                                        unsigned char *iv);
     197             : 
     198             : extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
     199             :                                        unsigned char *out,
     200             :                                        const unsigned char *in,
     201             :                                        unsigned char *iv);
     202             : 
     203             : extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
     204             :                                        unsigned char *out,
     205             :                                        const unsigned char *in,
     206             :                                        unsigned char *offset,
     207             :                                        unsigned char *checksum,
     208             :                                        const void *Ls[8]);
     209             : 
     210             : extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
     211             :                                        unsigned char *out,
     212             :                                        const unsigned char *in,
     213             :                                        unsigned char *offset,
     214             :                                        unsigned char *checksum,
     215             :                                        const void *Ls[8]);
     216             : 
     217             : extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
     218             :                                         const unsigned char *abuf,
     219             :                                         unsigned char *offset,
     220             :                                         unsigned char *checksum,
     221             :                                         const void *Ls[8]);
     222             : #endif
     223             : 
     224             : 
     225             : /* Forward declaration of serpent_test; its definition is not in this part.  */
     226             : static const char *serpent_test (void);
     227             : 
     228             : 
     229             : /*
     230             :  * These are the S-Boxes of Serpent from the following research paper.
     231             :  *
     232             :  *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
     233             :  *   (New York, New York, USA), p. 317–329, National Institute of Standards and
     234             :  *   Technology, 2000.
     235             :  *
     236             :  * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
     237             :  *
     238             :  */
     239             : 
     240             : #define SBOX0(r0, r1, r2, r3, w, x, y, z) \
     241             :   { \
     242             :     u32 r4; \
     243             :     /* S-box 0: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     244             :     r3 ^= r0; r4 =  r1; \
     245             :     r1 &= r3; r4 ^= r2; \
     246             :     r1 ^= r0; r0 |= r3; \
     247             :     r0 ^= r4; r4 ^= r3; \
     248             :     r3 ^= r2; r2 |= r1; \
     249             :     r2 ^= r4; r4 = ~r4; \
     250             :     r4 |= r1; r1 ^= r3; \
     251             :     r1 ^= r4; r3 |= r0; \
     252             :     r1 ^= r3; r4 ^= r3; \
     253             :     /* Hand the four result words to the output arguments. */ \
     254             :     w = r1; x = r4; y = r2; z = r0; \
     255             :   }
     256             : 
     257             : #define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     258             :   { \
     259             :     u32 r4; \
     260             :     /* Inverse S-box 0: bitwise-only sequence; r0..r3 are overwritten. */ \
     261             :     r2 = ~r2; r4 =  r1; \
     262             :     r1 |= r0; r4 = ~r4; \
     263             :     r1 ^= r2; r2 |= r4; \
     264             :     r1 ^= r3; r0 ^= r4; \
     265             :     r2 ^= r0; r0 &= r3; \
     266             :     r4 ^= r0; r0 |= r1; \
     267             :     r0 ^= r2; r3 ^= r4; \
     268             :     r2 ^= r1; r3 ^= r0; \
     269             :     r3 ^= r1; \
     270             :     r2 &= r3; \
     271             :     r4 ^= r2; \
     272             :     /* Hand the four result words to the output arguments. */ \
     273             :     w = r0; x = r4; y = r1; z = r3; \
     274             :   }
     275             : 
     276             : #define SBOX1(r0, r1, r2, r3, w, x, y, z) \
     277             :   { \
     278             :     u32 r4; \
     279             :     /* S-box 1: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     280             :     r0 = ~r0; r2 = ~r2; \
     281             :     r4 =  r0; r0 &= r1; \
     282             :     r2 ^= r0; r0 |= r3; \
     283             :     r3 ^= r2; r1 ^= r0; \
     284             :     r0 ^= r4; r4 |= r1; \
     285             :     r1 ^= r3; r2 |= r0; \
     286             :     r2 &= r4; r0 ^= r1; \
     287             :     r1 &= r2; \
     288             :     r1 ^= r0; r0 &= r2; \
     289             :     r0 ^= r4; \
     290             :     /* Hand the four result words to the output arguments. */ \
     291             :     w = r2; x = r0; y = r3; z = r1; \
     292             :   }
     293             : 
     294             : #define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     295             :   { \
     296             :     u32 r4; \
     297             :     /* Inverse S-box 1: bitwise-only sequence; r0..r3 are overwritten. */ \
     298             :     r4 =  r1; r1 ^= r3; \
     299             :     r3 &= r1; r4 ^= r2; \
     300             :     r3 ^= r0; r0 |= r1; \
     301             :     r2 ^= r3; r0 ^= r4; \
     302             :     r0 |= r2; r1 ^= r3; \
     303             :     r0 ^= r1; r1 |= r3; \
     304             :     r1 ^= r0; r4 = ~r4; \
     305             :     r4 ^= r1; r1 |= r0; \
     306             :     r1 ^= r0; \
     307             :     r1 |= r4; \
     308             :     r3 ^= r1; \
     309             :     /* Hand the four result words to the output arguments. */ \
     310             :     w = r4; x = r0; y = r3; z = r2; \
     311             :   }
     312             : 
     313             : #define SBOX2(r0, r1, r2, r3, w, x, y, z) \
     314             :   { \
     315             :     u32 r4; \
     316             :     /* S-box 2: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     317             :     r4 =  r0; r0 &= r2; \
     318             :     r0 ^= r3; r2 ^= r1; \
     319             :     r2 ^= r0; r3 |= r4; \
     320             :     r3 ^= r1; r4 ^= r2; \
     321             :     r1 =  r3; r3 |= r4; \
     322             :     r3 ^= r0; r0 &= r1; \
     323             :     r4 ^= r0; r1 ^= r3; \
     324             :     r1 ^= r4; r4 = ~r4; \
     325             :     /* Hand the four result words to the output arguments. */ \
     326             :     w = r2; x = r3; y = r1; z = r4; \
     327             :   }
     328             : 
     329             : #define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     330             :   { \
     331             :     u32 r4; \
     332             :     /* Inverse S-box 2: bitwise-only sequence; r0..r3 are overwritten. */ \
     333             :     r2 ^= r3; r3 ^= r0; \
     334             :     r4 =  r3; r3 &= r2; \
     335             :     r3 ^= r1; r1 |= r2; \
     336             :     r1 ^= r4; r4 &= r3; \
     337             :     r2 ^= r3; r4 &= r0; \
     338             :     r4 ^= r2; r2 &= r1; \
     339             :     r2 |= r0; r3 = ~r3; \
     340             :     r2 ^= r3; r0 ^= r3; \
     341             :     r0 &= r1; r3 ^= r4; \
     342             :     r3 ^= r0; \
     343             :     /* Hand the four result words to the output arguments. */ \
     344             :     w = r1; x = r4; y = r2; z = r3; \
     345             :   }
     346             : 
     347             : #define SBOX3(r0, r1, r2, r3, w, x, y, z) \
     348             :   { \
     349             :     u32 r4; \
     350             :     /* S-box 3: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     351             :     r4 =  r0; r0 |= r3; \
     352             :     r3 ^= r1; r1 &= r4; \
     353             :     r4 ^= r2; r2 ^= r3; \
     354             :     r3 &= r0; r4 |= r1; \
     355             :     r3 ^= r4; r0 ^= r1; \
     356             :     r4 &= r0; r1 ^= r3; \
     357             :     r4 ^= r2; r1 |= r0; \
     358             :     r1 ^= r2; r0 ^= r3; \
     359             :     r2 =  r1; r1 |= r3; \
     360             :     r1 ^= r0; \
     361             :     /* Hand the four result words to the output arguments. */ \
     362             :     w = r1; x = r2; y = r3; z = r4; \
     363             :   }
     364             : 
     365             : #define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     366             :   { \
     367             :     u32 r4; \
     368             :     /* Inverse S-box 3: bitwise-only sequence; r0..r3 are overwritten. */ \
     369             :     r4 =  r2; r2 ^= r1; \
     370             :     r0 ^= r2; r4 &= r2; \
     371             :     r4 ^= r0; r0 &= r1; \
     372             :     r1 ^= r3; r3 |= r4; \
     373             :     r2 ^= r3; r0 ^= r3; \
     374             :     r1 ^= r4; r3 &= r2; \
     375             :     r3 ^= r1; r1 ^= r0; \
     376             :     r1 |= r2; r0 ^= r3; \
     377             :     r1 ^= r4; \
     378             :     r0 ^= r1; \
     379             :     /* Hand the four result words to the output arguments. */ \
     380             :     w = r2; x = r1; y = r3; z = r0; \
     381             :   }
     382             : 
     383             : #define SBOX4(r0, r1, r2, r3, w, x, y, z) \
     384             :   { \
     385             :     u32 r4; \
     386             :     /* S-box 4: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     387             :     r1 ^= r3; r3 = ~r3; \
     388             :     r2 ^= r3; r3 ^= r0; \
     389             :     r4 =  r1; r1 &= r3; \
     390             :     r1 ^= r2; r4 ^= r3; \
     391             :     r0 ^= r4; r2 &= r4; \
     392             :     r2 ^= r0; r0 &= r1; \
     393             :     r3 ^= r0; r4 |= r1; \
     394             :     r4 ^= r0; r0 |= r3; \
     395             :     r0 ^= r2; r2 &= r3; \
     396             :     r0 = ~r0; r4 ^= r2; \
     397             :     /* Hand the four result words to the output arguments. */ \
     398             :     w = r1; x = r4; y = r0; z = r3; \
     399             :   }
     400             : 
     401             : #define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     402             :   { \
     403             :     u32 r4; \
     404             :     /* Inverse S-box 4: bitwise-only sequence; r0..r3 are overwritten. */ \
     405             :     r4 =  r2; r2 &= r3; \
     406             :     r2 ^= r1; r1 |= r3; \
     407             :     r1 &= r0; r4 ^= r2; \
     408             :     r4 ^= r1; r1 &= r2; \
     409             :     r0 = ~r0; r3 ^= r4; \
     410             :     r1 ^= r3; r3 &= r0; \
     411             :     r3 ^= r2; r0 ^= r1; \
     412             :     r2 &= r0; r3 ^= r0; \
     413             :     r2 ^= r4; \
     414             :     r2 |= r3; r3 ^= r0; \
     415             :     r2 ^= r1; \
     416             :     /* Hand the four result words to the output arguments. */ \
     417             :     w = r0; x = r3; y = r2; z = r4; \
     418             :   }
     419             : 
     420             : #define SBOX5(r0, r1, r2, r3, w, x, y, z) \
     421             :   { \
     422             :     u32 r4; \
     423             :     /* S-box 5: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     424             :     r0 ^= r1; r1 ^= r3; \
     425             :     r3 = ~r3; r4 =  r1; \
     426             :     r1 &= r0; r2 ^= r3; \
     427             :     r1 ^= r2; r2 |= r4; \
     428             :     r4 ^= r3; r3 &= r1; \
     429             :     r3 ^= r0; r4 ^= r1; \
     430             :     r4 ^= r2; r2 ^= r0; \
     431             :     r0 &= r3; r2 = ~r2; \
     432             :     r0 ^= r4; r4 |= r3; \
     433             :     r2 ^= r4; \
     434             :     /* Hand the four result words to the output arguments. */ \
     435             :     w = r1; x = r3; y = r0; z = r2; \
     436             :   }
     437             : 
     438             : #define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     439             :   { \
     440             :     u32 r4; \
     441             :     /* Inverse S-box 5: bitwise-only sequence; r0..r3 are overwritten. */ \
     442             :     r1 = ~r1; r4 =  r3; \
     443             :     r2 ^= r1; r3 |= r0; \
     444             :     r3 ^= r2; r2 |= r1; \
     445             :     r2 &= r0; r4 ^= r3; \
     446             :     r2 ^= r4; r4 |= r0; \
     447             :     r4 ^= r1; r1 &= r2; \
     448             :     r1 ^= r3; r4 ^= r2; \
     449             :     r3 &= r4; r4 ^= r1; \
     450             :     r3 ^= r4; r4 = ~r4; \
     451             :     r3 ^= r0; \
     452             :     /* Hand the four result words to the output arguments. */ \
     453             :     w = r1; x = r4; y = r3; z = r2; \
     454             :   }
     455             : 
     456             : #define SBOX6(r0, r1, r2, r3, w, x, y, z) \
     457             :   { \
     458             :     u32 r4; \
     459             :     /* S-box 6: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     460             :     r2 = ~r2; r4 =  r3; \
     461             :     r3 &= r0; r0 ^= r4; \
     462             :     r3 ^= r2; r2 |= r4; \
     463             :     r1 ^= r3; r2 ^= r0; \
     464             :     r0 |= r1; r2 ^= r1; \
     465             :     r4 ^= r0; r0 |= r3; \
     466             :     r0 ^= r2; r4 ^= r3; \
     467             :     r4 ^= r0; r3 = ~r3; \
     468             :     r2 &= r4; \
     469             :     r2 ^= r3; \
     470             :     /* Hand the four result words to the output arguments. */ \
     471             :     w = r0; x = r1; y = r4; z = r2; \
     472             :   }
     473             : 
     474             : #define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     475             :   { \
     476             :     u32 r4; \
     477             :     /* Inverse S-box 6: bitwise-only sequence; r0..r3 are overwritten. */ \
     478             :     r0 ^= r2; r4 =  r2; \
     479             :     r2 &= r0; r4 ^= r3; \
     480             :     r2 = ~r2; r3 ^= r1; \
     481             :     r2 ^= r3; r4 |= r0; \
     482             :     r0 ^= r2; r3 ^= r4; \
     483             :     r4 ^= r1; r1 &= r3; \
     484             :     r1 ^= r0; r0 ^= r3; \
     485             :     r0 |= r2; r3 ^= r1; \
     486             :     r4 ^= r0; \
     487             :     /* Hand the four result words to the output arguments. */ \
     488             :     w = r1; x = r2; y = r4; z = r3; \
     489             :   }
     490             : 
     491             : #define SBOX7(r0, r1, r2, r3, w, x, y, z) \
     492             :   { \
     493             :     u32 r4; \
     494             :     /* S-box 7: bitwise-only sequence; r0..r3 are overwritten, r4 is scratch. */ \
     495             :     r4 =  r1; r1 |= r2; \
     496             :     r1 ^= r3; r4 ^= r2; \
     497             :     r2 ^= r1; r3 |= r4; \
     498             :     r3 &= r0; r4 ^= r2; \
     499             :     r3 ^= r1; r1 |= r4; \
     500             :     r1 ^= r0; r0 |= r4; \
     501             :     r0 ^= r2; r1 ^= r4; \
     502             :     r2 ^= r1; r1 &= r0; \
     503             :     r1 ^= r4; r2 = ~r2; \
     504             :     r2 |= r0; \
     505             :     r4 ^= r2; \
     506             :     /* Hand the four result words to the output arguments. */ \
     507             :     w = r4; x = r3; y = r1; z = r0; \
     508             :   }
     509             : 
     510             : #define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
     511             :   { \
     512             :     u32 r4; \
     513             :     /* Inverse S-box 7: bitwise-only sequence; r0..r3 are overwritten. */ \
     514             :     r4 =  r2; r2 ^= r0; \
     515             :     r0 &= r3; r4 |= r3; \
     516             :     r2 = ~r2; r3 ^= r1; \
     517             :     r1 |= r0; r0 ^= r2; \
     518             :     r2 &= r4; r3 &= r4; \
     519             :     r1 ^= r2; r2 ^= r0; \
     520             :     r0 |= r2; r4 ^= r1; \
     521             :     r0 ^= r3; r3 ^= r4; \
     522             :     r4 |= r0; r3 ^= r2; \
     523             :     r4 ^= r2; \
     524             :     /* Hand the four result words to the output arguments. */ \
     525             :     w = r3; x = r0; y = r1; z = r4; \
     526             :   }
     527             : 
     528             : /* XOR the four words of BLOCK1 into BLOCK0 (each argument is expanded 4 times).  */
     529             : #define BLOCK_XOR(block0, block1) \
     530             :   {                               \
     531             :     block0[0] ^= block1[0];       \
     532             :     block0[1] ^= block1[1];       \
     533             :     block0[2] ^= block1[2];       \
     534             :     block0[3] ^= block1[3];       \
     535             :   }
     536             : 
     537             : /* Copy the four words of BLOCK_SRC to BLOCK_DST, one assignment per word.  */
     538             : #define BLOCK_COPY(block_dst, block_src) \
     539             :   {                                      \
     540             :     block_dst[0] = block_src[0];         \
     541             :     block_dst[1] = block_src[1];         \
     542             :     block_dst[2] = block_src[2];         \
     543             :     block_dst[3] = block_src[3];         \
     544             :   }
     545             : 
     546             : /* Apply SBOX number WHICH to the block found in ARRAY0, writing
     547             :    the output to the block found in ARRAY1.  ARRAY0 is modified too.  */
     548             : #define SBOX(which, array0, array1)                         \
     549             :   SBOX##which (array0[0], array0[1], array0[2], array0[3],  \
     550             :                array1[0], array1[1], array1[2], array1[3]);
     551             : 
     552             : /* Apply inverse SBOX number WHICH to the block found in ARRAY0, writing
     553             :    the output to the block found in ARRAY1.  ARRAY0 is modified too.  */
     554             : #define SBOX_INVERSE(which, array0, array1)                           \
     555             :   SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3],  \
     556             :                          array1[0], array1[1], array1[2], array1[3]);
     557             : 
     558             : /* Apply the linear transformation to BLOCK in place (rol, shift and XOR steps).  */
     559             : #define LINEAR_TRANSFORMATION(block)                  \
     560             :   {                                                   \
     561             :     block[0] = rol (block[0], 13);                    \
     562             :     block[2] = rol (block[2], 3);                     \
     563             :     block[1] = block[1] ^ block[0] ^ block[2];        \
     564             :     block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
     565             :     block[1] = rol (block[1], 1);                     \
     566             :     block[3] = rol (block[3], 7);                     \
     567             :     block[0] = block[0] ^ block[1] ^ block[3];        \
     568             :     block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
     569             :     block[0] = rol (block[0], 5);                     \
     570             :     block[2] = rol (block[2], 22);                    \
     571             :   }
     572             : 
     573             : /* Apply the inverse linear transformation to BLOCK in place (steps reversed, ror).  */
     574             : #define LINEAR_TRANSFORMATION_INVERSE(block)          \
     575             :   {                                                   \
     576             :     block[2] = ror (block[2], 22);                    \
     577             :     block[0] = ror (block[0] , 5);                    \
     578             :     block[2] = block[2] ^ block[3] ^ (block[1] << 7); \
     579             :     block[0] = block[0] ^ block[1] ^ block[3];        \
     580             :     block[3] = ror (block[3], 7);                     \
     581             :     block[1] = ror (block[1], 1);                     \
     582             :     block[3] = block[3] ^ block[2] ^ (block[0] << 3); \
     583             :     block[1] = block[1] ^ block[0] ^ block[2];        \
     584             :     block[2] = ror (block[2], 3);                     \
     585             :     block[0] = ror (block[0], 13);                    \
     586             :   }
     587             : 
     588             : /* Apply a Serpent round to BLOCK: XOR in SUBKEYS[round], run SBOX number
     589             :    WHICH into BLOCK_TMP, apply the linear transformation there and copy the
     590             :    result back to BLOCK.  Increments `round', which the caller must declare.  */
     591             : #define ROUND(which, subkeys, block, block_tmp) \
     592             :   {                                             \
     593             :     BLOCK_XOR (block, subkeys[round]);          \
     594             :     round++;                                    \
     595             :     SBOX (which, block, block_tmp);             \
     596             :     LINEAR_TRANSFORMATION (block_tmp);          \
     597             :     BLOCK_COPY (block, block_tmp);              \
     598             :   }
     599             : 
     600             : /* Apply the last Serpent round to BLOCK, using the SBOX number WHICH
     601             :    and the subkeys contained in SUBKEYS.  Use BLOCK_TMP as temporary
     602             :    storage.  The result will be stored in BLOCK_TMP.  This macro
     603             :    increments `round' twice: two consecutive subkeys are used, one
                     :    before the S-box (on BLOCK) and one after it (on BLOCK_TMP).
                     :    Unlike ROUND, no linear transformation is applied and nothing is
                     :    copied back to BLOCK.  `round' must be a variable in the
                     :    expanding scope.  */
     604             : #define ROUND_LAST(which, subkeys, block, block_tmp) \
     605             :   {                                                  \
     606             :     BLOCK_XOR (block, subkeys[round]);               \
     607             :     round++;                                         \
     608             :     SBOX (which, block, block_tmp);                  \
     609             :     BLOCK_XOR (block_tmp, subkeys[round]);           \
     610             :     round++;                                         \
     611             :   }
     612             : 
     613             : /* Apply an inverse Serpent round to BLOCK, using the SBOX number
     614             :    WHICH and the subkeys contained in SUBKEY.  Use BLOCK_TMP as
     615             :    temporary storage.  This macro decrements `round'.
                     :    Steps, in order: LINEAR_TRANSFORMATION_INVERSE on BLOCK,
                     :    SBOX_INVERSE from BLOCK into BLOCK_TMP, BLOCK_XOR (BLOCK_TMP,
                     :    SUBKEY[round]), step `round' down, then BLOCK_COPY (BLOCK,
                     :    BLOCK_TMP).  `round' must be a variable in the expanding
                     :    scope.  */
     616             : #define ROUND_INVERSE(which, subkey, block, block_tmp) \
     617             :   {                                                    \
     618             :     LINEAR_TRANSFORMATION_INVERSE (block);             \
     619             :     SBOX_INVERSE (which, block, block_tmp);            \
     620             :     BLOCK_XOR (block_tmp, subkey[round]);              \
     621             :     round--;                                           \
     622             :     BLOCK_COPY (block, block_tmp);                     \
     623             :   }
     624             : 
     625             : /* Apply the first inverse Serpent round to BLOCK, using the SBOX
     626             :    number WHICH and the subkeys contained in SUBKEYS.  Use BLOCK_TMP
     627             :    as temporary storage.  The result will be stored in BLOCK_TMP.
     628             :    This macro decrements `round' twice: two consecutive subkeys are
                     :    used, one before the inverse S-box (on BLOCK) and one after it
                     :    (on BLOCK_TMP).  No inverse linear transformation is applied and
                     :    nothing is copied back to BLOCK.  `round' must be a variable in
                     :    the expanding scope.  */
     629             : #define ROUND_FIRST_INVERSE(which, subkeys, block, block_tmp) \
     630             :   {                                                           \
     631             :     BLOCK_XOR (block, subkeys[round]);                        \
     632             :     round--;                                                  \
     633             :     SBOX_INVERSE (which, block, block_tmp);                   \
     634             :     BLOCK_XOR (block_tmp, subkeys[round]);                    \
     635             :     round--;                                                  \
     636             :   }
     637             : 
     638             : /* Convert the user provided key KEY of KEY_LENGTH bytes into the
     639             :    internally used format.
                     :    KEY is read with buf_get_le32 as KEY_LENGTH / 4 32-bit words;
                     :    bytes beyond the last full word are ignored.  If fewer than 8
                     :    words were read, the next word is set to 1 and the remaining
                     :    ones up to index 7 are set to 0.
                     :    NOTE(review): KEY_LENGTH is not bounded here.  More than 32
                     :    bytes would store past index 7 of KEY_PREPARED -- presumably
                     :    serpent_key_t holds exactly 8 words; confirm and check callers.  */
     640             : static void
     641        1387 : serpent_key_prepare (const byte *key, unsigned int key_length,
     642             :                      serpent_key_t key_prepared)
     643             : {
     644             :   int i;  /* Compared against the unsigned KEY_LENGTH below.  */
     645             : 
     646             :   /* Copy key.  From here on KEY_LENGTH counts 32-bit words.  */
     647        1387 :   key_length /= 4;
     648        9681 :   for (i = 0; i < key_length; i++)
     649        8294 :     key_prepared[i] = buf_get_le32 (key + i * 4);
     650             : 
     651        1387 :   if (i < 8)
     652             :     {
     653             :       /* Key must be padded according to the Serpent
     654             :          specification: a single 1 word, then zero words.  */
     655         928 :       key_prepared[i] = 0x00000001;
     656             : 
     657        2802 :       for (i++; i < 8; i++)
     658        1874 :         key_prepared[i] = 0;
     659             :     }
     660        1387 : }
     661             : 
     662             : /* Derive the 33 subkeys from KEY and store them in SUBKEYS.
                     :    KEY supplies the 8 initial prekey words (KEY[0..7]).  Sixteen
                     :    EXPAND_KEY steps produce SUBKEYS[0..31] and a final EXPAND_KEY4
                     :    step produces SUBKEYS[32].  The S-box numbers follow the
                     :    repeating pattern 3, 2, 1, 0, 7, 6, 5, 4.  The local work
                     :    buffers are wiped before returning.  */
     663             : static void
     664        1387 : serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
     665             : {
     666             :   u32 w[8];             /* The `prekey'.  */
     667             :   u32 ws[4];
     668             :   u32 wt[4];
     669             : 
     670             :   /* Initialize with key values.  */
     671        1387 :   w[0] = key[0];
     672        1387 :   w[1] = key[1];
     673        1387 :   w[2] = key[2];
     674        1387 :   w[3] = key[3];
     675        1387 :   w[4] = key[4];
     676        1387 :   w[5] = key[5];
     677        1387 :   w[6] = key[6];
     678        1387 :   w[7] = key[7];
     679             : 
     680             :   /* Expand to intermediate key using the affine recurrence.
                     :      W is used as an 8-word sliding window indexed modulo 8.  For
                     :      j = 0..3, EXPAND_KEY4 replaces W[(r+j)%8] by
                     :        rol (W[r+j] ^ W[r+j+3] ^ W[r+j+5] ^ W[r+j+7] ^ PHI ^ (r+j), 11)
                     :      (indices modulo 8) and also stores the new word in WO[j].
                     :      EXPAND_KEY runs it twice, filling WS for R and WT for R + 4.
                     :      Both macros are defined inside this function and are not
                     :      #undef'd within it.  */
     681             : #define EXPAND_KEY4(wo, r)                                                     \
     682             :   wo[0] = w[(r+0)%8] =                                                         \
     683             :     rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
     684             :   wo[1] = w[(r+1)%8] =                                                         \
     685             :     rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
     686             :   wo[2] = w[(r+2)%8] =                                                         \
     687             :     rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
     688             :   wo[3] = w[(r+3)%8] =                                                         \
     689             :     rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);
     690             : 
     691             : #define EXPAND_KEY(r)       \
     692             :   EXPAND_KEY4(ws, (r));     \
     693             :   EXPAND_KEY4(wt, (r + 4));
     694             : 
     695             :   /* Calculate subkeys via S-Boxes, in bitslice mode.  Each line
                     :      expands 8 prekey words and turns them into two subkeys.  */
     696        1387 :   EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
     697        1387 :   EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
     698        1387 :   EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
     699        1387 :   EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
     700        1387 :   EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
     701        1387 :   EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
     702        1387 :   EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
     703        1387 :   EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
     704        1387 :   EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
     705        1387 :   EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
     706        1387 :   EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
     707        1387 :   EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
     708        1387 :   EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
     709        1387 :   EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
     710        1387 :   EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
     711        1387 :   EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
     712        1387 :   EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
     713             : 
     714        1387 :   wipememory (ws, sizeof (ws));
     715        1387 :   wipememory (wt, sizeof (wt));
     716        1387 :   wipememory (w, sizeof (w));
     717        1387 : }
     718             : 
     719             : /* Initialize CONTEXT with the key KEY of KEY_LENGTH bytes.
                     :    The key is converted by serpent_key_prepare, the subkeys are
                     :    written to CONTEXT->keys, and the prepared key copy is wiped
                     :    before returning.  When built with USE_AVX2 / USE_NEON, the
                     :    matching CONTEXT->use_* flag is set from the hardware feature
                     :    bits reported by _gcry_get_hw_features.  KEY_LENGTH is passed on
                     :    unchecked.  */
     720             : static void
     721        1387 : serpent_setkey_internal (serpent_context_t *context,
     722             :                          const byte *key, unsigned int key_length)
     723             : {
     724             :   serpent_key_t key_prepared;
     725             : 
     726        1387 :   serpent_key_prepare (key, key_length, key_prepared);
     727        1387 :   serpent_subkeys_generate (key_prepared, context->keys);
     728             : 
     729             : #ifdef USE_AVX2
     730        1387 :   context->use_avx2 = 0;
     731        1387 :   if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
     732             :     {
     733           0 :       context->use_avx2 = 1;
     734             :     }
     735             : #endif
     736             : 
     737             : #ifdef USE_NEON
     738             :   context->use_neon = 0;
     739             :   if ((_gcry_get_hw_features () & HWF_ARM_NEON))
     740             :     {
     741             :       context->use_neon = 1;
     742             :     }
     743             : #endif
     744             : 
     745        1387 :   wipememory (key_prepared, sizeof(key_prepared));
     746        1387 : }
     747             : 
     748             : /* Initialize CTX with the key KEY of KEY_LENGTH bytes.
                     :    On the first call the Serpent self-test is run once and its
                     :    result is cached in function-local statics; a failure is logged
                     :    and makes this and every later call return
                     :    GPG_ERR_SELFTEST_FAILED.  Keys longer than 32 bytes (eight
                     :    32-bit words, the most the key preparation pads up to) are
                     :    rejected with GPG_ERR_INV_KEYLEN rather than being handed to
                     :    the key schedule, which does not bound its copy loop.  Returns
                     :    GPG_ERR_NO_ERROR on success.  */
     749             : static gcry_err_code_t
     750        1371 : serpent_setkey (void *ctx,
     751             :                 const byte *key, unsigned int key_length)
     752             : {
     753        1371 :   serpent_context_t *context = ctx;
     754             :   static const char *serpent_test_ret;
     755             :   static int serpent_init_done;
     756        1371 :   gcry_err_code_t ret = GPG_ERR_NO_ERROR;
     757             : 
     758        1371 :   if (! serpent_init_done)
     759             :     {
     760             :       /* Execute a self-test the first time, Serpent is used.  */
     761           4 :       serpent_init_done = 1;
     762           4 :       serpent_test_ret = serpent_test ();
     763           4 :       if (serpent_test_ret)
     764           0 :         log_error ("Serpent test failure: %s\n", serpent_test_ret);
     765             :     }
     766             : 
     767        1371 :   if (serpent_test_ret)
     768           0 :     ret = GPG_ERR_SELFTEST_FAILED;
                     :   else if (key_length > 32)
                     :     /* More than 256 bits would overrun the prepared key.  */
                     :     ret = GPG_ERR_INV_KEYLEN;
     769             :   else
     770        1371 :     serpent_setkey_internal (context, key, key_length);
     771             : 
     772        1371 :   return ret;
     773             : }
     774             : /* Encrypt one block.  Four 32-bit words are read from INPUT with
                     :    buf_get_le32, 31 ROUND steps cycling through S-boxes 0..7 are
                     :    applied, followed by ROUND_LAST with S-box 7, and the four
                     :    result words are written to OUTPUT with buf_put_le32.  `round'
                     :    starts at 0 and is advanced by the macros as they index
                     :    CONTEXT->keys.  All input words are loaded before OUTPUT is
                     :    written.  */
     775             : static void
     776     5235724 : serpent_encrypt_internal (serpent_context_t *context,
     777             :                           const byte *input, byte *output)
     778             : {
     779             :   serpent_block_t b, b_next;
     780     5235724 :   int round = 0;
     781             : 
     782     5235724 :   b[0] = buf_get_le32 (input + 0);
     783     5235724 :   b[1] = buf_get_le32 (input + 4);
     784     5235724 :   b[2] = buf_get_le32 (input + 8);
     785     5235724 :   b[3] = buf_get_le32 (input + 12);
     786             : 
     787     5235724 :   ROUND (0, context->keys, b, b_next);
     788     5235724 :   ROUND (1, context->keys, b, b_next);
     789     5235724 :   ROUND (2, context->keys, b, b_next);
     790     5235724 :   ROUND (3, context->keys, b, b_next);
     791     5235724 :   ROUND (4, context->keys, b, b_next);
     792     5235724 :   ROUND (5, context->keys, b, b_next);
     793     5235724 :   ROUND (6, context->keys, b, b_next);
     794     5235724 :   ROUND (7, context->keys, b, b_next);
     795     5235724 :   ROUND (0, context->keys, b, b_next);
     796     5235724 :   ROUND (1, context->keys, b, b_next);
     797     5235724 :   ROUND (2, context->keys, b, b_next);
     798     5235724 :   ROUND (3, context->keys, b, b_next);
     799     5235724 :   ROUND (4, context->keys, b, b_next);
     800     5235724 :   ROUND (5, context->keys, b, b_next);
     801     5235724 :   ROUND (6, context->keys, b, b_next);
     802     5235724 :   ROUND (7, context->keys, b, b_next);
     803     5235724 :   ROUND (0, context->keys, b, b_next);
     804     5235724 :   ROUND (1, context->keys, b, b_next);
     805     5235724 :   ROUND (2, context->keys, b, b_next);
     806     5235724 :   ROUND (3, context->keys, b, b_next);
     807     5235724 :   ROUND (4, context->keys, b, b_next);
     808     5235724 :   ROUND (5, context->keys, b, b_next);
     809     5235724 :   ROUND (6, context->keys, b, b_next);
     810     5235724 :   ROUND (7, context->keys, b, b_next);
     811     5235724 :   ROUND (0, context->keys, b, b_next);
     812     5235724 :   ROUND (1, context->keys, b, b_next);
     813     5235724 :   ROUND (2, context->keys, b, b_next);
     814     5235724 :   ROUND (3, context->keys, b, b_next);
     815     5235724 :   ROUND (4, context->keys, b, b_next);
     816     5235724 :   ROUND (5, context->keys, b, b_next);
     817     5235724 :   ROUND (6, context->keys, b, b_next);
     818             : 
     819     5235724 :   ROUND_LAST (7, context->keys, b, b_next);
     820             : 
     821     5235724 :   buf_put_le32 (output + 0, b_next[0]);
     822     5235724 :   buf_put_le32 (output + 4, b_next[1]);
     823     5235724 :   buf_put_le32 (output + 8, b_next[2]);
     824     5235724 :   buf_put_le32 (output + 12, b_next[3]);
     825     5235724 : }
     826             : /* Decrypt one block.  Four 32-bit words are read from INPUT with
                     :    buf_get_le32 into B_NEXT, ROUND_FIRST_INVERSE with S-box 7 moves
                     :    the state into B, and 31 ROUND_INVERSE steps follow with the
                     :    S-box number counting down 6..0 and then 7..0 three times.  The
                     :    result is written from B_NEXT to OUTPUT with buf_put_le32.
                     :    `round' starts at ROUNDS and is stepped down by the macros as
                     :    they index CONTEXT->keys.  All input words are loaded before
                     :    OUTPUT is written.  */
     827             : static void
     828      935948 : serpent_decrypt_internal (serpent_context_t *context,
     829             :                           const byte *input, byte *output)
     830             : {
     831             :   serpent_block_t b, b_next;
     832      935948 :   int round = ROUNDS;
     833             : 
     834      935948 :   b_next[0] = buf_get_le32 (input + 0);
     835      935948 :   b_next[1] = buf_get_le32 (input + 4);
     836      935948 :   b_next[2] = buf_get_le32 (input + 8);
     837      935948 :   b_next[3] = buf_get_le32 (input + 12);
     838             : 
     839      935948 :   ROUND_FIRST_INVERSE (7, context->keys, b_next, b);
     840             : 
     841      935948 :   ROUND_INVERSE (6, context->keys, b, b_next);
     842      935948 :   ROUND_INVERSE (5, context->keys, b, b_next);
     843      935948 :   ROUND_INVERSE (4, context->keys, b, b_next);
     844      935948 :   ROUND_INVERSE (3, context->keys, b, b_next);
     845      935948 :   ROUND_INVERSE (2, context->keys, b, b_next);
     846      935948 :   ROUND_INVERSE (1, context->keys, b, b_next);
     847      935948 :   ROUND_INVERSE (0, context->keys, b, b_next);
     848      935948 :   ROUND_INVERSE (7, context->keys, b, b_next);
     849      935948 :   ROUND_INVERSE (6, context->keys, b, b_next);
     850      935948 :   ROUND_INVERSE (5, context->keys, b, b_next);
     851      935948 :   ROUND_INVERSE (4, context->keys, b, b_next);
     852      935948 :   ROUND_INVERSE (3, context->keys, b, b_next);
     853      935948 :   ROUND_INVERSE (2, context->keys, b, b_next);
     854      935948 :   ROUND_INVERSE (1, context->keys, b, b_next);
     855      935948 :   ROUND_INVERSE (0, context->keys, b, b_next);
     856      935948 :   ROUND_INVERSE (7, context->keys, b, b_next);
     857      935948 :   ROUND_INVERSE (6, context->keys, b, b_next);
     858      935948 :   ROUND_INVERSE (5, context->keys, b, b_next);
     859      935948 :   ROUND_INVERSE (4, context->keys, b, b_next);
     860      935948 :   ROUND_INVERSE (3, context->keys, b, b_next);
     861      935948 :   ROUND_INVERSE (2, context->keys, b, b_next);
     862      935948 :   ROUND_INVERSE (1, context->keys, b, b_next);
     863      935948 :   ROUND_INVERSE (0, context->keys, b, b_next);
     864      935948 :   ROUND_INVERSE (7, context->keys, b, b_next);
     865      935948 :   ROUND_INVERSE (6, context->keys, b, b_next);
     866      935948 :   ROUND_INVERSE (5, context->keys, b, b_next);
     867      935948 :   ROUND_INVERSE (4, context->keys, b, b_next);
     868      935948 :   ROUND_INVERSE (3, context->keys, b, b_next);
     869      935948 :   ROUND_INVERSE (2, context->keys, b, b_next);
     870      935948 :   ROUND_INVERSE (1, context->keys, b, b_next);
     871      935948 :   ROUND_INVERSE (0, context->keys, b, b_next);
     872             : 
     873      935948 :   buf_put_le32 (output + 0, b_next[0]);
     874      935948 :   buf_put_le32 (output + 4, b_next[1]);
     875      935948 :   buf_put_le32 (output + 8, b_next[2]);
     876      935948 :   buf_put_le32 (output + 12, b_next[3]);
     877      935948 : }
     878             : /* Encrypt the block at BUFFER_IN into BUFFER_OUT using the context
                     :    CTX.  Note the argument order: output first here, input first
                     :    in serpent_encrypt_internal.  Returns the number of stack bytes
                     :    the caller should burn (two serpent_block_t).  */
     879             : static unsigned int
     880     5165688 : serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
     881             : {
     882     5165688 :   serpent_context_t *context = ctx;
     883             : 
     884     5165688 :   serpent_encrypt_internal (context, buffer_in, buffer_out);
     885     5165688 :   return /*burn_stack*/ (2 * sizeof (serpent_block_t));
     886             : }
     887             : /* Decrypt the block at BUFFER_IN into BUFFER_OUT using the context
                     :    CTX.  Note the argument order: output first here, input first
                     :    in serpent_decrypt_internal.  Returns the number of stack bytes
                     :    the caller should burn (two serpent_block_t).  */
     888             : static unsigned int
     889      917842 : serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
     890             : {
     891      917842 :   serpent_context_t *context = ctx;
     892             : 
     893      917842 :   serpent_decrypt_internal (context, buffer_in, buffer_out);
     894      917842 :   return /*burn_stack*/ (2 * sizeof (serpent_block_t));
     895             : }
     896             : 
     897             : 
     898             : 
     899             : /* Bulk encryption of complete blocks in CTR mode.  This function is only
     900             :    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
     901             :    of size sizeof(serpent_block_t).
                     :    NBLOCKS blocks are read from INBUF_ARG and written to OUTBUF_ARG.
                     :    Where compiled in, the AVX2 (16 blocks per call, only if
                     :    ctx->use_avx2), SSE2 (8 blocks per call, no run-time flag) and
                     :    NEON (8 blocks per call, only if ctx->use_neon) routines consume
                     :    as many full chunks as they can; the generic loop handles what
                     :    is left.  The stack burn depth is reset to 0 only when one of
                     :    those routines ran and left no blocks for the generic loop.
                     :    NOTE(review): the assembly routines presumably advance CTR
                     :    themselves -- they are not visible here; confirm.  */
     902             : void
     903       35720 : _gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
     904             :                       void *outbuf_arg, const void *inbuf_arg,
     905             :                       size_t nblocks)
     906             : {
     907       35720 :   serpent_context_t *ctx = context;
     908       35720 :   unsigned char *outbuf = outbuf_arg;
     909       35720 :   const unsigned char *inbuf = inbuf_arg;
     910             :   unsigned char tmpbuf[sizeof(serpent_block_t)];
     911       35720 :   int burn_stack_depth = 2 * sizeof (serpent_block_t);
     912             :   int i;
     913             : 
     914             : #ifdef USE_AVX2
     915       35720 :   if (ctx->use_avx2)
     916             :     {
     917           0 :       int did_use_avx2 = 0;
     918             : 
     919             :       /* Process data in 16 block chunks. */
     920           0 :       while (nblocks >= 16)
     921             :         {
     922           0 :           _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
     923             : 
     924           0 :           nblocks -= 16;
     925           0 :           outbuf += 16 * sizeof(serpent_block_t);
     926           0 :           inbuf  += 16 * sizeof(serpent_block_t);
     927           0 :           did_use_avx2 = 1;
     928             :         }
     929             : 
     930           0 :       if (did_use_avx2)
     931             :         {
     932             :           /* serpent-avx2 assembly code does not use stack */
     933           0 :           if (nblocks == 0)
     934           0 :             burn_stack_depth = 0;
     935             :         }
     936             : 
     937             :       /* Use generic/sse2 code to handle smaller chunks... */
     938             :       /* TODO: use caching instead? */
     939             :     }
     940             : #endif
     941             : 
     942             : #ifdef USE_SSE2
     943             :   {
     944       35720 :     int did_use_sse2 = 0;
     945             : 
     946             :     /* Process data in 8 block chunks. */
     947      391368 :     while (nblocks >= 8)
     948             :       {
     949      319928 :         _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);
     950             : 
     951      319928 :         nblocks -= 8;
     952      319928 :         outbuf += 8 * sizeof(serpent_block_t);
     953      319928 :         inbuf  += 8 * sizeof(serpent_block_t);
     954      319928 :         did_use_sse2 = 1;
     955             :       }
     956             : 
     957       35720 :     if (did_use_sse2)
     958             :       {
     959             :         /* serpent-sse2 assembly code does not use stack */
     960       31514 :         if (nblocks == 0)
     961       18000 :           burn_stack_depth = 0;
     962             :       }
     963             : 
     964             :     /* Use generic code to handle smaller chunks... */
     965             :     /* TODO: use caching instead? */
     966             :   }
     967             : #endif
     968             : 
     969             : #ifdef USE_NEON
     970             :   if (ctx->use_neon)
     971             :     {
     972             :       int did_use_neon = 0;
     973             : 
     974             :       /* Process data in 8 block chunks. */
     975             :       while (nblocks >= 8)
     976             :         {
     977             :           _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);
     978             : 
     979             :           nblocks -= 8;
     980             :           outbuf += 8 * sizeof(serpent_block_t);
     981             :           inbuf  += 8 * sizeof(serpent_block_t);
     982             :           did_use_neon = 1;
     983             :         }
     984             : 
     985             :       if (did_use_neon)
     986             :         {
     987             :           /* serpent-neon assembly code does not use stack */
     988             :           if (nblocks == 0)
     989             :             burn_stack_depth = 0;
     990             :         }
     991             : 
     992             :       /* Use generic code to handle smaller chunks... */
     993             :       /* TODO: use caching instead? */
     994             :     }
     995             : #endif
     996             : 
     997       96704 :   for ( ;nblocks; nblocks-- )
     998             :     {
     999             :       /* Encrypt the counter. */
    1000       60984 :       serpent_encrypt_internal(ctx, ctr, tmpbuf);
    1001             :       /* XOR the input with the encrypted counter and store in output.  */
    1002       60984 :       buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
    1003       60984 :       outbuf += sizeof(serpent_block_t);
    1004       60984 :       inbuf  += sizeof(serpent_block_t);
    1005             :       /* Increment the counter.  It is treated as big-endian: start at
                     :          the last byte and move towards the first for as long as the
                     :          incremented byte wrapped around to zero.  */
    1006       61146 :       for (i = sizeof(serpent_block_t); i > 0; i--)
    1007             :         {
    1008       61142 :           ctr[i-1]++;
    1009       61142 :           if (ctr[i-1])
    1010       60980 :             break;
    1011             :         }
    1012             :     }
    1013             : 
    1014       35720 :   wipememory(tmpbuf, sizeof(tmpbuf));
    1015       35720 :   _gcry_burn_stack(burn_stack_depth);
    1016       35720 : }
    1017             : 
    1018             : /* Bulk decryption of complete blocks in CBC mode.  This function is only
    1019             :    intended for the bulk encryption feature of cipher.c.
                     :    NBLOCKS blocks are read from INBUF_ARG and written to OUTBUF_ARG,
                     :    with IV as the chaining value.  Where compiled in, the AVX2 (16
                     :    blocks per call, only if ctx->use_avx2), SSE2 (8 blocks per
                     :    call, no run-time flag) and NEON (8 blocks per call, only if
                     :    ctx->use_neon) routines consume as many full chunks as they
                     :    can; the generic loop handles what is left.  The stack burn
                     :    depth is reset to 0 only when one of those routines ran and
                     :    left no blocks for the generic loop.
                     :    NOTE(review): buf_xor_n_copy_2 presumably stores SAVEBUF ^ IV to
                     :    OUTBUF and then copies the ciphertext block into IV -- the
                     :    helper is not visible here; confirm.  */
    1020             : void
    1021        9182 : _gcry_serpent_cbc_dec(void *context, unsigned char *iv,
    1022             :                       void *outbuf_arg, const void *inbuf_arg,
    1023             :                       size_t nblocks)
    1024             : {
    1025        9182 :   serpent_context_t *ctx = context;
    1026        9182 :   unsigned char *outbuf = outbuf_arg;
    1027        9182 :   const unsigned char *inbuf = inbuf_arg;
    1028             :   unsigned char savebuf[sizeof(serpent_block_t)];
    1029        9182 :   int burn_stack_depth = 2 * sizeof (serpent_block_t);
    1030             : 
    1031             : #ifdef USE_AVX2
    1032        9182 :   if (ctx->use_avx2)
    1033             :     {
    1034           0 :       int did_use_avx2 = 0;
    1035             : 
    1036             :       /* Process data in 16 block chunks. */
    1037           0 :       while (nblocks >= 16)
    1038             :         {
    1039           0 :           _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
    1040             : 
    1041           0 :           nblocks -= 16;
    1042           0 :           outbuf += 16 * sizeof(serpent_block_t);
    1043           0 :           inbuf  += 16 * sizeof(serpent_block_t);
    1044           0 :           did_use_avx2 = 1;
    1045             :         }
    1046             : 
    1047           0 :       if (did_use_avx2)
    1048             :         {
    1049             :           /* serpent-avx2 assembly code does not use stack */
    1050           0 :           if (nblocks == 0)
    1051           0 :             burn_stack_depth = 0;
    1052             :         }
    1053             : 
    1054             :       /* Use generic/sse2 code to handle smaller chunks... */
    1055             :     }
    1056             : #endif
    1057             : 
    1058             : #ifdef USE_SSE2
    1059             :   {
    1060        9182 :     int did_use_sse2 = 0;
    1061             : 
    1062             :     /* Process data in 8 block chunks. */
    1063      109192 :     while (nblocks >= 8)
    1064             :       {
    1065       90828 :         _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);
    1066             : 
    1067       90828 :         nblocks -= 8;
    1068       90828 :         outbuf += 8 * sizeof(serpent_block_t);
    1069       90828 :         inbuf  += 8 * sizeof(serpent_block_t);
    1070       90828 :         did_use_sse2 = 1;
    1071             :       }
    1072             : 
    1073        9182 :     if (did_use_sse2)
    1074             :       {
    1075             :         /* serpent-sse2 assembly code does not use stack */
    1076        7936 :         if (nblocks == 0)
    1077        3000 :           burn_stack_depth = 0;
    1078             :       }
    1079             : 
    1080             :     /* Use generic code to handle smaller chunks... */
    1081             :   }
    1082             : #endif
    1083             : 
    1084             : #ifdef USE_NEON
    1085             :   if (ctx->use_neon)
    1086             :     {
    1087             :       int did_use_neon = 0;
    1088             : 
    1089             :       /* Process data in 8 block chunks. */
    1090             :       while (nblocks >= 8)
    1091             :         {
    1092             :           _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);
    1093             : 
    1094             :           nblocks -= 8;
    1095             :           outbuf += 8 * sizeof(serpent_block_t);
    1096             :           inbuf  += 8 * sizeof(serpent_block_t);
    1097             :           did_use_neon = 1;
    1098             :         }
    1099             : 
    1100             :       if (did_use_neon)
    1101             :         {
    1102             :           /* serpent-neon assembly code does not use stack */
    1103             :           if (nblocks == 0)
    1104             :             burn_stack_depth = 0;
    1105             :         }
    1106             : 
    1107             :       /* Use generic code to handle smaller chunks... */
    1108             :     }
    1109             : #endif
    1110             : 
    1111       27272 :   for ( ;nblocks; nblocks-- )
    1112             :     {
    1113             :       /* INBUF is needed later and it may be identical to OUTBUF, so store
    1114             :          the intermediate result to SAVEBUF.  */
    1115       18090 :       serpent_decrypt_internal (ctx, inbuf, savebuf);
    1116             : 
    1117       18090 :       buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t));
    1118       18090 :       inbuf += sizeof(serpent_block_t);
    1119       18090 :       outbuf += sizeof(serpent_block_t);
    1120             :     }
    1121             : 
    1122        9182 :   wipememory(savebuf, sizeof(savebuf));
    1123        9182 :   _gcry_burn_stack(burn_stack_depth);
    1124        9182 : }
    1125             : 
    1126             : /* Bulk decryption of complete blocks in CFB mode.  This function is only
    1127             :    intended for the bulk encryption feature of cipher.c. */
    1128             : void
    1129        6272 : _gcry_serpent_cfb_dec(void *context, unsigned char *iv,
    1130             :                       void *outbuf_arg, const void *inbuf_arg,
    1131             :                       size_t nblocks)
    1132             : {
    1133        6272 :   serpent_context_t *ctx = context;
    1134        6272 :   unsigned char *outbuf = outbuf_arg;
    1135        6272 :   const unsigned char *inbuf = inbuf_arg;
    1136        6272 :   int burn_stack_depth = 2 * sizeof (serpent_block_t);
    1137             : 
    1138             : #ifdef USE_AVX2
    1139        6272 :   if (ctx->use_avx2)
    1140             :     {
    1141           0 :       int did_use_avx2 = 0;
    1142             : 
    1143             :       /* Process data in 16 block chunks. */
    1144           0 :       while (nblocks >= 16)
    1145             :         {
    1146           0 :           _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
    1147             : 
    1148           0 :           nblocks -= 16;
    1149           0 :           outbuf += 16 * sizeof(serpent_block_t);
    1150           0 :           inbuf  += 16 * sizeof(serpent_block_t);
    1151           0 :           did_use_avx2 = 1;
    1152             :         }
    1153             : 
    1154           0 :       if (did_use_avx2)
    1155             :         {
    1156             :           /* serpent-avx2 assembly code does not use stack */
    1157           0 :           if (nblocks == 0)
    1158           0 :             burn_stack_depth = 0;
    1159             :         }
    1160             : 
    1161             :       /* Use generic/sse2 code to handle smaller chunks... */
    1162             :     }
    1163             : #endif
    1164             : 
    1165             : #ifdef USE_SSE2
    1166             :   {
    1167        6272 :     int did_use_sse2 = 0;
    1168             : 
    1169             :     /* Process data in 8 block chunks. */
    1170       69964 :     while (nblocks >= 8)
    1171             :       {
    1172       57420 :         _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);
    1173             : 
    1174       57420 :         nblocks -= 8;
    1175       57420 :         outbuf += 8 * sizeof(serpent_block_t);
    1176       57420 :         inbuf  += 8 * sizeof(serpent_block_t);
    1177       57420 :         did_use_sse2 = 1;
    1178             :       }
    1179             : 
    1180        6272 :     if (did_use_sse2)
    1181             :       {
    1182             :         /* serpent-sse2 assembly code does not use stack */
    1183        5662 :         if (nblocks == 0)
    1184        3000 :           burn_stack_depth = 0;
    1185             :       }
    1186             : 
    1187             :     /* Use generic code to handle smaller chunks... */
    1188             :   }
    1189             : #endif
    1190             : 
    1191             : #ifdef USE_NEON
    1192             :   if (ctx->use_neon)
    1193             :     {
    1194             :       int did_use_neon = 0;
    1195             : 
    1196             :       /* Process data in 8 block chunks. */
    1197             :       while (nblocks >= 8)
    1198             :         {
    1199             :           _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);
    1200             : 
    1201             :           nblocks -= 8;
    1202             :           outbuf += 8 * sizeof(serpent_block_t);
    1203             :           inbuf  += 8 * sizeof(serpent_block_t);
    1204             :           did_use_neon = 1;
    1205             :         }
    1206             : 
    1207             :       if (did_use_neon)
    1208             :         {
    1209             :           /* serpent-neon assembly code does not use stack */
    1210             :           if (nblocks == 0)
    1211             :             burn_stack_depth = 0;
    1212             :         }
    1213             : 
    1214             :       /* Use generic code to handle smaller chunks... */
    1215             :     }
    1216             : #endif
    1217             : 
    1218       15308 :   for ( ;nblocks; nblocks-- )
    1219             :     {
    1220        9036 :       serpent_encrypt_internal(ctx, iv, iv);
    1221        9036 :       buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
    1222        9036 :       outbuf += sizeof(serpent_block_t);
    1223        9036 :       inbuf  += sizeof(serpent_block_t);
    1224             :     }
    1225             : 
    1226        6272 :   _gcry_burn_stack(burn_stack_depth);
    1227        6272 : }
    1228             : 
    1229             : /* Bulk encryption/decryption of complete blocks in OCB mode.
                     :    ENCRYPT selects the *_ocb_enc routines when non-zero and the
                     :    *_ocb_dec routines otherwise.  Whole chunks of 16 (AVX2) or 8
                     :    (SSE2, NEON) blocks are processed here when the matching code is
                     :    compiled in; the number of blocks left over is returned.  */
    1230             : size_t
    1231       12588 : _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
    1232             :                         const void *inbuf_arg, size_t nblocks, int encrypt)
    1233             : {
    1234             : #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
    1235       12588 :   serpent_context_t *ctx = (void *)&c->context.c;
    1236       12588 :   unsigned char *outbuf = outbuf_arg;
    1237       12588 :   const unsigned char *inbuf = inbuf_arg;
    1238       12588 :   int burn_stack_depth = 2 * sizeof (serpent_block_t);
    1239       12588 :   u64 blkn = c->u_mode.ocb.data_nblocks;
    1240             : #else
    1241             :   (void)c;
    1242             :   (void)outbuf_arg;
    1243             :   (void)inbuf_arg;
    1244             :   (void)encrypt;
    1245             : #endif
    1246             : 
    1247             : #ifdef USE_AVX2
    1248       12588 :   if (ctx->use_avx2)
    1249             :     {
    1250           0 :       int did_use_avx2 = 0;
    1251             :       u64 Ls[16];
    1252           0 :       unsigned int n = 16 - (blkn % 16);
    1253             :       u64 *l;
    1254             :       int i;
    1255             : 
    1256           0 :       if (nblocks >= 16)
    1257             :         {
    1258           0 :           for (i = 0; i < 16; i += 8)
    1259             :             {
    1260             :               /* Use u64 to store pointers for x32 support (assembly function
    1261             :                * assumes 64-bit pointers).  Slots are rotated by n, which is
                     :                * derived from the running block counter blkn. */
    1262           0 :               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1263           0 :               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1264           0 :               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1265           0 :               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
    1266           0 :               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1267           0 :               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1268           0 :               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1269             :             }
    1270             : 
    1271           0 :           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
    1272           0 :           l = &Ls[(15 + n) % 16];
    1273             : 
    1274             :           /* Process data in 16 block chunks; only the Ls slot *l changes. */
    1275           0 :           while (nblocks >= 16)
    1276             :             {
    1277           0 :               blkn += 16;
    1278           0 :               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
    1279             : 
    1280           0 :               if (encrypt)
    1281           0 :                 _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
    1282           0 :                                           c->u_ctr.ctr, Ls);
    1283             :               else
    1284           0 :                 _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
    1285           0 :                                           c->u_ctr.ctr, Ls);
    1286             : 
    1287           0 :               nblocks -= 16;
    1288           0 :               outbuf += 16 * sizeof(serpent_block_t);
    1289           0 :               inbuf  += 16 * sizeof(serpent_block_t);
    1290           0 :               did_use_avx2 = 1;
    1291             :             }
    1292             :         }
    1293             : 
    1294           0 :       if (did_use_avx2)
    1295             :         {
    1296             :           /* serpent-avx2 assembly code does not use stack */
    1297           0 :           if (nblocks == 0)
    1298           0 :             burn_stack_depth = 0;
    1299             :         }
    1300             : 
    1301             :       /* Use generic/sse2 code to handle smaller chunks... */
    1302             :     }
    1303             : #endif
    1304             : 
    1305             : #ifdef USE_SSE2
    1306             :   {
    1307       12588 :     int did_use_sse2 = 0;
    1308             :     u64 Ls[8];
    1309       12588 :     unsigned int n = 8 - (blkn % 8);
    1310             :     u64 *l;
    1311             : 
    1312       12588 :     if (nblocks >= 8)
    1313             :       {
    1314             :         /* Use u64 to store pointers for x32 support (assembly function
    1315             :          * assumes 64-bit pointers).  Slots are rotated by n, which is
                     :          * derived from the running block counter blkn. */
    1316       10728 :         Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1317       10728 :         Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1318       10728 :         Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1319       10728 :         Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
    1320       10728 :         Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1321       10728 :         Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1322       10728 :         Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1323       10728 :         l = &Ls[(7 + n) % 8];
    1324             : 
    1325             :         /* Process data in 8 block chunks; only the Ls slot *l changes. */
    1326     1303488 :         while (nblocks >= 8)
    1327             :           {
    1328     1282032 :             blkn += 8;
    1329     1282032 :             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
    1330             : 
    1331     1282032 :             if (encrypt)
    1332      641016 :               _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
    1333      641016 :                                           c->u_ctr.ctr, Ls);
    1334             :             else
    1335      641016 :               _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
    1336      641016 :                                           c->u_ctr.ctr, Ls);
    1337             : 
    1338     1282032 :             nblocks -= 8;
    1339     1282032 :             outbuf += 8 * sizeof(serpent_block_t);
    1340     1282032 :             inbuf  += 8 * sizeof(serpent_block_t);
    1341     1282032 :             did_use_sse2 = 1;
    1342             :           }
    1343             :       }
    1344             : 
    1345       12588 :     if (did_use_sse2)
    1346             :       {
    1347             :         /* serpent-sse2 assembly code does not use stack */
    1348       10728 :         if (nblocks == 0)
    1349        6036 :           burn_stack_depth = 0;
    1350             :       }
    1351             : 
    1352             :     /* Use generic code to handle smaller chunks... */
    1353             :   }
    1354             : #endif
    1355             : 
    1356             : #ifdef USE_NEON
    1357             :   if (ctx->use_neon)
    1358             :     {
    1359             :       int did_use_neon = 0;
    1360             :       const void *Ls[8];
    1361             :       unsigned int n = 8 - (blkn % 8);
    1362             :       const void **l;
    1363             : 
    1364             :       if (nblocks >= 8)
    1365             :         {
    1366             :           Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
    1367             :           Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
    1368             :           Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
    1369             :           Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
    1370             :           Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
    1371             :           Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
    1372             :           Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
    1373             :           l = &Ls[(7 + n) % 8];
    1374             : 
    1375             :           /* Process data in 8 block chunks; only the Ls slot *l changes. */
    1376             :           while (nblocks >= 8)
    1377             :             {
    1378             :               blkn += 8;
    1379             :               *l = ocb_get_l(c,  blkn - blkn % 8);
    1380             : 
    1381             :               if (encrypt)
    1382             :                 _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
    1383             :                                           c->u_ctr.ctr, Ls);
    1384             :               else
    1385             :                 _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
    1386             :                                           c->u_ctr.ctr, Ls);
    1387             : 
    1388             :               nblocks -= 8;
    1389             :               outbuf += 8 * sizeof(serpent_block_t);
    1390             :               inbuf  += 8 * sizeof(serpent_block_t);
    1391             :               did_use_neon = 1;
    1392             :             }
    1393             :         }
    1394             : 
    1395             :       if (did_use_neon)
    1396             :         {
    1397             :           /* serpent-neon assembly code does not use stack */
    1398             :           if (nblocks == 0)
    1399             :             burn_stack_depth = 0;
    1400             :         }
    1401             : 
    1402             :       /* Use generic code to handle smaller chunks... */
    1403             :     }
    1404             : #endif
    1405             : 
    1406             : #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
    1407       12588 :   c->u_mode.ocb.data_nblocks = blkn;
    1408             :   /* burn_stack_depth is zero when assembly code consumed all blocks.  */
    1409       12588 :   if (burn_stack_depth)
    1410        6552 :     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
    1411             : #endif
    1412             : 
    1413       12588 :   return nblocks;
    1414             : }
    1415             : 
    1416             : /* Bulk authentication of complete blocks in OCB mode.
                     :    Whole chunks of 16 (AVX2) or 8 (SSE2, NEON) blocks of ABUF_ARG are
                     :    fed to the *_ocb_auth routines when the matching code is compiled
                     :    in; the number of blocks left over is returned.  */
    1417             : size_t
    1418        1866 : _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
    1419             :                         size_t nblocks)
    1420             : {
    1421             : #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
    1422        1866 :   serpent_context_t *ctx = (void *)&c->context.c;
    1423        1866 :   const unsigned char *abuf = abuf_arg;
    1424        1866 :   int burn_stack_depth = 2 * sizeof(serpent_block_t);
    1425        1866 :   u64 blkn = c->u_mode.ocb.aad_nblocks;
    1426             : #else
    1427             :   (void)c;
    1428             :   (void)abuf_arg;
    1429             : #endif
    1430             : 
    1431             : #ifdef USE_AVX2
    1432        1866 :   if (ctx->use_avx2)
    1433             :     {
    1434           0 :       int did_use_avx2 = 0;
    1435             :       u64 Ls[16];
    1436           0 :       unsigned int n = 16 - (blkn % 16);
    1437             :       u64 *l;
    1438             :       int i;
    1439             : 
    1440           0 :       if (nblocks >= 16)
    1441             :         {
    1442           0 :           for (i = 0; i < 16; i += 8)
    1443             :             {
    1444             :               /* Use u64 to store pointers for x32 support (assembly function
    1445             :                * assumes 64-bit pointers).  Slots are rotated by n, which is
                     :                * derived from the running block counter blkn. */
    1446           0 :               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1447           0 :               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1448           0 :               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1449           0 :               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
    1450           0 :               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1451           0 :               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1452           0 :               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1453             :             }
    1454             : 
    1455           0 :           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
    1456           0 :           l = &Ls[(15 + n) % 16];
    1457             : 
    1458             :           /* Process data in 16 block chunks; only the Ls slot *l changes. */
    1459           0 :           while (nblocks >= 16)
    1460             :             {
    1461           0 :               blkn += 16;
    1462           0 :               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
    1463             : 
    1464           0 :               _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
    1465           0 :                                           c->u_mode.ocb.aad_sum, Ls);
    1466             : 
    1467           0 :               nblocks -= 16;
    1468           0 :               abuf += 16 * sizeof(serpent_block_t);
    1469           0 :               did_use_avx2 = 1;
    1470             :             }
    1471             :         }
    1472             : 
    1473           0 :       if (did_use_avx2)
    1474             :         {
    1475             :           /* serpent-avx2 assembly code does not use stack */
    1476           0 :           if (nblocks == 0)
    1477           0 :             burn_stack_depth = 0;
    1478             :         }
    1479             : 
    1480             :       /* Use generic/sse2 code to handle smaller chunks... */
    1481             :     }
    1482             : #endif
    1483             : 
    1484             : #ifdef USE_SSE2
    1485             :   {
    1486        1866 :     int did_use_sse2 = 0;
    1487             :     u64 Ls[8];
    1488        1866 :     unsigned int n = 8 - (blkn % 8);
    1489             :     u64 *l;
    1490             : 
    1491        1866 :     if (nblocks >= 8)
    1492             :       {
    1493             :         /* Use u64 to store pointers for x32 support (assembly function
    1494             :          * assumes 64-bit pointers).  Slots are rotated by n, which is
                     :          * derived from the running block counter blkn. */
    1495        1668 :         Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1496        1668 :         Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1497        1668 :         Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1498        1668 :         Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
    1499        1668 :         Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1500        1668 :         Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1501        1668 :         Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1502        1668 :         l = &Ls[(7 + n) % 8];
    1503             : 
    1504             :         /* Process data in 8 block chunks; only the Ls slot *l changes. */
    1505     1207416 :         while (nblocks >= 8)
    1506             :           {
    1507     1204080 :             blkn += 8;
    1508     1204080 :             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
    1509             : 
    1510     1204080 :             _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
    1511     1204080 :                                         c->u_mode.ocb.aad_sum, Ls);
    1512             : 
    1513     1204080 :             nblocks -= 8;
    1514     1204080 :             abuf += 8 * sizeof(serpent_block_t);
    1515     1204080 :             did_use_sse2 = 1;
    1516             :           }
    1517             :       }
    1518             : 
    1519        1866 :     if (did_use_sse2)
    1520             :       {
    1521             :         /* serpent-sse2 assembly code does not use stack */
    1522        1668 :         if (nblocks == 0)
    1523          18 :           burn_stack_depth = 0;
    1524             :       }
    1525             : 
    1526             :     /* Use generic code to handle smaller chunks... */
    1527             :   }
    1528             : #endif
    1529             : 
    1530             : #ifdef USE_NEON
    1531             :   if (ctx->use_neon)
    1532             :     {
    1533             :       int did_use_neon = 0;
    1534             :       const void *Ls[8];
    1535             :       unsigned int n = 8 - (blkn % 8);
    1536             :       const void **l;
    1537             : 
    1538             :       if (nblocks >= 8)
    1539             :         {
    1540             :           Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
    1541             :           Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
    1542             :           Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
    1543             :           Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
    1544             :           Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
    1545             :           Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
    1546             :           Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
    1547             :           l = &Ls[(7 + n) % 8];
    1548             : 
    1549             :           /* Process data in 8 block chunks; only the Ls slot *l changes. */
    1550             :           while (nblocks >= 8)
    1551             :             {
    1552             :               blkn += 8;
    1553             :               *l = ocb_get_l(c, blkn - blkn % 8);
    1554             : 
    1555             :               _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
    1556             :                                           c->u_mode.ocb.aad_sum, Ls);
    1557             : 
    1558             :               nblocks -= 8;
    1559             :               abuf += 8 * sizeof(serpent_block_t);
    1560             :               did_use_neon = 1;
    1561             :             }
    1562             :         }
    1563             : 
    1564             :       if (did_use_neon)
    1565             :         {
    1566             :           /* serpent-neon assembly code does not use stack */
    1567             :           if (nblocks == 0)
    1568             :             burn_stack_depth = 0;
    1569             :         }
    1570             : 
    1571             :       /* Use generic code to handle smaller chunks... */
    1572             :     }
    1573             : #endif
    1574             : 
    1575             : #if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
    1576        1866 :   c->u_mode.ocb.aad_nblocks = blkn;
    1577             :   /* burn_stack_depth is zero when assembly code consumed all blocks.  */
    1578        1866 :   if (burn_stack_depth)
    1579        1848 :     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
    1580             : #endif
    1581             : 
    1582        1866 :   return nblocks;
    1583             : }
    1584             : 
    1585             : 
    1586             : 
    1587             : /* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
    1588             :    encryption.  Returns NULL on success.
                     :    NOTE(review): 16+8+1 blocks presumably covers the 16-block, 8-block
                     :    and single-block code paths -- confirm against the helper. */
    1589             : static const char*
    1590           4 : selftest_ctr_128 (void)
    1591             : {
    1592           4 :   const int nblocks = 16+8+1;
    1593           4 :   const int blocksize = sizeof(serpent_block_t);
    1594           4 :   const int context_size = sizeof(serpent_context_t);
    1595             : 
    1596           4 :   return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
    1597             :            &serpent_encrypt, &_gcry_serpent_ctr_enc, nblocks, blocksize,
    1598             :            context_size);
    1599             : }
    1600             : 
    1601             : 
    1602             : /* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
    1603             :    Returns NULL on success.
                     :    NOTE(review): 16+8+2 blocks presumably covers the 16-block, 8-block
                     :    and single-block code paths -- confirm against the helper. */
    1604             : static const char*
    1605           4 : selftest_cbc_128 (void)
    1606             : {
    1607           4 :   const int nblocks = 16+8+2;
    1608           4 :   const int blocksize = sizeof(serpent_block_t);
    1609           4 :   const int context_size = sizeof(serpent_context_t);
    1610             : 
    1611           4 :   return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
    1612             :            &serpent_encrypt, &_gcry_serpent_cbc_dec, nblocks, blocksize,
    1613             :            context_size);
    1614             : }
    1615             : 
    1616             : 
    1617             : /* Run the self-tests for SERPENT-CFB-128, tests bulk CFB decryption.
    1618             :    Returns NULL on success.
                     :    NOTE(review): 16+8+2 blocks presumably covers the 16-block, 8-block
                     :    and single-block code paths -- confirm against the helper. */
    1619             : static const char*
    1620           4 : selftest_cfb_128 (void)
    1621             : {
    1622           4 :   const int nblocks = 16+8+2;
    1623           4 :   const int blocksize = sizeof(serpent_block_t);
    1624           4 :   const int context_size = sizeof(serpent_context_t);
    1625             : 
    1626           4 :   return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
    1627             :            &serpent_encrypt, &_gcry_serpent_cfb_dec, nblocks, blocksize,
    1628             :            context_size);
    1629             : }
    1630             : 
    1631             : 
    1632             : /* Serpent self-test.  Checks encryption and decryption of the known
                     :    vectors in test_data, then runs the CTR, CBC and CFB bulk
                     :    self-tests.  Returns NULL on success or an error string.  */
    1633             : 
    1634             : static const char *
    1635           4 : serpent_test (void)
    1636             : {
    1637             :   serpent_context_t context;
    1638             :   unsigned char scratch[16];
    1639             :   unsigned int i;
    1640             :   const char *r;
    1641             : 
    1642             :   static struct test
    1643             :   {
    1644             :     int key_length;
    1645             :     unsigned char key[32];
    1646             :     unsigned char text_plain[16];
    1647             :     unsigned char text_cipher[16];
    1648             :   } test_data[] =
    1649             :     {
    1650             :       {
    1651             :         16,
    1652             :         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
    1653             :         "\xD2\x9D\x57\x6F\xCE\xA3\xA3\xA7\xED\x90\x99\xF2\x92\x73\xD7\x8E",
    1654             :         "\xB2\x28\x8B\x96\x8A\xE8\xB0\x86\x48\xD1\xCE\x96\x06\xFD\x99\x2D"
    1655             :       },
    1656             :       {
    1657             :         24,
    1658             :         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    1659             :         "\x00\x00\x00\x00\x00\x00\x00\x00",
    1660             :         "\xD2\x9D\x57\x6F\xCE\xAB\xA3\xA7\xED\x98\x99\xF2\x92\x7B\xD7\x8E",
    1661             :         "\x13\x0E\x35\x3E\x10\x37\xC2\x24\x05\xE8\xFA\xEF\xB2\xC3\xC3\xE9"
    1662             :       },
    1663             :       {
    1664             :         32,
    1665             :         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    1666             :         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
    1667             :         "\xD0\x95\x57\x6F\xCE\xA3\xE3\xA7\xED\x98\xD9\xF2\x90\x73\xD7\x8E",
    1668             :         "\xB9\x0E\xE5\x86\x2D\xE6\x91\x68\xF2\xBD\xD5\x12\x5B\x45\x47\x2B"
    1669             :       },
    1670             :       {
    1671             :         32,
    1672             :         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    1673             :         "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
    1674             :         "\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00",
    1675             :         "\x20\x61\xA4\x27\x82\xBD\x52\xEC\x69\x1E\xC3\x83\xB0\x3B\xA7\x7C"
    1676             :       },
    1677             :       {
    1678             :         0   /* key_length 0 ends the table (see the loop condition) */
    1679             :       },
    1680             :     };
    1681             : 
    1682          20 :   for (i = 0; test_data[i].key_length; i++)
    1683             :     {
    1684          16 :       serpent_setkey_internal (&context, test_data[i].key,
    1685          16 :                                test_data[i].key_length);
    1686          16 :       serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);
    1687             : 
    1688          16 :       if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
    1689           0 :         switch (test_data[i].key_length)
    1690             :           {
    1691             :           case 16:
    1692           0 :             return "Serpent-128 test encryption failed.";
    1693             :           case  24:
    1694           0 :             return "Serpent-192 test encryption failed.";
    1695             :           case 32:
    1696           0 :             return "Serpent-256 test encryption failed.";
    1697             :           }
    1698             :     /* Decrypt the expected ciphertext and compare with the plaintext.  */
    1699          16 :     serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
    1700          16 :     if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
    1701           0 :       switch (test_data[i].key_length)
    1702             :         {
    1703             :         case 16:
    1704           0 :           return "Serpent-128 test decryption failed.";
    1705             :         case  24:
    1706           0 :           return "Serpent-192 test decryption failed.";
    1707             :         case 32:
    1708           0 :           return "Serpent-256 test decryption failed.";
    1709             :         }
    1710             :     }
    1711             : 
    1712           4 :   if ( (r = selftest_ctr_128 ()) )
    1713           0 :     return r;
    1714             : 
    1715           4 :   if ( (r = selftest_cbc_128 ()) )
    1716           0 :     return r;
    1717             : 
    1718           4 :   if ( (r = selftest_cfb_128 ()) )
    1719           0 :     return r;
    1720             : 
    1721           4 :   return NULL;
    1722             : }
    1723             : 
    1724             : /* Per-key-size OID tables: one OID each for ECB, CBC, OFB and CFB mode.  */
    1725             : static gcry_cipher_oid_spec_t serpent128_oids[] =
    1726             :   {
    1727             :     {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
    1728             :     {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
    1729             :     {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
    1730             :     {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
    1731             :     { NULL }
    1732             :   };
    1733             : 
    1734             : static gcry_cipher_oid_spec_t serpent192_oids[] =
    1735             :   {
    1736             :     {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
    1737             :     {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
    1738             :     {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
    1739             :     {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
    1740             :     { NULL }
    1741             :   };
    1742             : 
    1743             : static gcry_cipher_oid_spec_t serpent256_oids[] =
    1744             :   {
    1745             :     {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
    1746             :     {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
    1747             :     {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
    1748             :     {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
    1749             :     { NULL }
    1750             :   };
    1751             : /* Alias name lists, referenced from the cipher specs below.  */
    1752             : static const char *serpent128_aliases[] =
    1753             :   {
    1754             :     "SERPENT",
    1755             :     "SERPENT-128",
    1756             :     NULL
    1757             :   };
    1758             : static const char *serpent192_aliases[] =
    1759             :   {
    1760             :     "SERPENT-192",
    1761             :     NULL
    1762             :   };
    1763             : static const char *serpent256_aliases[] =
    1764             :   {
    1765             :     "SERPENT-256",
    1766             :     NULL
    1767             :   };
    1768             : /* Cipher specs for Serpent-128/192/256; all three share the same handlers.  */
    1769             : gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
    1770             :   {
    1771             :     GCRY_CIPHER_SERPENT128, {0, 0},
    1772             :     "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
    1773             :     sizeof (serpent_context_t),
    1774             :     serpent_setkey, serpent_encrypt, serpent_decrypt
    1775             :   };
    1776             : 
    1777             : gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
    1778             :   {
    1779             :     GCRY_CIPHER_SERPENT192, {0, 0},
    1780             :     "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
    1781             :     sizeof (serpent_context_t),
    1782             :     serpent_setkey, serpent_encrypt, serpent_decrypt
    1783             :   };
    1784             : 
    1785             : gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
    1786             :   {
    1787             :     GCRY_CIPHER_SERPENT256, {0, 0},
    1788             :     "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
    1789             :     sizeof (serpent_context_t),
    1790             :     serpent_setkey, serpent_encrypt, serpent_decrypt
    1791             :   };

Generated by: LCOV version 1.13