LCOV - code coverage report
Current view: top level - cipher - twofish.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 247 340 72.6 %
Date: 2017-03-02 16:44:37 Functions: 21 21 100.0 %

          Line data    Source code
       1             : /* Twofish for GPG
       2             :  * Copyright (C) 1998, 2002, 2003 Free Software Foundation, Inc.
       3             :  * Written by Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
       4             :  * 256-bit key length added March 20, 1999
       5             :  * Some modifications to reduce the text size by Werner Koch, April, 1998
       6             :  *
       7             :  * This file is part of Libgcrypt.
       8             :  *
       9             :  * Libgcrypt is free software; you can redistribute it and/or modify
      10             :  * it under the terms of the GNU Lesser General Public License as
      11             :  * published by the Free Software Foundation; either version 2.1 of
      12             :  * the License, or (at your option) any later version.
      13             :  *
      14             :  * Libgcrypt is distributed in the hope that it will be useful,
      15             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :  * GNU Lesser General Public License for more details.
      18             :  *
      19             :  * You should have received a copy of the GNU Lesser General Public
      20             :  * License along with this program; if not, write to the Free Software
      21             :  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
      22             :  ********************************************************************
      23             :  *
      24             :  * This code is a "clean room" implementation, written from the paper
      25             :  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
      26             :  * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
      27             :  * through http://www.counterpane.com/twofish.html
      28             :  *
      29             :  * For background information on multiplication in finite fields, used for
      30             :  * the matrix operations in the key schedule, see the book _Contemporary
      31             :  * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
      32             :  * Third Edition.
      33             :  *
      34             :  * Only the 128- and 256-bit key sizes are supported.  This code is intended
      35             :  * for GNU C on a 32-bit system, but it should work almost anywhere.  Loops
      36             :  * are unrolled, precomputation tables are used, etc., for maximum speed at
      37             :  * some cost in memory consumption. */
      38             : 
      39             : #include <config.h>
      40             : #include <stdio.h>
      41             : #include <stdlib.h>
      42             : #include <string.h> /* for memcmp() */
      43             : 
      44             : #include "types.h"  /* for byte and u32 typedefs */
      45             : #include "g10lib.h"
      46             : #include "cipher.h"
      47             : #include "bufhelp.h"
      48             : #include "cipher-internal.h"
      49             : #include "cipher-selftest.h"
      50             : 
      51             : 
      52             : #define TWOFISH_BLOCKSIZE 16
      53             : 
      54             : 
      55             : /* USE_AMD64_ASM indicates whether to use AMD64 assembly code (requires __x86_64__ and one of the HAVE_COMPATIBLE_GCC_{AMD64,WIN64}_PLATFORM_AS macros). */
      56             : #undef USE_AMD64_ASM
      57             : #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      58             :     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
      59             : # define USE_AMD64_ASM 1
      60             : #endif /* __x86_64__ && compatible assembler macro */
      61             : 
      62             : /* USE_ARM_ASM indicates whether to use ARM assembly code.  It is set for __ARMEL__ or __AARCH64EL__ targets when the matching HAVE_COMPATIBLE_GCC_*_PLATFORM_AS macro is defined. */
      63             : #undef USE_ARM_ASM
      64             : #if defined(__ARMEL__)
      65             : # if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
      66             : #  define USE_ARM_ASM 1
      67             : # endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */
      68             : #endif /* __ARMEL__ */
      69             : # if defined(__AARCH64EL__)
      70             : #  ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
      71             : #   define USE_ARM_ASM 1
      72             : #  endif /* HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS */
      73             : # endif /* __AARCH64EL__ -- a top-level conditional despite its indentation */
      74             : 
      75             : /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code (same platform test as USE_AMD64_ASM, plus ENABLE_AVX2_SUPPORT). */
      76             : #undef USE_AVX2
      77             : #if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
      78             :     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
      79             : # if defined(ENABLE_AVX2_SUPPORT)
      80             : #  define USE_AVX2 1
      81             : # endif /* ENABLE_AVX2_SUPPORT */
      82             : #endif /* __x86_64__ && compatible assembler macro */
      83             : 
      84             : 
      85             : /* Prototype for the self-test function. */
      86             : static const char *selftest(void);
      87             : 
      88             : /* Structure for an expanded Twofish key.  s contains the key-dependent
      89             :  * S-boxes composed with the MDS matrix; w contains the eight "whitening"
      90             :  * subkeys, K[0] through K[7].  k holds the remaining, "round" subkeys.  Note
      91             :  * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
      92             : typedef struct {
      93             :    u32 s[4][256], w[8], k[32];   /* s: key-dependent S-boxes composed with MDS; w: whitening subkeys; k: round subkeys */
      94             : 
      95             : #ifdef USE_AVX2
      96             :   int use_avx2;   /* member exists only in USE_AVX2 builds; presumably a flag selecting the AVX2 code path -- set and read outside this view, TODO confirm */
      97             : #endif /* USE_AVX2 */
      98             : } TWOFISH_context;
      99             : 
     100             : 
     101             : /* The assembly implementations use the SystemV ABI.  On Win64, calling them
     102             :  * needs ABI conversion and additional stack to store XMM6-XMM15.  ASM_FUNC_ABI is only defined when USE_AVX2 is set. */
     103             : #undef ASM_FUNC_ABI
     104             : #if defined(USE_AVX2)
     105             : # ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     106             : #  define ASM_FUNC_ABI __attribute__((sysv_abi))
     107             : # else
     108             : #  define ASM_FUNC_ABI
     109             : # endif /* HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS */
     110             : #endif /* USE_AVX2 */
     111             : 
     112             : 
     113             : /* These two tables are the q0 and q1 permutations, exactly as described in
     114             :  * the Twofish paper. */
     115             : 
     116             : static const byte q0[256] = {
     117             :    0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76, 0x9A, 0x92, 0x80, 0x78,
     118             :    0xE4, 0xDD, 0xD1, 0x38, 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
     119             :    0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48, 0xF2, 0xD0, 0x8B, 0x30,
     120             :    0x84, 0x54, 0xDF, 0x23, 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
     121             :    0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C, 0xA6, 0xEB, 0xA5, 0xBE,
     122             :    0x16, 0x0C, 0xE3, 0x61, 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
     123             :    0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1, 0xE1, 0xE6, 0xBD, 0x45,
     124             :    0xE2, 0xF4, 0xB6, 0x66, 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
     125             :    0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA, 0xEA, 0x77, 0x39, 0xAF,
     126             :    0x33, 0xC9, 0x62, 0x71, 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
     127             :    0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7, 0xA1, 0x1D, 0xAA, 0xED,
     128             :    0x06, 0x70, 0xB2, 0xD2, 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
     129             :    0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB, 0x9E, 0x9C, 0x52, 0x1B,
     130             :    0x5F, 0x93, 0x0A, 0xEF, 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
     131             :    0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64, 0x2A, 0xCE, 0xCB, 0x2F,
     132             :    0xFC, 0x97, 0x05, 0x7A, 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
     133             :    0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02, 0xB8, 0xDA, 0xB0, 0x17,
     134             :    0x55, 0x1F, 0x8A, 0x7D, 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
     135             :    0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34, 0x6E, 0x50, 0xDE, 0x68,
     136             :    0x65, 0xBC, 0xDB, 0xF8, 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
     137             :    0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00, 0x6F, 0x9D, 0x36, 0x42,
     138             :    0x4A, 0x5E, 0xC1, 0xE0
     139             : };
     140             : 
     141             : static const byte q1[256] = {
     142             :    0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8, 0x4A, 0xD3, 0xE6, 0x6B,
     143             :    0x45, 0x7D, 0xE8, 0x4B, 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
     144             :    0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F, 0x5E, 0xBA, 0xAE, 0x5B,
     145             :    0x8A, 0x00, 0xBC, 0x9D, 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
     146             :    0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3, 0xB2, 0x73, 0x4C, 0x54,
     147             :    0x92, 0x74, 0x36, 0x51, 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
     148             :    0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C, 0x13, 0x95, 0x9C, 0xC7,
     149             :    0x24, 0x46, 0x3B, 0x70, 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
     150             :    0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC, 0x03, 0x6F, 0x08, 0xBF,
     151             :    0x40, 0xE7, 0x2B, 0xE2, 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
     152             :    0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17, 0x66, 0x94, 0xA1, 0x1D,
     153             :    0x3D, 0xF0, 0xDE, 0xB3, 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
     154             :    0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49, 0x81, 0x88, 0xEE, 0x21,
     155             :    0xC4, 0x1A, 0xEB, 0xD9, 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
     156             :    0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48, 0x4F, 0xF2, 0x65, 0x8E,
     157             :    0x78, 0x5C, 0x58, 0x19, 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
     158             :    0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5, 0xCE, 0xE9, 0x68, 0x44,
     159             :    0xE0, 0x4D, 0x43, 0x69, 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
     160             :    0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC, 0x22, 0xC9, 0xC0, 0x9B,
     161             :    0x89, 0xD4, 0xED, 0xAB, 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
     162             :    0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2, 0x16, 0x25, 0x86, 0x56,
     163             :    0x55, 0x09, 0xBE, 0x91
     164             : };
     165             : 
     166             : /* These MDS tables are actually tables of MDS composed with q0 and q1,
     167             :  * because it is only ever used that way and we can save some time by
     168             :  * precomputing.  Of course the main saving comes from precomputing the
     169             :  * GF(2^8) multiplication involved in the MDS matrix multiply; by looking
     170             :  * things up in these tables we reduce the matrix multiply to four lookups
     171             :  * and three XORs.  Semi-formally, the definition of these tables is:
     172             :  * mds[0][i] = MDS (q1[i] 0 0 0)^T  mds[1][i] = MDS (0 q0[i] 0 0)^T
     173             :  * mds[2][i] = MDS (0 0 q1[i] 0)^T  mds[3][i] = MDS (0 0 0 q0[i])^T
     174             :  * where ^T means "transpose", the matrix multiply is performed in GF(2^8)
     175             :  * represented as GF(2)[x]/v(x) where v(x)=x^8+x^6+x^5+x^3+1 as described
     176             :  * by Schneier et al, and I'm casually glossing over the byte/word
     177             :  * conversion issues. */
     178             : 
     179             : static const u32 mds[4][256] = {
     180             :    {0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
     181             :     0xE2E22BFB, 0x9E9EFAC8, 0xC9C9EC4A, 0xD4D409D3, 0x18186BE6, 0x1E1E9F6B,
     182             :     0x98980E45, 0xB2B2387D, 0xA6A6D2E8, 0x2626B74B, 0x3C3C57D6, 0x93938A32,
     183             :     0x8282EED8, 0x525298FD, 0x7B7BD437, 0xBBBB3771, 0x5B5B97F1, 0x474783E1,
     184             :     0x24243C30, 0x5151E20F, 0xBABAC6F8, 0x4A4AF31B, 0xBFBF4887, 0x0D0D70FA,
     185             :     0xB0B0B306, 0x7575DE3F, 0xD2D2FD5E, 0x7D7D20BA, 0x666631AE, 0x3A3AA35B,
     186             :     0x59591C8A, 0x00000000, 0xCDCD93BC, 0x1A1AE09D, 0xAEAE2C6D, 0x7F7FABC1,
     187             :     0x2B2BC7B1, 0xBEBEB90E, 0xE0E0A080, 0x8A8A105D, 0x3B3B52D2, 0x6464BAD5,
     188             :     0xD8D888A0, 0xE7E7A584, 0x5F5FE807, 0x1B1B1114, 0x2C2CC2B5, 0xFCFCB490,
     189             :     0x3131272C, 0x808065A3, 0x73732AB2, 0x0C0C8173, 0x79795F4C, 0x6B6B4154,
     190             :     0x4B4B0292, 0x53536974, 0x94948F36, 0x83831F51, 0x2A2A3638, 0xC4C49CB0,
     191             :     0x2222C8BD, 0xD5D5F85A, 0xBDBDC3FC, 0x48487860, 0xFFFFCE62, 0x4C4C0796,
     192             :     0x4141776C, 0xC7C7E642, 0xEBEB24F7, 0x1C1C1410, 0x5D5D637C, 0x36362228,
     193             :     0x6767C027, 0xE9E9AF8C, 0x4444F913, 0x1414EA95, 0xF5F5BB9C, 0xCFCF18C7,
     194             :     0x3F3F2D24, 0xC0C0E346, 0x7272DB3B, 0x54546C70, 0x29294CCA, 0xF0F035E3,
     195             :     0x0808FE85, 0xC6C617CB, 0xF3F34F11, 0x8C8CE4D0, 0xA4A45993, 0xCACA96B8,
     196             :     0x68683BA6, 0xB8B84D83, 0x38382820, 0xE5E52EFF, 0xADAD569F, 0x0B0B8477,
     197             :     0xC8C81DC3, 0x9999FFCC, 0x5858ED03, 0x19199A6F, 0x0E0E0A08, 0x95957EBF,
     198             :     0x70705040, 0xF7F730E7, 0x6E6ECF2B, 0x1F1F6EE2, 0xB5B53D79, 0x09090F0C,
     199             :     0x616134AA, 0x57571682, 0x9F9F0B41, 0x9D9D803A, 0x111164EA, 0x2525CDB9,
     200             :     0xAFAFDDE4, 0x4545089A, 0xDFDF8DA4, 0xA3A35C97, 0xEAEAD57E, 0x353558DA,
     201             :     0xEDEDD07A, 0x4343FC17, 0xF8F8CB66, 0xFBFBB194, 0x3737D3A1, 0xFAFA401D,
     202             :     0xC2C2683D, 0xB4B4CCF0, 0x32325DDE, 0x9C9C71B3, 0x5656E70B, 0xE3E3DA72,
     203             :     0x878760A7, 0x15151B1C, 0xF9F93AEF, 0x6363BFD1, 0x3434A953, 0x9A9A853E,
     204             :     0xB1B1428F, 0x7C7CD133, 0x88889B26, 0x3D3DA65F, 0xA1A1D7EC, 0xE4E4DF76,
     205             :     0x8181942A, 0x91910149, 0x0F0FFB81, 0xEEEEAA88, 0x161661EE, 0xD7D77321,
     206             :     0x9797F5C4, 0xA5A5A81A, 0xFEFE3FEB, 0x6D6DB5D9, 0x7878AEC5, 0xC5C56D39,
     207             :     0x1D1DE599, 0x7676A4CD, 0x3E3EDCAD, 0xCBCB6731, 0xB6B6478B, 0xEFEF5B01,
     208             :     0x12121E18, 0x6060C523, 0x6A6AB0DD, 0x4D4DF61F, 0xCECEE94E, 0xDEDE7C2D,
     209             :     0x55559DF9, 0x7E7E5A48, 0x2121B24F, 0x03037AF2, 0xA0A02665, 0x5E5E198E,
     210             :     0x5A5A6678, 0x65654B5C, 0x62624E58, 0xFDFD4519, 0x0606F48D, 0x404086E5,
     211             :     0xF2F2BE98, 0x3333AC57, 0x17179067, 0x05058E7F, 0xE8E85E05, 0x4F4F7D64,
     212             :     0x89896AAF, 0x10109563, 0x74742FB6, 0x0A0A75FE, 0x5C5C92F5, 0x9B9B74B7,
     213             :     0x2D2D333C, 0x3030D6A5, 0x2E2E49CE, 0x494989E9, 0x46467268, 0x77775544,
     214             :     0xA8A8D8E0, 0x9696044D, 0x2828BD43, 0xA9A92969, 0xD9D97929, 0x8686912E,
     215             :     0xD1D187AC, 0xF4F44A15, 0x8D8D1559, 0xD6D682A8, 0xB9B9BC0A, 0x42420D9E,
     216             :     0xF6F6C16E, 0x2F2FB847, 0xDDDD06DF, 0x23233934, 0xCCCC6235, 0xF1F1C46A,
     217             :     0xC1C112CF, 0x8585EBDC, 0x8F8F9E22, 0x7171A1C9, 0x9090F0C0, 0xAAAA539B,
     218             :     0x0101F189, 0x8B8BE1D4, 0x4E4E8CED, 0x8E8E6FAB, 0xABABA212, 0x6F6F3EA2,
     219             :     0xE6E6540D, 0xDBDBF252, 0x92927BBB, 0xB7B7B602, 0x6969CA2F, 0x3939D9A9,
     220             :     0xD3D30CD7, 0xA7A72361, 0xA2A2AD1E, 0xC3C399B4, 0x6C6C4450, 0x07070504,
     221             :     0x04047FF6, 0x272746C2, 0xACACA716, 0xD0D07625, 0x50501386, 0xDCDCF756,
     222             :     0x84841A55, 0xE1E15109, 0x7A7A25BE, 0x1313EF91},
     223             : 
     224             :    {0xA9D93939, 0x67901717, 0xB3719C9C, 0xE8D2A6A6, 0x04050707, 0xFD985252,
     225             :     0xA3658080, 0x76DFE4E4, 0x9A084545, 0x92024B4B, 0x80A0E0E0, 0x78665A5A,
     226             :     0xE4DDAFAF, 0xDDB06A6A, 0xD1BF6363, 0x38362A2A, 0x0D54E6E6, 0xC6432020,
     227             :     0x3562CCCC, 0x98BEF2F2, 0x181E1212, 0xF724EBEB, 0xECD7A1A1, 0x6C774141,
     228             :     0x43BD2828, 0x7532BCBC, 0x37D47B7B, 0x269B8888, 0xFA700D0D, 0x13F94444,
     229             :     0x94B1FBFB, 0x485A7E7E, 0xF27A0303, 0xD0E48C8C, 0x8B47B6B6, 0x303C2424,
     230             :     0x84A5E7E7, 0x54416B6B, 0xDF06DDDD, 0x23C56060, 0x1945FDFD, 0x5BA33A3A,
     231             :     0x3D68C2C2, 0x59158D8D, 0xF321ECEC, 0xAE316666, 0xA23E6F6F, 0x82165757,
     232             :     0x63951010, 0x015BEFEF, 0x834DB8B8, 0x2E918686, 0xD9B56D6D, 0x511F8383,
     233             :     0x9B53AAAA, 0x7C635D5D, 0xA63B6868, 0xEB3FFEFE, 0xA5D63030, 0xBE257A7A,
     234             :     0x16A7ACAC, 0x0C0F0909, 0xE335F0F0, 0x6123A7A7, 0xC0F09090, 0x8CAFE9E9,
     235             :     0x3A809D9D, 0xF5925C5C, 0x73810C0C, 0x2C273131, 0x2576D0D0, 0x0BE75656,
     236             :     0xBB7B9292, 0x4EE9CECE, 0x89F10101, 0x6B9F1E1E, 0x53A93434, 0x6AC4F1F1,
     237             :     0xB499C3C3, 0xF1975B5B, 0xE1834747, 0xE66B1818, 0xBDC82222, 0x450E9898,
     238             :     0xE26E1F1F, 0xF4C9B3B3, 0xB62F7474, 0x66CBF8F8, 0xCCFF9999, 0x95EA1414,
     239             :     0x03ED5858, 0x56F7DCDC, 0xD4E18B8B, 0x1C1B1515, 0x1EADA2A2, 0xD70CD3D3,
     240             :     0xFB2BE2E2, 0xC31DC8C8, 0x8E195E5E, 0xB5C22C2C, 0xE9894949, 0xCF12C1C1,
     241             :     0xBF7E9595, 0xBA207D7D, 0xEA641111, 0x77840B0B, 0x396DC5C5, 0xAF6A8989,
     242             :     0x33D17C7C, 0xC9A17171, 0x62CEFFFF, 0x7137BBBB, 0x81FB0F0F, 0x793DB5B5,
     243             :     0x0951E1E1, 0xADDC3E3E, 0x242D3F3F, 0xCDA47676, 0xF99D5555, 0xD8EE8282,
     244             :     0xE5864040, 0xC5AE7878, 0xB9CD2525, 0x4D049696, 0x44557777, 0x080A0E0E,
     245             :     0x86135050, 0xE730F7F7, 0xA1D33737, 0x1D40FAFA, 0xAA346161, 0xED8C4E4E,
     246             :     0x06B3B0B0, 0x706C5454, 0xB22A7373, 0xD2523B3B, 0x410B9F9F, 0x7B8B0202,
     247             :     0xA088D8D8, 0x114FF3F3, 0x3167CBCB, 0xC2462727, 0x27C06767, 0x90B4FCFC,
     248             :     0x20283838, 0xF67F0404, 0x60784848, 0xFF2EE5E5, 0x96074C4C, 0x5C4B6565,
     249             :     0xB1C72B2B, 0xAB6F8E8E, 0x9E0D4242, 0x9CBBF5F5, 0x52F2DBDB, 0x1BF34A4A,
     250             :     0x5FA63D3D, 0x9359A4A4, 0x0ABCB9B9, 0xEF3AF9F9, 0x91EF1313, 0x85FE0808,
     251             :     0x49019191, 0xEE611616, 0x2D7CDEDE, 0x4FB22121, 0x8F42B1B1, 0x3BDB7272,
     252             :     0x47B82F2F, 0x8748BFBF, 0x6D2CAEAE, 0x46E3C0C0, 0xD6573C3C, 0x3E859A9A,
     253             :     0x6929A9A9, 0x647D4F4F, 0x2A948181, 0xCE492E2E, 0xCB17C6C6, 0x2FCA6969,
     254             :     0xFCC3BDBD, 0x975CA3A3, 0x055EE8E8, 0x7AD0EDED, 0xAC87D1D1, 0x7F8E0505,
     255             :     0xD5BA6464, 0x1AA8A5A5, 0x4BB72626, 0x0EB9BEBE, 0xA7608787, 0x5AF8D5D5,
     256             :     0x28223636, 0x14111B1B, 0x3FDE7575, 0x2979D9D9, 0x88AAEEEE, 0x3C332D2D,
     257             :     0x4C5F7979, 0x02B6B7B7, 0xB896CACA, 0xDA583535, 0xB09CC4C4, 0x17FC4343,
     258             :     0x551A8484, 0x1FF64D4D, 0x8A1C5959, 0x7D38B2B2, 0x57AC3333, 0xC718CFCF,
     259             :     0x8DF40606, 0x74695353, 0xB7749B9B, 0xC4F59797, 0x9F56ADAD, 0x72DAE3E3,
     260             :     0x7ED5EAEA, 0x154AF4F4, 0x229E8F8F, 0x12A2ABAB, 0x584E6262, 0x07E85F5F,
     261             :     0x99E51D1D, 0x34392323, 0x6EC1F6F6, 0x50446C6C, 0xDE5D3232, 0x68724646,
     262             :     0x6526A0A0, 0xBC93CDCD, 0xDB03DADA, 0xF8C6BABA, 0xC8FA9E9E, 0xA882D6D6,
     263             :     0x2BCF6E6E, 0x40507070, 0xDCEB8585, 0xFE750A0A, 0x328A9393, 0xA48DDFDF,
     264             :     0xCA4C2929, 0x10141C1C, 0x2173D7D7, 0xF0CCB4B4, 0xD309D4D4, 0x5D108A8A,
     265             :     0x0FE25151, 0x00000000, 0x6F9A1919, 0x9DE01A1A, 0x368F9494, 0x42E6C7C7,
     266             :     0x4AECC9C9, 0x5EFDD2D2, 0xC1AB7F7F, 0xE0D8A8A8},
     267             : 
     268             :    {0xBC75BC32, 0xECF3EC21, 0x20C62043, 0xB3F4B3C9, 0xDADBDA03, 0x027B028B,
     269             :     0xE2FBE22B, 0x9EC89EFA, 0xC94AC9EC, 0xD4D3D409, 0x18E6186B, 0x1E6B1E9F,
     270             :     0x9845980E, 0xB27DB238, 0xA6E8A6D2, 0x264B26B7, 0x3CD63C57, 0x9332938A,
     271             :     0x82D882EE, 0x52FD5298, 0x7B377BD4, 0xBB71BB37, 0x5BF15B97, 0x47E14783,
     272             :     0x2430243C, 0x510F51E2, 0xBAF8BAC6, 0x4A1B4AF3, 0xBF87BF48, 0x0DFA0D70,
     273             :     0xB006B0B3, 0x753F75DE, 0xD25ED2FD, 0x7DBA7D20, 0x66AE6631, 0x3A5B3AA3,
     274             :     0x598A591C, 0x00000000, 0xCDBCCD93, 0x1A9D1AE0, 0xAE6DAE2C, 0x7FC17FAB,
     275             :     0x2BB12BC7, 0xBE0EBEB9, 0xE080E0A0, 0x8A5D8A10, 0x3BD23B52, 0x64D564BA,
     276             :     0xD8A0D888, 0xE784E7A5, 0x5F075FE8, 0x1B141B11, 0x2CB52CC2, 0xFC90FCB4,
     277             :     0x312C3127, 0x80A38065, 0x73B2732A, 0x0C730C81, 0x794C795F, 0x6B546B41,
     278             :     0x4B924B02, 0x53745369, 0x9436948F, 0x8351831F, 0x2A382A36, 0xC4B0C49C,
     279             :     0x22BD22C8, 0xD55AD5F8, 0xBDFCBDC3, 0x48604878, 0xFF62FFCE, 0x4C964C07,
     280             :     0x416C4177, 0xC742C7E6, 0xEBF7EB24, 0x1C101C14, 0x5D7C5D63, 0x36283622,
     281             :     0x672767C0, 0xE98CE9AF, 0x441344F9, 0x149514EA, 0xF59CF5BB, 0xCFC7CF18,
     282             :     0x3F243F2D, 0xC046C0E3, 0x723B72DB, 0x5470546C, 0x29CA294C, 0xF0E3F035,
     283             :     0x088508FE, 0xC6CBC617, 0xF311F34F, 0x8CD08CE4, 0xA493A459, 0xCAB8CA96,
     284             :     0x68A6683B, 0xB883B84D, 0x38203828, 0xE5FFE52E, 0xAD9FAD56, 0x0B770B84,
     285             :     0xC8C3C81D, 0x99CC99FF, 0x580358ED, 0x196F199A, 0x0E080E0A, 0x95BF957E,
     286             :     0x70407050, 0xF7E7F730, 0x6E2B6ECF, 0x1FE21F6E, 0xB579B53D, 0x090C090F,
     287             :     0x61AA6134, 0x57825716, 0x9F419F0B, 0x9D3A9D80, 0x11EA1164, 0x25B925CD,
     288             :     0xAFE4AFDD, 0x459A4508, 0xDFA4DF8D, 0xA397A35C, 0xEA7EEAD5, 0x35DA3558,
     289             :     0xED7AEDD0, 0x431743FC, 0xF866F8CB, 0xFB94FBB1, 0x37A137D3, 0xFA1DFA40,
     290             :     0xC23DC268, 0xB4F0B4CC, 0x32DE325D, 0x9CB39C71, 0x560B56E7, 0xE372E3DA,
     291             :     0x87A78760, 0x151C151B, 0xF9EFF93A, 0x63D163BF, 0x345334A9, 0x9A3E9A85,
     292             :     0xB18FB142, 0x7C337CD1, 0x8826889B, 0x3D5F3DA6, 0xA1ECA1D7, 0xE476E4DF,
     293             :     0x812A8194, 0x91499101, 0x0F810FFB, 0xEE88EEAA, 0x16EE1661, 0xD721D773,
     294             :     0x97C497F5, 0xA51AA5A8, 0xFEEBFE3F, 0x6DD96DB5, 0x78C578AE, 0xC539C56D,
     295             :     0x1D991DE5, 0x76CD76A4, 0x3EAD3EDC, 0xCB31CB67, 0xB68BB647, 0xEF01EF5B,
     296             :     0x1218121E, 0x602360C5, 0x6ADD6AB0, 0x4D1F4DF6, 0xCE4ECEE9, 0xDE2DDE7C,
     297             :     0x55F9559D, 0x7E487E5A, 0x214F21B2, 0x03F2037A, 0xA065A026, 0x5E8E5E19,
     298             :     0x5A785A66, 0x655C654B, 0x6258624E, 0xFD19FD45, 0x068D06F4, 0x40E54086,
     299             :     0xF298F2BE, 0x335733AC, 0x17671790, 0x057F058E, 0xE805E85E, 0x4F644F7D,
     300             :     0x89AF896A, 0x10631095, 0x74B6742F, 0x0AFE0A75, 0x5CF55C92, 0x9BB79B74,
     301             :     0x2D3C2D33, 0x30A530D6, 0x2ECE2E49, 0x49E94989, 0x46684672, 0x77447755,
     302             :     0xA8E0A8D8, 0x964D9604, 0x284328BD, 0xA969A929, 0xD929D979, 0x862E8691,
     303             :     0xD1ACD187, 0xF415F44A, 0x8D598D15, 0xD6A8D682, 0xB90AB9BC, 0x429E420D,
     304             :     0xF66EF6C1, 0x2F472FB8, 0xDDDFDD06, 0x23342339, 0xCC35CC62, 0xF16AF1C4,
     305             :     0xC1CFC112, 0x85DC85EB, 0x8F228F9E, 0x71C971A1, 0x90C090F0, 0xAA9BAA53,
     306             :     0x018901F1, 0x8BD48BE1, 0x4EED4E8C, 0x8EAB8E6F, 0xAB12ABA2, 0x6FA26F3E,
     307             :     0xE60DE654, 0xDB52DBF2, 0x92BB927B, 0xB702B7B6, 0x692F69CA, 0x39A939D9,
     308             :     0xD3D7D30C, 0xA761A723, 0xA21EA2AD, 0xC3B4C399, 0x6C506C44, 0x07040705,
     309             :     0x04F6047F, 0x27C22746, 0xAC16ACA7, 0xD025D076, 0x50865013, 0xDC56DCF7,
     310             :     0x8455841A, 0xE109E151, 0x7ABE7A25, 0x139113EF},
     311             : 
     312             :    {0xD939A9D9, 0x90176790, 0x719CB371, 0xD2A6E8D2, 0x05070405, 0x9852FD98,
     313             :     0x6580A365, 0xDFE476DF, 0x08459A08, 0x024B9202, 0xA0E080A0, 0x665A7866,
     314             :     0xDDAFE4DD, 0xB06ADDB0, 0xBF63D1BF, 0x362A3836, 0x54E60D54, 0x4320C643,
     315             :     0x62CC3562, 0xBEF298BE, 0x1E12181E, 0x24EBF724, 0xD7A1ECD7, 0x77416C77,
     316             :     0xBD2843BD, 0x32BC7532, 0xD47B37D4, 0x9B88269B, 0x700DFA70, 0xF94413F9,
     317             :     0xB1FB94B1, 0x5A7E485A, 0x7A03F27A, 0xE48CD0E4, 0x47B68B47, 0x3C24303C,
     318             :     0xA5E784A5, 0x416B5441, 0x06DDDF06, 0xC56023C5, 0x45FD1945, 0xA33A5BA3,
     319             :     0x68C23D68, 0x158D5915, 0x21ECF321, 0x3166AE31, 0x3E6FA23E, 0x16578216,
     320             :     0x95106395, 0x5BEF015B, 0x4DB8834D, 0x91862E91, 0xB56DD9B5, 0x1F83511F,
     321             :     0x53AA9B53, 0x635D7C63, 0x3B68A63B, 0x3FFEEB3F, 0xD630A5D6, 0x257ABE25,
     322             :     0xA7AC16A7, 0x0F090C0F, 0x35F0E335, 0x23A76123, 0xF090C0F0, 0xAFE98CAF,
     323             :     0x809D3A80, 0x925CF592, 0x810C7381, 0x27312C27, 0x76D02576, 0xE7560BE7,
     324             :     0x7B92BB7B, 0xE9CE4EE9, 0xF10189F1, 0x9F1E6B9F, 0xA93453A9, 0xC4F16AC4,
     325             :     0x99C3B499, 0x975BF197, 0x8347E183, 0x6B18E66B, 0xC822BDC8, 0x0E98450E,
     326             :     0x6E1FE26E, 0xC9B3F4C9, 0x2F74B62F, 0xCBF866CB, 0xFF99CCFF, 0xEA1495EA,
     327             :     0xED5803ED, 0xF7DC56F7, 0xE18BD4E1, 0x1B151C1B, 0xADA21EAD, 0x0CD3D70C,
     328             :     0x2BE2FB2B, 0x1DC8C31D, 0x195E8E19, 0xC22CB5C2, 0x8949E989, 0x12C1CF12,
     329             :     0x7E95BF7E, 0x207DBA20, 0x6411EA64, 0x840B7784, 0x6DC5396D, 0x6A89AF6A,
     330             :     0xD17C33D1, 0xA171C9A1, 0xCEFF62CE, 0x37BB7137, 0xFB0F81FB, 0x3DB5793D,
     331             :     0x51E10951, 0xDC3EADDC, 0x2D3F242D, 0xA476CDA4, 0x9D55F99D, 0xEE82D8EE,
     332             :     0x8640E586, 0xAE78C5AE, 0xCD25B9CD, 0x04964D04, 0x55774455, 0x0A0E080A,
     333             :     0x13508613, 0x30F7E730, 0xD337A1D3, 0x40FA1D40, 0x3461AA34, 0x8C4EED8C,
     334             :     0xB3B006B3, 0x6C54706C, 0x2A73B22A, 0x523BD252, 0x0B9F410B, 0x8B027B8B,
     335             :     0x88D8A088, 0x4FF3114F, 0x67CB3167, 0x4627C246, 0xC06727C0, 0xB4FC90B4,
     336             :     0x28382028, 0x7F04F67F, 0x78486078, 0x2EE5FF2E, 0x074C9607, 0x4B655C4B,
     337             :     0xC72BB1C7, 0x6F8EAB6F, 0x0D429E0D, 0xBBF59CBB, 0xF2DB52F2, 0xF34A1BF3,
     338             :     0xA63D5FA6, 0x59A49359, 0xBCB90ABC, 0x3AF9EF3A, 0xEF1391EF, 0xFE0885FE,
     339             :     0x01914901, 0x6116EE61, 0x7CDE2D7C, 0xB2214FB2, 0x42B18F42, 0xDB723BDB,
     340             :     0xB82F47B8, 0x48BF8748, 0x2CAE6D2C, 0xE3C046E3, 0x573CD657, 0x859A3E85,
     341             :     0x29A96929, 0x7D4F647D, 0x94812A94, 0x492ECE49, 0x17C6CB17, 0xCA692FCA,
     342             :     0xC3BDFCC3, 0x5CA3975C, 0x5EE8055E, 0xD0ED7AD0, 0x87D1AC87, 0x8E057F8E,
     343             :     0xBA64D5BA, 0xA8A51AA8, 0xB7264BB7, 0xB9BE0EB9, 0x6087A760, 0xF8D55AF8,
     344             :     0x22362822, 0x111B1411, 0xDE753FDE, 0x79D92979, 0xAAEE88AA, 0x332D3C33,
     345             :     0x5F794C5F, 0xB6B702B6, 0x96CAB896, 0x5835DA58, 0x9CC4B09C, 0xFC4317FC,
     346             :     0x1A84551A, 0xF64D1FF6, 0x1C598A1C, 0x38B27D38, 0xAC3357AC, 0x18CFC718,
     347             :     0xF4068DF4, 0x69537469, 0x749BB774, 0xF597C4F5, 0x56AD9F56, 0xDAE372DA,
     348             :     0xD5EA7ED5, 0x4AF4154A, 0x9E8F229E, 0xA2AB12A2, 0x4E62584E, 0xE85F07E8,
     349             :     0xE51D99E5, 0x39233439, 0xC1F66EC1, 0x446C5044, 0x5D32DE5D, 0x72466872,
     350             :     0x26A06526, 0x93CDBC93, 0x03DADB03, 0xC6BAF8C6, 0xFA9EC8FA, 0x82D6A882,
     351             :     0xCF6E2BCF, 0x50704050, 0xEB85DCEB, 0x750AFE75, 0x8A93328A, 0x8DDFA48D,
     352             :     0x4C29CA4C, 0x141C1014, 0x73D72173, 0xCCB4F0CC, 0x09D4D309, 0x108A5D10,
     353             :     0xE2510FE2, 0x00000000, 0x9A196F9A, 0xE01A9DE0, 0x8F94368F, 0xE6C742E6,
     354             :     0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8}
     355             : };
     356             : 
     357             : /* The exp_to_poly and poly_to_exp tables are used to perform efficient
     358             :  * operations in GF(2^8) represented as GF(2)[x]/w(x) where
     359             :  * w(x)=x^8+x^6+x^3+x^2+1.  We care about doing that because it's part of the
     360             :  * definition of the RS matrix in the key schedule.  Elements of that field
     361             :  * are polynomials of degree not greater than 7 and all coefficients 0 or 1,
     362             :  * which can be represented naturally by bytes (just substitute x=2).  In that
     363             :  * form, GF(2^8) addition is the same as bitwise XOR, but GF(2^8)
     364             :  * multiplication is inefficient without hardware support.  To multiply
     365             :  * faster, I make use of the fact x is a generator for the nonzero elements,
     366             :  * so that every element p of GF(2)[x]/w(x) is either 0 or equal to (x)^n for
     367             :  * some n in 0..254.  Note that that caret is exponentiation in GF(2^8),
     368             :  * *not* polynomial notation.  So if I want to compute pq where p and q are
     369             :  * in GF(2^8), I can just say:
     370             :  *    1. if p=0 or q=0 then pq=0
     371             :  *    2. otherwise, find m and n such that p=x^m and q=x^n
     372             :  *    3. pq=(x^m)(x^n)=x^(m+n), so add m and n and find pq
     373             :  * The translations in steps 2 and 3 are looked up in the tables
     374             :  * poly_to_exp (for step 2) and exp_to_poly (for step 3).  To see this
     375             :  * in action, look at the CALC_S macro.  As additional wrinkles, note that
     376             :  * one of my operands is always a constant, so the poly_to_exp lookup on it
     377             :  * is done in advance; I included the original values in the comments so
     378             :  * readers can have some chance of recognizing that this *is* the RS matrix
     379             :  * from the Twofish paper.  I've only included the table entries I actually
     380             :  * need; the biggest exponents I'll ever see for nonzero operands are 254
     381             :  * (variable) and 237 (constant), so they'll never sum to more than 491.
     382             :  * I'm repeating part of the exp_to_poly table so that I don't have to do
     383             :  * mod-255 reduction in the exponent arithmetic.  Since I know my constant
     384             :  * operands are never zero, I only have to worry about zero values in the
     385             :  * variable operand.  Those are handled without a conditional branch:
     386             :  * poly_to_exp[0] is the out-of-range exponent 492, and exp_to_poly is
     387             :  * padded with zero entries from index 492 on, so a zero operand selects a
     388             :  * zero product in each of the four group multiplications of CALC_S. */
     389             : 
     390             : static const u16 poly_to_exp[256] = {
     391             :    492, /* Entry for 0, which is not a power of x: 492 plus any constant
                     :          * exponent used with CALC_S indexes the zero-filled tail of
                     :          * exp_to_poly, so a zero operand gives a zero product. */
     392             :    0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
     393             :    0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
     394             :    0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
     395             :    0x9B, 0xB7, 0xC1, 0x31, 0x2B, 0xA7, 0xA3, 0x95, 0x98, 0x4C, 0xCA, 0x1B,
     396             :    0xE6, 0x8D, 0x73, 0x36, 0xCD, 0x82, 0x12, 0x56, 0x62, 0xAB, 0xF0, 0x47,
     397             :    0x4F, 0x0E, 0xBD, 0x06, 0xD4, 0x25, 0xD2, 0x5E, 0x27, 0x88, 0x66, 0x6D,
     398             :    0xD6, 0x9C, 0x79, 0xB8, 0x08, 0xC2, 0xDF, 0x32, 0x68, 0x2C, 0xFD, 0xA8,
     399             :    0x8A, 0xA4, 0x5A, 0x96, 0x29, 0x99, 0x22, 0x4D, 0x60, 0xCB, 0xE4, 0x1C,
     400             :    0x7B, 0xE7, 0x3B, 0x8E, 0x9E, 0x74, 0xF4, 0x37, 0xD8, 0xCE, 0xF9, 0x83,
     401             :    0x6F, 0x13, 0xB2, 0x57, 0xE1, 0x63, 0xDC, 0xAC, 0xC4, 0xF1, 0xAF, 0x48,
     402             :    0x0A, 0x50, 0x42, 0x0F, 0xBA, 0xBE, 0xC7, 0x07, 0xDE, 0xD5, 0x78, 0x26,
     403             :    0x65, 0xD3, 0xD1, 0x5F, 0xE3, 0x28, 0x21, 0x89, 0x59, 0x67, 0xFC, 0x6E,
     404             :    0xB1, 0xD7, 0xF8, 0x9D, 0xF3, 0x7A, 0x3A, 0xB9, 0xC6, 0x09, 0x41, 0xC3,
     405             :    0xAE, 0xE0, 0xDB, 0x33, 0x44, 0x69, 0x92, 0x2D, 0x52, 0xFE, 0x16, 0xA9,
     406             :    0x0C, 0x8B, 0x80, 0xA5, 0x4A, 0x5B, 0xB5, 0x97, 0xC9, 0x2A, 0xA2, 0x9A,
     407             :    0xC0, 0x23, 0x86, 0x4E, 0xBC, 0x61, 0xEF, 0xCC, 0x11, 0xE5, 0x72, 0x1D,
     408             :    0x3D, 0x7C, 0xEB, 0xE8, 0xE9, 0x3C, 0xEA, 0x8F, 0x7D, 0x9F, 0xEC, 0x75,
     409             :    0x1E, 0xF5, 0x3E, 0x38, 0xF6, 0xD9, 0x3F, 0xCF, 0x76, 0xFA, 0x1F, 0x84,
     410             :    0xA0, 0x70, 0xED, 0x14, 0x90, 0xB3, 0x7E, 0x58, 0xFB, 0xE2, 0x20, 0x64,
     411             :    0xD0, 0xDD, 0x77, 0xAD, 0xDA, 0xC5, 0x40, 0xF2, 0x39, 0xB0, 0xF7, 0x49,
     412             :    0xB4, 0x0B, 0x7F, 0x51, 0x15, 0x43, 0x91, 0x10, 0x71, 0xBB, 0xEE, 0xBF,
     413             :    0x85, 0xC8, 0xA1
     414             : };
     415             : 
     416             : static const byte exp_to_poly[492 + 256] = { /* x^n for n = 0..491 (the
                     :     * 255-entry cycle restarts at index 255); the remaining 256 entries
                     :     * have no initializer and are therefore zero. */
     417             :    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
     418             :    0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
     419             :    0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
     420             :    0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52, 0xA4, 0x05, 0x0A,
     421             :    0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0, 0xED, 0x97, 0x63,
     422             :    0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1, 0x0F, 0x1E, 0x3C,
     423             :    0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A, 0xF4, 0xA5, 0x07,
     424             :    0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11, 0x22, 0x44, 0x88,
     425             :    0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51, 0xA2, 0x09, 0x12,
     426             :    0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66, 0xCC, 0xD5, 0xE7,
     427             :    0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB, 0x1B, 0x36, 0x6C,
     428             :    0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19, 0x32, 0x64, 0xC8,
     429             :    0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D, 0x5A, 0xB4, 0x25,
     430             :    0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56, 0xAC, 0x15, 0x2A,
     431             :    0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE, 0x91, 0x6F, 0xDE,
     432             :    0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9, 0x3F, 0x7E, 0xFC,
     433             :    0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE, 0xB1, 0x2F, 0x5E,
     434             :    0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41, 0x82, 0x49, 0x92,
     435             :    0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E, 0x71, 0xE2, 0x89,
     436             :    0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB, 0xDB, 0xFB, 0xBB,
     437             :    0x3B, 0x76, 0xEC, 0x95, 0x67, 0xCE, 0xD1, 0xEF, 0x93, 0x6B, 0xD6, 0xE1,
     438             :    0x8F, 0x53, 0xA6, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D,
     439             :    0x9A, 0x79, 0xF2, 0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC,
     440             :    0xF5, 0xA7, 0x03, 0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3,
     441             :    0x8B, 0x5B, 0xB6, 0x21, 0x42, 0x84, 0x45, 0x8A, 0x59, 0xB2, 0x29, 0x52,
     442             :    0xA4, 0x05, 0x0A, 0x14, 0x28, 0x50, 0xA0, 0x0D, 0x1A, 0x34, 0x68, 0xD0,
     443             :    0xED, 0x97, 0x63, 0xC6, 0xC1, 0xCF, 0xD3, 0xEB, 0x9B, 0x7B, 0xF6, 0xA1,
     444             :    0x0F, 0x1E, 0x3C, 0x78, 0xF0, 0xAD, 0x17, 0x2E, 0x5C, 0xB8, 0x3D, 0x7A,
     445             :    0xF4, 0xA5, 0x07, 0x0E, 0x1C, 0x38, 0x70, 0xE0, 0x8D, 0x57, 0xAE, 0x11,
     446             :    0x22, 0x44, 0x88, 0x5D, 0xBA, 0x39, 0x72, 0xE4, 0x85, 0x47, 0x8E, 0x51,
     447             :    0xA2, 0x09, 0x12, 0x24, 0x48, 0x90, 0x6D, 0xDA, 0xF9, 0xBF, 0x33, 0x66,
     448             :    0xCC, 0xD5, 0xE7, 0x83, 0x4B, 0x96, 0x61, 0xC2, 0xC9, 0xDF, 0xF3, 0xAB,
     449             :    0x1B, 0x36, 0x6C, 0xD8, 0xFD, 0xB7, 0x23, 0x46, 0x8C, 0x55, 0xAA, 0x19,
     450             :    0x32, 0x64, 0xC8, 0xDD, 0xF7, 0xA3, 0x0B, 0x16, 0x2C, 0x58, 0xB0, 0x2D,
     451             :    0x5A, 0xB4, 0x25, 0x4A, 0x94, 0x65, 0xCA, 0xD9, 0xFF, 0xB3, 0x2B, 0x56,
     452             :    0xAC, 0x15, 0x2A, 0x54, 0xA8, 0x1D, 0x3A, 0x74, 0xE8, 0x9D, 0x77, 0xEE,
     453             :    0x91, 0x6F, 0xDE, 0xF1, 0xAF, 0x13, 0x26, 0x4C, 0x98, 0x7D, 0xFA, 0xB9,
     454             :    0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
     455             :    0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
     456             :    0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
     457             :    0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB,
     458             : };
     459             : 
     460             : 
     461             : /* The table constants are indices of
     462             :  * S-box entries, preprocessed through q0 and q1.  They are stored in
                     :  * pairs: for S-box index i, the key setup loops pass entry 2*i as the
                     :  * first and entry 2*i+1 as the second preprocessed index to
                     :  * CALC_SB_2 / CALC_SB256_2.
                     :  * NOTE(review): the table is only read in the key setup code shown
                     :  * here, so it could presumably be declared const -- confirm that
                     :  * nothing else in the file writes to it. */
     463             : static byte calc_sb_tbl[512] = {
     464             :     0xA9, 0x75, 0x67, 0xF3, 0xB3, 0xC6, 0xE8, 0xF4,
     465             :     0x04, 0xDB, 0xFD, 0x7B, 0xA3, 0xFB, 0x76, 0xC8,
     466             :     0x9A, 0x4A, 0x92, 0xD3, 0x80, 0xE6, 0x78, 0x6B,
     467             :     0xE4, 0x45, 0xDD, 0x7D, 0xD1, 0xE8, 0x38, 0x4B,
     468             :     0x0D, 0xD6, 0xC6, 0x32, 0x35, 0xD8, 0x98, 0xFD,
     469             :     0x18, 0x37, 0xF7, 0x71, 0xEC, 0xF1, 0x6C, 0xE1,
     470             :     0x43, 0x30, 0x75, 0x0F, 0x37, 0xF8, 0x26, 0x1B,
     471             :     0xFA, 0x87, 0x13, 0xFA, 0x94, 0x06, 0x48, 0x3F,
     472             :     0xF2, 0x5E, 0xD0, 0xBA, 0x8B, 0xAE, 0x30, 0x5B,
     473             :     0x84, 0x8A, 0x54, 0x00, 0xDF, 0xBC, 0x23, 0x9D,
     474             :     0x19, 0x6D, 0x5B, 0xC1, 0x3D, 0xB1, 0x59, 0x0E,
     475             :     0xF3, 0x80, 0xAE, 0x5D, 0xA2, 0xD2, 0x82, 0xD5,
     476             :     0x63, 0xA0, 0x01, 0x84, 0x83, 0x07, 0x2E, 0x14,
     477             :     0xD9, 0xB5, 0x51, 0x90, 0x9B, 0x2C, 0x7C, 0xA3,
     478             :     0xA6, 0xB2, 0xEB, 0x73, 0xA5, 0x4C, 0xBE, 0x54,
     479             :     0x16, 0x92, 0x0C, 0x74, 0xE3, 0x36, 0x61, 0x51,
     480             :     0xC0, 0x38, 0x8C, 0xB0, 0x3A, 0xBD, 0xF5, 0x5A,
     481             :     0x73, 0xFC, 0x2C, 0x60, 0x25, 0x62, 0x0B, 0x96,
     482             :     0xBB, 0x6C, 0x4E, 0x42, 0x89, 0xF7, 0x6B, 0x10,
     483             :     0x53, 0x7C, 0x6A, 0x28, 0xB4, 0x27, 0xF1, 0x8C,
     484             :     0xE1, 0x13, 0xE6, 0x95, 0xBD, 0x9C, 0x45, 0xC7,
     485             :     0xE2, 0x24, 0xF4, 0x46, 0xB6, 0x3B, 0x66, 0x70,
     486             :     0xCC, 0xCA, 0x95, 0xE3, 0x03, 0x85, 0x56, 0xCB,
     487             :     0xD4, 0x11, 0x1C, 0xD0, 0x1E, 0x93, 0xD7, 0xB8,
     488             :     0xFB, 0xA6, 0xC3, 0x83, 0x8E, 0x20, 0xB5, 0xFF,
     489             :     0xE9, 0x9F, 0xCF, 0x77, 0xBF, 0xC3, 0xBA, 0xCC,
     490             :     0xEA, 0x03, 0x77, 0x6F, 0x39, 0x08, 0xAF, 0xBF,
     491             :     0x33, 0x40, 0xC9, 0xE7, 0x62, 0x2B, 0x71, 0xE2,
     492             :     0x81, 0x79, 0x79, 0x0C, 0x09, 0xAA, 0xAD, 0x82,
     493             :     0x24, 0x41, 0xCD, 0x3A, 0xF9, 0xEA, 0xD8, 0xB9,
     494             :     0xE5, 0xE4, 0xC5, 0x9A, 0xB9, 0xA4, 0x4D, 0x97,
     495             :     0x44, 0x7E, 0x08, 0xDA, 0x86, 0x7A, 0xE7, 0x17,
     496             :     0xA1, 0x66, 0x1D, 0x94, 0xAA, 0xA1, 0xED, 0x1D,
     497             :     0x06, 0x3D, 0x70, 0xF0, 0xB2, 0xDE, 0xD2, 0xB3,
     498             :     0x41, 0x0B, 0x7B, 0x72, 0xA0, 0xA7, 0x11, 0x1C,
     499             :     0x31, 0xEF, 0xC2, 0xD1, 0x27, 0x53, 0x90, 0x3E,
     500             :     0x20, 0x8F, 0xF6, 0x33, 0x60, 0x26, 0xFF, 0x5F,
     501             :     0x96, 0xEC, 0x5C, 0x76, 0xB1, 0x2A, 0xAB, 0x49,
     502             :     0x9E, 0x81, 0x9C, 0x88, 0x52, 0xEE, 0x1B, 0x21,
     503             :     0x5F, 0xC4, 0x93, 0x1A, 0x0A, 0xEB, 0xEF, 0xD9,
     504             :     0x91, 0xC5, 0x85, 0x39, 0x49, 0x99, 0xEE, 0xCD,
     505             :     0x2D, 0xAD, 0x4F, 0x31, 0x8F, 0x8B, 0x3B, 0x01,
     506             :     0x47, 0x18, 0x87, 0x23, 0x6D, 0xDD, 0x46, 0x1F,
     507             :     0xD6, 0x4E, 0x3E, 0x2D, 0x69, 0xF9, 0x64, 0x48,
     508             :     0x2A, 0x4F, 0xCE, 0xF2, 0xCB, 0x65, 0x2F, 0x8E,
     509             :     0xFC, 0x78, 0x97, 0x5C, 0x05, 0x58, 0x7A, 0x19,
     510             :     0xAC, 0x8D, 0x7F, 0xE5, 0xD5, 0x98, 0x1A, 0x57,
     511             :     0x4B, 0x67, 0x0E, 0x7F, 0xA7, 0x05, 0x5A, 0x64,
     512             :     0x28, 0xAF, 0x14, 0x63, 0x3F, 0xB6, 0x29, 0xFE,
     513             :     0x88, 0xF5, 0x3C, 0xB7, 0x4C, 0x3C, 0x02, 0xA5,
     514             :     0xB8, 0xCE, 0xDA, 0xE9, 0xB0, 0x68, 0x17, 0x44,
     515             :     0x55, 0xE0, 0x1F, 0x4D, 0x8A, 0x43, 0x7D, 0x69,
     516             :     0x57, 0x29, 0xC7, 0x2E, 0x8D, 0xAC, 0x74, 0x15,
     517             :     0xB7, 0x59, 0xC4, 0xA8, 0x9F, 0x0A, 0x72, 0x9E,
     518             :     0x7E, 0x6E, 0x15, 0x47, 0x22, 0xDF, 0x12, 0x34,
     519             :     0x58, 0x35, 0x07, 0x6A, 0x99, 0xCF, 0x34, 0xDC,
     520             :     0x6E, 0x22, 0x50, 0xC9, 0xDE, 0xC0, 0x68, 0x9B,
     521             :     0x65, 0x89, 0xBC, 0xD4, 0xDB, 0xED, 0xF8, 0xAB,
     522             :     0xC8, 0x12, 0xA8, 0xA2, 0x2B, 0x0D, 0x40, 0x52,
     523             :     0xDC, 0xBB, 0xFE, 0x02, 0x32, 0x2F, 0xA4, 0xA9,
     524             :     0xCA, 0xD7, 0x10, 0x61, 0x21, 0x1E, 0xF0, 0xB4,
     525             :     0xD3, 0x50, 0x5D, 0x04, 0x0F, 0xF6, 0x00, 0xC2,
     526             :     0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
     527             :     0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
     528             : };
     529             : 
     530             : /* Macro to perform one column of the RS matrix multiplication.  The
     531             :  * parameters a, b, c, and d are the four bytes of output; i is the index
     532             :  * of the key bytes, and w, x, y, and z, are the column of constants from
     533             :  * the RS matrix, preprocessed through the poly_to_exp table.
                     :  *
                     :  * The expansion uses `key' and the temporary `tmp' from the invoking
                     :  * scope.  Each product is looked up as
                     :  * exp_to_poly[poly_to_exp[key[i]] + constant] and XORed into the
                     :  * corresponding output byte.  There is no branch for a zero key byte:
                     :  * poly_to_exp maps 0 to 492, which together with any of the constants
                     :  * lands in the zero-filled part of exp_to_poly. */
     534             : 
     535             : #define CALC_S(a, b, c, d, i, w, x, y, z) \
     536             :    { \
     537             :       tmp = poly_to_exp[key[i]]; \
     538             :       (a) ^= exp_to_poly[tmp + (w)]; \
     539             :       (b) ^= exp_to_poly[tmp + (x)]; \
     540             :       (c) ^= exp_to_poly[tmp + (y)]; \
     541             :       (d) ^= exp_to_poly[tmp + (z)]; \
     542             :    }
     543             : 
     544             : /* Macros to calculate the key-dependent S-boxes for a 128-bit key using
     545             :  * the S vector from CALC_S.  CALC_SB_2 computes a single entry in all
     546             :  * four S-boxes, where i is the index of the entry to compute, and a and b
     547             :  * are the index numbers preprocessed through the q0 and q1 tables
     548             :  * respectively.  CALC_SB is simply a convenience to make the code shorter;
     549             :  * it calls CALC_SB_2 four times with consecutive indices from i to i+3,
     550             :  * using the remaining parameters two by two.
                     :  *
                     :  * Both macros use ctx, mds, q0, q1 and the S vector bytes sa..sh from
                     :  * the invoking scope.  CALC_SB_2 expands to four assignment statements
                     :  * (the last one without its terminating semicolon) that are not
                     :  * wrapped in a block, so when it forms the body of a loop or an if it
                     :  * has to be placed inside braces. */
     551             : 
     552             : #define CALC_SB_2(i, a, b) \
     553             :    ctx->s[0][i] = mds[0][q0[(a) ^ sa] ^ se]; \
     554             :    ctx->s[1][i] = mds[1][q0[(b) ^ sb] ^ sf]; \
     555             :    ctx->s[2][i] = mds[2][q1[(a) ^ sc] ^ sg]; \
     556             :    ctx->s[3][i] = mds[3][q1[(b) ^ sd] ^ sh]
     557             : 
     558             : #define CALC_SB(i, a, b, c, d, e, f, g, h) \
     559             :    CALC_SB_2 (i, a, b); CALC_SB_2 ((i)+1, c, d); \
     560             :    CALC_SB_2 ((i)+2, e, f); CALC_SB_2 ((i)+3, g, h)
     561             : 
     562             : /* Macros exactly like CALC_SB and CALC_SB_2, but for 256-bit keys.
                     :  * They use all sixteen S vector bytes sa..sp from the invoking scope.
                     :  * Unlike CALC_SB_2, the expansion of CALC_SB256_2 already ends in a
                     :  * semicolon, so writing `CALC_SB256_2 (...);' leaves an additional
                     :  * empty statement behind; like CALC_SB_2 it is not wrapped in a block
                     :  * and must be used inside braces. */
     563             : 
     564             : #define CALC_SB256_2(i, a, b) \
     565             :    ctx->s[0][i] = mds[0][q0[q0[q1[(b) ^ sa] ^ se] ^ si] ^ sm]; \
     566             :    ctx->s[1][i] = mds[1][q0[q1[q1[(a) ^ sb] ^ sf] ^ sj] ^ sn]; \
     567             :    ctx->s[2][i] = mds[2][q1[q0[q0[(a) ^ sc] ^ sg] ^ sk] ^ so]; \
     568             :    ctx->s[3][i] = mds[3][q1[q1[q0[(b) ^ sd] ^ sh] ^ sl] ^ sp];
     569             : 
     570             : #define CALC_SB256(i, a, b, c, d, e, f, g, h) \
     571             :    CALC_SB256_2 (i, a, b); CALC_SB256_2 ((i)+1, c, d); \
     572             :    CALC_SB256_2 ((i)+2, e, f); CALC_SB256_2 ((i)+3, g, h)
     573             : 
     574             : /* Macros to calculate the whitening and round subkeys.  CALC_K_2 computes the
     575             :  * last two stages of the h() function for a given index (either 2i or 2i+1).
     576             :  * a, b, c, and d are the four bytes going into the last two stages.  For
     577             :  * 128-bit keys, this is the entire h() function and a and c are the index
     578             :  * preprocessed through q0 and q1 respectively; for longer keys they are the
     579             :  * output of previous stages.  j is the index of the first key byte to use.
     580             :  * CALC_K computes a pair of subkeys for 128-bit Twofish, by calling CALC_K_2
     581             :  * twice, doing the Pseudo-Hadamard Transform, and doing the necessary
     582             :  * rotations.  Its parameters are: a, the array to write the results into,
     583             :  * j, the index of the first output entry, k and l, the preprocessed indices
     584             :  * for index 2i, and m and n, the preprocessed indices for index 2i+1.
     585             :  * CALC_K256_2 expands CALC_K_2 to handle 256-bit keys, by doing two
     586             :  * additional lookup-and-XOR stages.  The parameters a and b are the index
     587             :  * preprocessed through q0 and q1 respectively; j is the index of the first
     588             :  * key byte to use.  CALC_K256 is identical to CALC_K but for using the
     589             :  * CALC_K256_2 macro instead of CALC_K_2.
                     :  *
                     :  * All of these macros read `key', mds, q0 and q1 from the invoking
                     :  * scope; CALC_K and CALC_K256 additionally use `ctx' and the
                     :  * temporaries x and y (declared as u32 in do_twofish_setkey).  The
                     :  * first parameter of CALC_K and CALC_K256 is substituted as a member
                     :  * name (ctx->a[j]), so the argument must name an array member of
                     :  * *ctx rather than be an arbitrary expression.  The a, b, c and d
                     :  * arguments of CALC_K_2 are substituted without parentheses.  The
                     :  * shift pairs (y << 8) + (y >> 24) and (y << 9) + (y >> 23) are the
                     :  * rotations mentioned above: left by 8 and by 9 bits for a 32-bit y. */
     590             : 
     591             : #define CALC_K_2(a, b, c, d, j) \
     592             :      mds[0][q0[a ^ key[(j) + 8]] ^ key[j]] \
     593             :    ^ mds[1][q0[b ^ key[(j) + 9]] ^ key[(j) + 1]] \
     594             :    ^ mds[2][q1[c ^ key[(j) + 10]] ^ key[(j) + 2]] \
     595             :    ^ mds[3][q1[d ^ key[(j) + 11]] ^ key[(j) + 3]]
     596             : 
     597             : #define CALC_K(a, j, k, l, m, n) \
     598             :    x = CALC_K_2 (k, l, k, l, 0); \
     599             :    y = CALC_K_2 (m, n, m, n, 4); \
     600             :    y = (y << 8) + (y >> 24); \
     601             :    x += y; y += x; ctx->a[j] = x; \
     602             :    ctx->a[(j) + 1] = (y << 9) + (y >> 23)
     603             : 
     604             : #define CALC_K256_2(a, b, j) \
     605             :    CALC_K_2 (q0[q1[b ^ key[(j) + 24]] ^ key[(j) + 16]], \
     606             :              q1[q1[a ^ key[(j) + 25]] ^ key[(j) + 17]], \
     607             :              q0[q0[a ^ key[(j) + 26]] ^ key[(j) + 18]], \
     608             :              q1[q0[b ^ key[(j) + 27]] ^ key[(j) + 19]], j)
     609             : 
     610             : #define CALC_K256(a, j, k, l, m, n) \
     611             :    x = CALC_K256_2 (k, l, 0); \
     612             :    y = CALC_K256_2 (m, n, 4); \
     613             :    y = (y << 8) + (y >> 24); \
     614             :    x += y; y += x; ctx->a[j] = x; \
     615             :    ctx->a[(j) + 1] = (y << 9) + (y >> 23)
     616             : 
     617             : 
     618             : 
     619             : /* Perform the key setup.  Note that this works only with 128- and 256-bit
     620             :  * keys, despite the API that looks like it might support other sizes.
                     :  *
                     :  * ctx receives the key-dependent S-boxes (ctx->s) and the whitening and
                     :  * round subkeys (ctx->w and ctx->k); key points to keylen bytes of key
                     :  * material.  Returns 0 on success, GPG_ERR_INV_KEYLEN if keylen is
                     :  * neither 16 nor 32, and GPG_ERR_SELFTEST_FAILED if the one-time
                     :  * self-test reported a failure. */
     621             : 
     622             : static gcry_err_code_t
     623         928 : do_twofish_setkey (TWOFISH_context *ctx, const byte *key, const unsigned keylen)
     624             : {
     625             :   int i, j, k; /* i: S-box entry / q-table index; j, k: table or subkey indices */
     626             : 
     627             :   /* Temporaries for CALC_K. */
     628             :   u32 x, y;
     629             : 
     630             :   /* The S vector used to key the S-boxes, split up into individual bytes.
     631             :    * 128-bit keys use only sa through sh; 256-bit use all of them. */
     632         928 :   byte sa = 0, sb = 0, sc = 0, sd = 0, se = 0, sf = 0, sg = 0, sh = 0;
     633         928 :   byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
     634             : 
     635             :   /* Temporary for CALC_S. */
     636             :   unsigned int tmp;
     637             : 
     638             :   /* Flags for self-test. */
     639             :   static int initialized = 0;
     640             :   static const char *selftest_failed=0;
     641             : 
     642             :   /* Check key length.  keylen is unsigned, so ((keylen - 16) | 16) equals
                     :    * 16 only when keylen - 16 is 0 or 16, i.e. for keylen 16 or 32; a
                     :    * keylen below 16 wraps around to a large value and is rejected. */
     643         928 :   if( ( ( keylen - 16 ) | 16 ) != 16 )
     644           0 :     return GPG_ERR_INV_KEYLEN;
     645             : 
     646             :   /* Do self-test if necessary.  It runs on the first call only; the
                     :    * result is kept in the static selftest_failed, so a failure is
                     :    * returned again by every later call.
                     :    * NOTE(review): initialized is set before selftest () is called,
                     :    * presumably because the self-test itself ends up in this function
                     :    * -- confirm against selftest (). */
     647         928 :   if (!initialized)
     648             :     {
     649           4 :       initialized = 1;
     650           4 :       selftest_failed = selftest ();
     651           4 :       if( selftest_failed )
     652           0 :         log_error("%s\n", selftest_failed );
     653             :     }
     654         928 :   if( selftest_failed )
     655           0 :     return GPG_ERR_SELFTEST_FAILED;
     656             : 
     657             :   /* Compute the first two words of the S vector.  The magic numbers are
     658             :    * the entries of the RS matrix, preprocessed through poly_to_exp.    The
     659             :    * numbers in the comments are the original (polynomial form) matrix
     660             :    * entries.  Key bytes 0..7 feed sa..sd and key bytes 8..15 feed
                     :    * se..sh. */
     661         928 :   CALC_S (sa, sb, sc, sd, 0, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
     662         928 :   CALC_S (sa, sb, sc, sd, 1, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
     663         928 :   CALC_S (sa, sb, sc, sd, 2, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
     664         928 :   CALC_S (sa, sb, sc, sd, 3, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
     665         928 :   CALC_S (sa, sb, sc, sd, 4, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
     666         928 :   CALC_S (sa, sb, sc, sd, 5, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
     667         928 :   CALC_S (sa, sb, sc, sd, 6, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
     668         928 :   CALC_S (sa, sb, sc, sd, 7, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
     669         928 :   CALC_S (se, sf, sg, sh, 8, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
     670         928 :   CALC_S (se, sf, sg, sh, 9, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
     671         928 :   CALC_S (se, sf, sg, sh, 10, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
     672         928 :   CALC_S (se, sf, sg, sh, 11, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
     673         928 :   CALC_S (se, sf, sg, sh, 12, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
     674         928 :   CALC_S (se, sf, sg, sh, 13, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
     675         928 :   CALC_S (se, sf, sg, sh, 14, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
     676         928 :   CALC_S (se, sf, sg, sh, 15, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
     677             : 
     678         928 :   if (keylen == 32)  /* 256-bit key */
     679             :     {
     680             :       /* Calculate the remaining two words of the S vector */
     681         459 :       CALC_S (si, sj, sk, sl, 16, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
     682         459 :       CALC_S (si, sj, sk, sl, 17, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
     683         459 :       CALC_S (si, sj, sk, sl, 18, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
     684         459 :       CALC_S (si, sj, sk, sl, 19, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
     685         459 :       CALC_S (si, sj, sk, sl, 20, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
     686         459 :       CALC_S (si, sj, sk, sl, 21, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
     687         459 :       CALC_S (si, sj, sk, sl, 22, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
     688         459 :       CALC_S (si, sj, sk, sl, 23, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
     689         459 :       CALC_S (sm, sn, so, sp, 24, 0x00, 0x2D, 0x01, 0x2D); /* 01 A4 02 A4 */
     690         459 :       CALC_S (sm, sn, so, sp, 25, 0x2D, 0xA4, 0x44, 0x8A); /* A4 56 A1 55 */
     691         459 :       CALC_S (sm, sn, so, sp, 26, 0x8A, 0xD5, 0xBF, 0xD1); /* 55 82 FC 87 */
     692         459 :       CALC_S (sm, sn, so, sp, 27, 0xD1, 0x7F, 0x3D, 0x99); /* 87 F3 C1 5A */
     693         459 :       CALC_S (sm, sn, so, sp, 28, 0x99, 0x46, 0x66, 0x96); /* 5A 1E 47 58 */
     694         459 :       CALC_S (sm, sn, so, sp, 29, 0x96, 0x3C, 0x5B, 0xED); /* 58 C6 AE DB */
     695         459 :       CALC_S (sm, sn, so, sp, 30, 0xED, 0x37, 0x4F, 0xE0); /* DB 68 3D 9E */
     696         459 :       CALC_S (sm, sn, so, sp, 31, 0xE0, 0xD0, 0x8C, 0x17); /* 9E E5 19 03 */
     697             : 
     698             :       /* Compute the S-boxes.  j and k walk the even and odd entries of
                     :        * calc_sb_tbl, i.e. the pair belonging to S-box index i. */
     699      117963 :       for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
     700             :         {
     701      117504 :           CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
     702             :         }
     703             : 
     704             :       /* Calculate whitening and round subkeys.  The first loop fills
                     :        * ctx->w[0..7]; i is not reset afterwards, so the second loop
                     :        * continues with q0/q1 indices 8..39 while filling ctx->k[0..31]. */
     705        2295 :       for (i = 0; i < 8; i += 2)
     706             :         {
     707        1836 :           CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
     708             :         }
     709        7803 :       for (j = 0; j < 32; j += 2, i += 2)
     710             :         {
     711        7344 :           CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
     712             :         }
     713             :     }
     714             :   else
     715             :     {
     716             :       /* Compute the S-boxes.  j and k walk the even and odd entries of
                     :        * calc_sb_tbl, i.e. the pair belonging to S-box index i. */
     717      120533 :       for(i=j=0,k=1; i < 256; i++, j += 2, k += 2 )
     718             :         {
     719      120064 :           CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
     720             :         }
     721             : 
     722             :       /* Calculate whitening and round subkeys.  The first loop fills
                     :        * ctx->w[0..7]; i is not reset afterwards, so the second loop
                     :        * continues with q0/q1 indices 8..39 while filling ctx->k[0..31]. */
     723        2345 :       for (i = 0; i < 8; i += 2)
     724             :         {
     725        1876 :           CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
     726             :         }
     727        7973 :       for (j = 0; j < 32; j += 2, i += 2)
     728             :         {
     729        7504 :           CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
     730             :         }
     731             :     }
     732             : 
     733         928 :   return 0;
     734             : }
     735             : 
     736             : static gcry_err_code_t /* Key setup wrapper: runs do_twofish_setkey on the
                     :    * TWOFISH_context passed as `context' and returns its result code.
                     :    * When built with USE_AVX2, ctx->use_avx2 is set to 1 only if the
                     :    * hardware feature mask contains both HWF_INTEL_AVX2 and
                     :    * HWF_INTEL_FAST_VPGATHER; this is done whether or not the key setup
                     :    * succeeded. */
     737         928 : twofish_setkey (void *context, const byte *key, unsigned int keylen)
     738             : {
     739         928 :   TWOFISH_context *ctx = context;
     740         928 :   unsigned int hwfeatures = _gcry_get_hw_features ();
     741             :   int rc;
     742             : 
     743         928 :   rc = do_twofish_setkey (ctx, key, keylen);
     744             : 
     745             : #ifdef USE_AVX2
     746         928 :   ctx->use_avx2 = 0;
     747         928 :   if ((hwfeatures & HWF_INTEL_AVX2) && (hwfeatures & HWF_INTEL_FAST_VPGATHER))
     748             :     {
     749           0 :       ctx->use_avx2 = 1;
     750             :     }
     751             : #endif
     752             : 
     753             :   (void)hwfeatures; /* keeps hwfeatures referenced when USE_AVX2 is not defined */
     754             : 
     755         928 :   _gcry_burn_stack (23+6*sizeof(void*)); /* presumably wipes key-derived data left on the stack -- confirm against _gcry_burn_stack */
     756         928 :   return rc;
     757             : }
     758             : 
     759             : 
     760             : #ifdef USE_AVX2
     761             : /* Assembler implementations of Twofish using AVX2.  These process 16 blocks
     762             :    in parallel.
     763             :  */
     764             : extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
     765             :                                        unsigned char *out,
     766             :                                        const unsigned char *in,
     767             :                                        unsigned char *ctr) ASM_FUNC_ABI;
     768             : 
     769             : extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx,
     770             :                                        unsigned char *out,
     771             :                                        const unsigned char *in,
     772             :                                        unsigned char *iv) ASM_FUNC_ABI;
     773             : 
     774             : extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx,
     775             :                                        unsigned char *out,
     776             :                                        const unsigned char *in,
     777             :                                        unsigned char *iv) ASM_FUNC_ABI;
     778             : 
     779             : extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx,
     780             :                                        unsigned char *out,
     781             :                                        const unsigned char *in,
     782             :                                        unsigned char *offset,
     783             :                                        unsigned char *checksum,
     784             :                                        const u64 Ls[16]) ASM_FUNC_ABI;
     785             : 
     786             : extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx,
     787             :                                        unsigned char *out,
     788             :                                        const unsigned char *in,
     789             :                                        unsigned char *offset,
     790             :                                        unsigned char *checksum,
     791             :                                        const u64 Ls[16]) ASM_FUNC_ABI;
     792             : 
     793             : extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
     794             :                                         const unsigned char *abuf,
     795             :                                         unsigned char *offset,
     796             :                                         unsigned char *checksum,
     797             :                                         const u64 Ls[16]) ASM_FUNC_ABI;
     798             : #endif
     799             : 
     800             : 
     801             : #ifdef USE_AMD64_ASM
     802             : 
     803             : /* Assembly implementations of Twofish. */
     804             : extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c,
     805             :                                               byte *out, const byte *in);
     806             : 
     807             : extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
     808             :                                               byte *out, const byte *in);
     809             : 
     810             : /* These assembly implementations process three blocks in parallel. */
     811             : extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
     812             :                                         const byte *in, byte *ctr);
     813             : 
     814             : extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
     815             :                                         const byte *in, byte *iv);
     816             : 
     817             : extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
     818             :                                         const byte *in, byte *iv);
     819             : 
     820             : extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
     821             :                                         const byte *in, byte *offset,
     822             :                                         byte *checksum, const u64 Ls[3]);
     823             : 
     824             : extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
     825             :                                         const byte *in, byte *offset,
     826             :                                         byte *checksum, const u64 Ls[3]);
     827             : 
     828             : extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
     829             :                                          const byte *abuf, byte *offset,
     830             :                                          byte *checksum, const u64 Ls[3]);
     831             : 
     832             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     833             : static inline void
     834             : call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
     835             :               const void *arg3, const void *arg4)
     836             : {
     837             :   /* Call SystemV ABI function without storing non-volatile XMM registers,
     838             :    * as target function does not use vector instruction sets.
                     :    *
                     :    * fn is called indirectly through the "a" register; arg1..arg4 are
                     :    * pinned by their constraints to the "D", "S", "d" and "c" registers
                     :    * (rdi, rsi, rdx, rcx for 64-bit operands).  All five are read-write
                     :    * ("+") operands, so the compiler does not assume their values
                     :    * survive the call; r8-r11, the flags and memory are declared
                     :    * clobbered.  Nothing is returned from the callee. */
     839             :   asm volatile ("callq *%0\n\t"
     840             :                 : "+a" (fn),
     841             :                   "+D" (arg1),
     842             :                   "+S" (arg2),
     843             :                   "+d" (arg3),
     844             :                   "+c" (arg4)
     845             :                 :
     846             :                 : "cc", "memory", "r8", "r9", "r10", "r11");
     847             : }
     848             : 
     849             : static inline void
     850             : call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2,
     851             :                const void *arg3, const void *arg4, const void *arg5)
     852             : {
     853             :   /* Call SystemV ABI function without storing non-volatile XMM registers,
     854             :    * as target function does not use vector instruction sets. */
     855             :   asm volatile ("movq %[arg5], %%r8\n\t"
     856             :                 "callq *%0\n\t"
     857             :                 : "+a" (fn),
     858             :                   "+D" (arg1),
     859             :                   "+S" (arg2),
     860             :                   "+d" (arg3),
     861             :                   "+c" (arg4)
     862             :                 : [arg5] "g" (arg5)
     863             :                 : "cc", "memory", "r8", "r9", "r10", "r11");
     864             : }
     865             : 
     866             : static inline void
     867             : call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2,
     868             :                const void *arg3, const void *arg4, const void *arg5,
     869             :                const void *arg6)
     870             : {
     871             :   /* Call SystemV ABI function without storing non-volatile XMM registers,
     872             :    * as target function does not use vector instruction sets. */
     873             :   asm volatile ("movq %[arg5], %%r8\n\t"
     874             :                 "movq %[arg6], %%r9\n\t"
     875             :                 "callq *%0\n\t"
     876             :                 : "+a" (fn),
     877             :                   "+D" (arg1),
     878             :                   "+S" (arg2),
     879             :                   "+d" (arg3),
     880             :                   "+c" (arg4)
     881             :                 : [arg5] "g" (arg5),
     882             :                   [arg6] "g" (arg6)
     883             :                 : "cc", "memory", "r8", "r9", "r10", "r11");
     884             : }
     885             : #endif
     886             : 
     887             : static inline void
     888     5985243 : twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
     889             : {
     890             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     891             :   call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL);
     892             : #else
     893     5985243 :   _gcry_twofish_amd64_encrypt_block(c, out, in);
     894             : #endif
     895     5985243 : }
     896             : 
     897             : static inline void
     898      743070 : twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in)
     899             : {
     900             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     901             :   call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL);
     902             : #else
     903      743070 :   _gcry_twofish_amd64_decrypt_block(c, out, in);
     904             : #endif
     905      743070 : }
     906             : 
     907             : static inline void
     908      909010 : twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
     909             :                       byte *ctr)
     910             : {
     911             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     912             :   call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr);
     913             : #else
     914      909010 :   _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
     915             : #endif
     916      909010 : }
     917             : 
     918             : static inline void
     919      235538 : twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
     920             :                       byte *iv)
     921             : {
     922             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     923             :   call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv);
     924             : #else
     925      235538 :   _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
     926             : #endif
     927      235538 : }
     928             : 
     929             : static inline void
     930      191340 : twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
     931             :                       byte *iv)
     932             : {
     933             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     934             :   call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv);
     935             : #else
     936      191340 :   _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
     937             : #endif
     938      191340 : }
     939             : 
     940             : static inline void
     941     1173312 : twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
     942             :                       byte *offset, byte *checksum, const u64 Ls[3])
     943             : {
     944             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     945             :   call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls);
     946             : #else
     947     1173312 :   _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
     948             : #endif
     949     1173312 : }
     950             : 
     951             : static inline void
     952     1156674 : twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
     953             :                       byte *offset, byte *checksum, const u64 Ls[3])
     954             : {
     955             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     956             :   call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls);
     957             : #else
     958     1156674 :   _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
     959             : #endif
     960     1156674 : }
     961             : 
     962             : static inline void
     963     2152568 : twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
     964             :                        byte *offset, byte *checksum, const u64 Ls[3])
     965             : {
     966             : #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
     967             :   call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls);
     968             : #else
     969     2152568 :   _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
     970             : #endif
     971     2152568 : }
     972             : 
     973             : #elif defined(USE_ARM_ASM)
     974             : 
     975             : /* Assembly implementations of Twofish. */
     976             : extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c,
     977             :                                               byte *out, const byte *in);
     978             : 
     979             : extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c,
     980             :                                               byte *out, const byte *in);
     981             : 
     982             : #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
     983             : 
     /* Macros to compute the g() function in the encryption and decryption
      * rounds.  G1 is the straight g() function; G2 includes the 8-bit
      * rotation for the high 32-bit word.
      *
      * Both macros read the key-dependent tables through a variable named
      * `ctx' that must be in scope at the point of use.  The expansions are
      * fully parenthesized so that they can be combined with operators of
      * higher precedence than XOR (for example `G1 (a) + k') without
      * mis-binding. */

     #define G1(a) \
        (  (ctx->s[0][(a) & 0xFF]) ^ (ctx->s[1][((a) >> 8) & 0xFF]) \
         ^ (ctx->s[2][((a) >> 16) & 0xFF]) ^ (ctx->s[3][(a) >> 24]))

     #define G2(b) \
        (  (ctx->s[1][(b) & 0xFF]) ^ (ctx->s[2][((b) >> 8) & 0xFF]) \
         ^ (ctx->s[3][((b) >> 16) & 0xFF]) ^ (ctx->s[0][(b) >> 24]))

     /* Encryption and decryption Feistel rounds.  Each one calls the two g()
      * macros, does the PHT, and performs the XOR and the appropriate bit
      * rotations.  The parameters are the round number (used to select
      * subkeys), and the four 32-bit chunks of the text.
      *
      * The macros use the temporaries `x' and `y' and the context pointer
      * `ctx' from the enclosing scope.  They are wrapped in do/while(0) so
      * that each invocation behaves as exactly one statement, which keeps
      * them safe inside unbraced if/else bodies. */

     #define ENCROUND(n, a, b, c, d) \
        do { \
          x = G1 (a); y = G2 (b); \
          x += y; y += x + ctx->k[2 * (n) + 1]; \
          (c) ^= x + ctx->k[2 * (n)]; \
          (c) = ((c) >> 1) + ((c) << 31); \
          (d) = (((d) << 1)+((d) >> 31)) ^ y; \
        } while (0)

     #define DECROUND(n, a, b, c, d) \
        do { \
          x = G1 (a); y = G2 (b); \
          x += y; y += x; \
          (d) ^= y + ctx->k[2 * (n) + 1]; \
          (d) = ((d) >> 1) + ((d) << 31); \
          (c) = (((c) << 1)+((c) >> 31)); \
          (c) ^= (x + ctx->k[2 * (n)]); \
        } while (0)

     /* Encryption and decryption cycles; each one is simply two Feistel
      * rounds with the 32-bit chunks re-ordered to simulate the "swap".
      * They operate on the variables named a, b, c and d in the enclosing
      * scope. */

     #define ENCCYCLE(n) \
        do { \
          ENCROUND (2 * (n), a, b, c, d); \
          ENCROUND (2 * (n) + 1, c, d, a, b); \
        } while (0)

     #define DECCYCLE(n) \
        do { \
          DECROUND (2 * (n) + 1, c, d, a, b); \
          DECROUND (2 * (n), a, b, c, d); \
        } while (0)

     /* Macros to convert the input and output bytes into 32-bit words,
      * and simultaneously perform the whitening step.  INPACK packs word
      * number n into the variable named by x, using whitening subkey
      * number m.  OUTUNPACK unpacks word number n from the variable named
      * by x, using whitening subkey number m.  They read `in' / write
      * `out' and use `ctx' from the enclosing scope. */

     #define INPACK(n, x, m) \
        do { \
          (x) = buf_get_le32(in + (n) * 4); \
          (x) ^= ctx->w[m]; \
        } while (0)

     #define OUTUNPACK(n, x, m) \
        do { \
          (x) ^= ctx->w[m]; \
          buf_put_le32(out + (n) * 4, (x)); \
        } while (0)
    1040             : 
    1041             : #endif /*!USE_AMD64_ASM*/
    1042             : 
    1043             : 
    1044             : /* Encrypt one block.  in and out may be the same. */
    1045             : 
    1046             : #ifdef USE_AMD64_ASM
    1047             : 
    1048             : static unsigned int
    1049     5985243 : twofish_encrypt (void *context, byte *out, const byte *in)
    1050             : {
    1051     5985243 :   TWOFISH_context *ctx = context;
    1052     5985243 :   twofish_amd64_encrypt_block(ctx, out, in);
    1053     5985243 :   return /*burn_stack*/ (4*sizeof (void*));
    1054             : }
    1055             : 
    1056             : #elif defined(USE_ARM_ASM)
    1057             : 
    1058             : static unsigned int
    1059             : twofish_encrypt (void *context, byte *out, const byte *in)
    1060             : {
    1061             :   TWOFISH_context *ctx = context;
    1062             :   _gcry_twofish_arm_encrypt_block(ctx, out, in);
    1063             :   return /*burn_stack*/ (4*sizeof (void*));
    1064             : }
    1065             : 
    1066             : #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
    1067             : 
    1068             : static void
    1069             : do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
    1070             : {
    1071             :   /* The four 32-bit chunks of the text. */
    1072             :   u32 a, b, c, d;
    1073             : 
    1074             :   /* Temporaries used by the round function. */
    1075             :   u32 x, y;
    1076             : 
    1077             :   /* Input whitening and packing. */
    1078             :   INPACK (0, a, 0);
    1079             :   INPACK (1, b, 1);
    1080             :   INPACK (2, c, 2);
    1081             :   INPACK (3, d, 3);
    1082             : 
    1083             :   /* Encryption Feistel cycles. */
    1084             :   ENCCYCLE (0);
    1085             :   ENCCYCLE (1);
    1086             :   ENCCYCLE (2);
    1087             :   ENCCYCLE (3);
    1088             :   ENCCYCLE (4);
    1089             :   ENCCYCLE (5);
    1090             :   ENCCYCLE (6);
    1091             :   ENCCYCLE (7);
    1092             : 
    1093             :   /* Output whitening and unpacking. */
    1094             :   OUTUNPACK (0, c, 4);
    1095             :   OUTUNPACK (1, d, 5);
    1096             :   OUTUNPACK (2, a, 6);
    1097             :   OUTUNPACK (3, b, 7);
    1098             : }
    1099             : 
    1100             : static unsigned int
    1101             : twofish_encrypt (void *context, byte *out, const byte *in)
    1102             : {
    1103             :   TWOFISH_context *ctx = context;
    1104             :   do_twofish_encrypt (ctx, out, in);
    1105             :   return /*burn_stack*/ (24+3*sizeof (void*));
    1106             : }
    1107             : 
    1108             : #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
    1109             : 
    1110             : 
    1111             : /* Decrypt one block.  in and out may be the same. */
    1112             : 
    1113             : #ifdef USE_AMD64_ASM
    1114             : 
    1115             : static unsigned int
    1116      743070 : twofish_decrypt (void *context, byte *out, const byte *in)
    1117             : {
    1118      743070 :   TWOFISH_context *ctx = context;
    1119      743070 :   twofish_amd64_decrypt_block(ctx, out, in);
    1120      743070 :   return /*burn_stack*/ (4*sizeof (void*));
    1121             : }
    1122             : 
    1123             : #elif defined(USE_ARM_ASM)
    1124             : 
    1125             : static unsigned int
    1126             : twofish_decrypt (void *context, byte *out, const byte *in)
    1127             : {
    1128             :   TWOFISH_context *ctx = context;
    1129             :   _gcry_twofish_arm_decrypt_block(ctx, out, in);
    1130             :   return /*burn_stack*/ (4*sizeof (void*));
    1131             : }
    1132             : 
    1133             : #else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
    1134             : 
    1135             : static void
    1136             : do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
    1137             : {
    1138             :   /* The four 32-bit chunks of the text. */
    1139             :   u32 a, b, c, d;
    1140             : 
    1141             :   /* Temporaries used by the round function. */
    1142             :   u32 x, y;
    1143             : 
    1144             :   /* Input whitening and packing. */
    1145             :   INPACK (0, c, 4);
    1146             :   INPACK (1, d, 5);
    1147             :   INPACK (2, a, 6);
    1148             :   INPACK (3, b, 7);
    1149             : 
    1150             :   /* Encryption Feistel cycles. */
    1151             :   DECCYCLE (7);
    1152             :   DECCYCLE (6);
    1153             :   DECCYCLE (5);
    1154             :   DECCYCLE (4);
    1155             :   DECCYCLE (3);
    1156             :   DECCYCLE (2);
    1157             :   DECCYCLE (1);
    1158             :   DECCYCLE (0);
    1159             : 
    1160             :   /* Output whitening and unpacking. */
    1161             :   OUTUNPACK (0, a, 0);
    1162             :   OUTUNPACK (1, b, 1);
    1163             :   OUTUNPACK (2, c, 2);
    1164             :   OUTUNPACK (3, d, 3);
    1165             : }
    1166             : 
    1167             : static unsigned int
    1168             : twofish_decrypt (void *context, byte *out, const byte *in)
    1169             : {
    1170             :   TWOFISH_context *ctx = context;
    1171             : 
    1172             :   do_twofish_decrypt (ctx, out, in);
    1173             :   return /*burn_stack*/ (24+3*sizeof (void*));
    1174             : }
    1175             : 
    1176             : #endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
    1177             : 
    1178             : 
    1179             : 
    1180             : /* Bulk encryption of complete blocks in CTR mode.  This function is only
    1181             :    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
    1182             :    of size TWOFISH_BLOCKSIZE. */
    1183             : void
    1184       32666 : _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
    1185             :                       const void *inbuf_arg, size_t nblocks)
    1186             : {
    1187       32666 :   TWOFISH_context *ctx = context;
    1188       32666 :   unsigned char *outbuf = outbuf_arg;
    1189       32666 :   const unsigned char *inbuf = inbuf_arg;
    1190             :   unsigned char tmpbuf[TWOFISH_BLOCKSIZE];
    1191       32666 :   unsigned int burn, burn_stack_depth = 0;
    1192             :   int i;
    1193             : 
    1194             : #ifdef USE_AVX2
    1195       32666 :   if (ctx->use_avx2)
    1196             :     {
    1197           0 :       int did_use_avx2 = 0;
    1198             : 
    1199             :       /* Process data in 16 block chunks. */
    1200           0 :       while (nblocks >= 16)
    1201             :         {
    1202           0 :           _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
    1203             : 
    1204           0 :           nblocks -= 16;
    1205           0 :           outbuf += 16 * TWOFISH_BLOCKSIZE;
    1206           0 :           inbuf  += 16 * TWOFISH_BLOCKSIZE;
    1207           0 :           did_use_avx2 = 1;
    1208             :         }
    1209             : 
    1210           0 :       if (did_use_avx2)
    1211             :         {
    1212             :           /* twofish-avx2 assembly code does not use stack */
    1213           0 :           if (nblocks == 0)
    1214           0 :             burn_stack_depth = 0;
    1215             :         }
    1216             :     }
    1217             : #endif
    1218             : 
    1219             : #ifdef USE_AMD64_ASM
    1220             :   {
    1221             :     /* Process data in 3 block chunks. */
    1222      974342 :     while (nblocks >= 3)
    1223             :       {
    1224      909010 :         twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
    1225             : 
    1226      909010 :         nblocks -= 3;
    1227      909010 :         outbuf += 3 * TWOFISH_BLOCKSIZE;
    1228      909010 :         inbuf += 3 * TWOFISH_BLOCKSIZE;
    1229             : 
    1230      909010 :         burn = 8 * sizeof(void*);
    1231      909010 :         if (burn > burn_stack_depth)
    1232       30376 :           burn_stack_depth = burn;
    1233             :       }
    1234             : 
    1235             :     /* Use generic code to handle smaller chunks... */
    1236             :     /* TODO: use caching instead? */
    1237             :   }
    1238             : #endif
    1239             : 
    1240       64862 :   for ( ;nblocks; nblocks-- )
    1241             :     {
    1242             :       /* Encrypt the counter. */
    1243       32196 :       burn = twofish_encrypt(ctx, tmpbuf, ctr);
    1244       32196 :       if (burn > burn_stack_depth)
    1245        2290 :         burn_stack_depth = burn;
    1246             : 
    1247             :       /* XOR the input with the encrypted counter and store in output.  */
    1248       32196 :       buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
    1249       32196 :       outbuf += TWOFISH_BLOCKSIZE;
    1250       32196 :       inbuf  += TWOFISH_BLOCKSIZE;
    1251             :       /* Increment the counter.  */
    1252       33389 :       for (i = TWOFISH_BLOCKSIZE; i > 0; i--)
    1253             :         {
    1254       33385 :           ctr[i-1]++;
    1255       33385 :           if (ctr[i-1])
    1256       32192 :             break;
    1257             :         }
    1258             :     }
    1259             : 
    1260       32666 :   wipememory(tmpbuf, sizeof(tmpbuf));
    1261       32666 :   _gcry_burn_stack(burn_stack_depth);
    1262       32666 : }
    1263             : 
    1264             : 
    1265             : /* Bulk decryption of complete blocks in CBC mode.  This function is only
    1266             :    intended for the bulk encryption feature of cipher.c. */
    1267             : void
    1268        8156 : _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
    1269             :                       const void *inbuf_arg, size_t nblocks)
    1270             : {
    1271        8156 :   TWOFISH_context *ctx = context;
    1272        8156 :   unsigned char *outbuf = outbuf_arg;
    1273        8156 :   const unsigned char *inbuf = inbuf_arg;
    1274             :   unsigned char savebuf[TWOFISH_BLOCKSIZE];
    1275        8156 :   unsigned int burn, burn_stack_depth = 0;
    1276             : 
    1277             : #ifdef USE_AVX2
    1278        8156 :   if (ctx->use_avx2)
    1279             :     {
    1280           0 :       int did_use_avx2 = 0;
    1281             : 
    1282             :       /* Process data in 16 block chunks. */
    1283           0 :       while (nblocks >= 16)
    1284             :         {
    1285           0 :           _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
    1286             : 
    1287           0 :           nblocks -= 16;
    1288           0 :           outbuf += 16 * TWOFISH_BLOCKSIZE;
    1289           0 :           inbuf  += 16 * TWOFISH_BLOCKSIZE;
    1290           0 :           did_use_avx2 = 1;
    1291             :         }
    1292             : 
    1293           0 :       if (did_use_avx2)
    1294             :         {
    1295             :           /* twofish-avx2 assembly code does not use stack */
    1296           0 :           if (nblocks == 0)
    1297           0 :             burn_stack_depth = 0;
    1298             :         }
    1299             :     }
    1300             : #endif
    1301             : 
    1302             : #ifdef USE_AMD64_ASM
    1303             :   {
    1304             :     /* Process data in 3 block chunks. */
    1305      251850 :     while (nblocks >= 3)
    1306             :       {
    1307      235538 :         twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
    1308             : 
    1309      235538 :         nblocks -= 3;
    1310      235538 :         outbuf += 3 * TWOFISH_BLOCKSIZE;
    1311      235538 :         inbuf += 3 * TWOFISH_BLOCKSIZE;
    1312             : 
    1313      235538 :         burn = 9 * sizeof(void*);
    1314      235538 :         if (burn > burn_stack_depth)
    1315        7252 :           burn_stack_depth = burn;
    1316             :       }
    1317             : 
    1318             :     /* Use generic code to handle smaller chunks... */
    1319             :   }
    1320             : #endif
    1321             : 
    1322       16374 :   for ( ;nblocks; nblocks-- )
    1323             :     {
    1324             :       /* INBUF is needed later and it may be identical to OUTBUF, so store
    1325             :          the intermediate result to SAVEBUF.  */
    1326        8218 :       burn = twofish_decrypt (ctx, savebuf, inbuf);
    1327        8218 :       if (burn > burn_stack_depth)
    1328         904 :         burn_stack_depth = burn;
    1329             : 
    1330        8218 :       buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
    1331        8218 :       inbuf += TWOFISH_BLOCKSIZE;
    1332        8218 :       outbuf += TWOFISH_BLOCKSIZE;
    1333             :     }
    1334             : 
    1335        8156 :   wipememory(savebuf, sizeof(savebuf));
    1336        8156 :   _gcry_burn_stack(burn_stack_depth);
    1337        8156 : }
    1338             : 
    1339             : 
    1340             : /* Bulk decryption of complete blocks in CFB mode.  This function is only
    1341             :    intended for the bulk encryption feature of cipher.c. */
    1342             : void
    1343        6232 : _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
    1344             :                     const void *inbuf_arg, size_t nblocks)
    1345             : {
    1346        6232 :   TWOFISH_context *ctx = context;
    1347        6232 :   unsigned char *outbuf = outbuf_arg;
    1348        6232 :   const unsigned char *inbuf = inbuf_arg;
    1349        6232 :   unsigned int burn, burn_stack_depth = 0;
    1350             : 
    1351             : #ifdef USE_AVX2
    1352        6232 :   if (ctx->use_avx2)
    1353             :     {
    1354           0 :       int did_use_avx2 = 0;
    1355             : 
    1356             :       /* Process data in 16 block chunks. */
    1357           0 :       while (nblocks >= 16)
    1358             :         {
    1359           0 :           _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
    1360             : 
    1361           0 :           nblocks -= 16;
    1362           0 :           outbuf += 16 * TWOFISH_BLOCKSIZE;
    1363           0 :           inbuf  += 16 * TWOFISH_BLOCKSIZE;
    1364           0 :           did_use_avx2 = 1;
    1365             :         }
    1366             : 
    1367           0 :       if (did_use_avx2)
    1368             :         {
    1369             :           /* twofish-avx2 assembly code does not use stack */
    1370           0 :           if (nblocks == 0)
    1371           0 :             burn_stack_depth = 0;
    1372             :         }
    1373             :     }
    1374             : #endif
    1375             : 
    1376             : #ifdef USE_AMD64_ASM
    1377             :   {
    1378             :     /* Process data in 3 block chunks. */
    1379      203804 :     while (nblocks >= 3)
    1380             :       {
    1381      191340 :         twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
    1382             : 
    1383      191340 :         nblocks -= 3;
    1384      191340 :         outbuf += 3 * TWOFISH_BLOCKSIZE;
    1385      191340 :         inbuf += 3 * TWOFISH_BLOCKSIZE;
    1386             : 
    1387      191340 :         burn = 8 * sizeof(void*);
    1388      191340 :         if (burn > burn_stack_depth)
    1389        6100 :           burn_stack_depth = burn;
    1390             :       }
    1391             : 
    1392             :     /* Use generic code to handle smaller chunks... */
    1393             :   }
    1394             : #endif
    1395             : 
    1396       12768 :   for ( ;nblocks; nblocks-- )
    1397             :     {
    1398        6536 :       burn = twofish_encrypt(ctx, iv, iv);
    1399        6536 :       if (burn > burn_stack_depth)
    1400         132 :         burn_stack_depth = burn;
    1401             : 
    1402        6536 :       buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
    1403        6536 :       outbuf += TWOFISH_BLOCKSIZE;
    1404        6536 :       inbuf += TWOFISH_BLOCKSIZE;
    1405             :     }
    1406             : 
    1407        6232 :   _gcry_burn_stack(burn_stack_depth);
    1408        6232 : }
    1409             : 
    1410             : /* Bulk encryption/decryption of complete blocks in OCB mode. */
    1411             : size_t
    1412        9666 : _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
    1413             :                         const void *inbuf_arg, size_t nblocks, int encrypt)
    1414             : {
    1415             : #ifdef USE_AMD64_ASM
    1416        9666 :   TWOFISH_context *ctx = (void *)&c->context.c;
    1417        9666 :   unsigned char *outbuf = outbuf_arg;
    1418        9666 :   const unsigned char *inbuf = inbuf_arg;
    1419        9666 :   unsigned int burn, burn_stack_depth = 0;
    1420        9666 :   u64 blkn = c->u_mode.ocb.data_nblocks;
    1421             : 
    1422             : #ifdef USE_AVX2
    1423        9666 :   if (ctx->use_avx2)
    1424             :     {
    1425           0 :       int did_use_avx2 = 0;
    1426             :       u64 Ls[16];
    1427           0 :       unsigned int n = 16 - (blkn % 16);
    1428             :       u64 *l;
    1429             :       int i;
    1430             : 
    1431           0 :       if (nblocks >= 16)
    1432             :         {
    1433           0 :           for (i = 0; i < 16; i += 8)
    1434             :             {
    1435             :               /* Use u64 to store pointers for x32 support (assembly function
    1436             :                * assumes 64-bit pointers). */
    1437           0 :               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1438           0 :               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1439           0 :               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1440           0 :               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
    1441           0 :               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1442           0 :               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1443           0 :               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1444             :             }
    1445             : 
    1446           0 :           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
    1447           0 :           l = &Ls[(15 + n) % 16];
    1448             : 
    1449             :           /* Process data in 16 block chunks. */
    1450           0 :           while (nblocks >= 16)
    1451             :             {
    1452           0 :               blkn += 16;
    1453           0 :               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
    1454             : 
    1455           0 :               if (encrypt)
    1456           0 :                 _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
    1457           0 :                                           c->u_ctr.ctr, Ls);
    1458             :               else
    1459           0 :                 _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
    1460           0 :                                           c->u_ctr.ctr, Ls);
    1461             : 
    1462           0 :               nblocks -= 16;
    1463           0 :               outbuf += 16 * TWOFISH_BLOCKSIZE;
    1464           0 :               inbuf  += 16 * TWOFISH_BLOCKSIZE;
    1465           0 :               did_use_avx2 = 1;
    1466             :             }
    1467             :         }
    1468             : 
    1469           0 :       if (did_use_avx2)
    1470             :         {
    1471             :           /* twofish-avx2 assembly code does not use stack */
    1472           0 :           if (nblocks == 0)
    1473           0 :             burn_stack_depth = 0;
    1474             :         }
    1475             :     }
    1476             : #endif
    1477             : 
    1478             :   {
    1479             :     /* Use u64 to store pointers for x32 support (assembly function
    1480             :       * assumes 64-bit pointers). */
    1481             :     u64 Ls[3];
    1482             : 
    1483             :     /* Process data in 3 block chunks. */
    1484     2349318 :     while (nblocks >= 3)
    1485             :       {
    1486     2329986 :         Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
    1487     2329986 :         Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
    1488     2329986 :         Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
    1489     2329986 :         blkn += 3;
    1490             : 
    1491     2329986 :         if (encrypt)
    1492     1173312 :           twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
    1493             :                                 Ls);
    1494             :         else
    1495     1156674 :           twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
    1496             :                                 Ls);
    1497             : 
    1498     2329986 :         nblocks -= 3;
    1499     2329986 :         outbuf += 3 * TWOFISH_BLOCKSIZE;
    1500     2329986 :         inbuf  += 3 * TWOFISH_BLOCKSIZE;
    1501             : 
    1502     2329986 :         burn = 8 * sizeof(void*);
    1503     2329986 :         if (burn > burn_stack_depth)
    1504        8864 :           burn_stack_depth = burn;
    1505             :       }
    1506             : 
    1507             :     /* Use generic code to handle smaller chunks... */
    1508             :   }
    1509             : 
    1510        9666 :   c->u_mode.ocb.data_nblocks = blkn;
    1511             : 
    1512        9666 :   if (burn_stack_depth)
    1513        8864 :     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
    1514             : #else
    1515             :   (void)c;
    1516             :   (void)outbuf_arg;
    1517             :   (void)inbuf_arg;
    1518             :   (void)encrypt;
    1519             : #endif
    1520             : 
    1521        9666 :   return nblocks;
    1522             : }
    1523             : 
    1524             : /* Bulk authentication of complete blocks in OCB mode.
                        *
                        * Feeds the data at ABUF_ARG to the assembly OCB authentication
                        * routines: in 16-block chunks through _gcry_twofish_avx2_ocb_auth
                        * (only when USE_AVX2 is compiled in and ctx->use_avx2 is set), then
                        * in 3-block chunks through twofish_amd64_ocb_auth.  The block
                        * counter c->u_mode.ocb.aad_nblocks is advanced by the number of
                        * blocks consumed.
                        *
                        * Returns the number of blocks that were NOT processed: fewer than 3
                        * when USE_AMD64_ASM is defined, NBLOCKS unchanged otherwise.
                        * NOTE(review): presumably the caller handles the remainder with
                        * generic code; confirm against the caller. */
    1525             : size_t
    1526        1524 : _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
    1527             :                         size_t nblocks)
    1528             : {
    1529             : #ifdef USE_AMD64_ASM
    1530        1524 :   TWOFISH_context *ctx = (void *)&c->context.c;
    1531        1524 :   const unsigned char *abuf = abuf_arg;
    1532        1524 :   unsigned int burn, burn_stack_depth = 0;
    1533        1524 :   u64 blkn = c->u_mode.ocb.aad_nblocks;
    1534             : 
    1535             : #ifdef USE_AVX2
    1536        1524 :   if (ctx->use_avx2)
    1537             :     {
    1538           0 :       int did_use_avx2 = 0;
    1539             :       u64 Ls[16];
                             /* Rotation applied to every Ls index below, derived from the
                              * current block count modulo 16. */
    1540           0 :       unsigned int n = 16 - (blkn % 16);
    1541             :       u64 *l;
    1542             :       int i;
    1543             : 
    1544           0 :       if (nblocks >= 16)
    1545             :         {
                                 /* Fill the rotated table with pointers to L[0]..L[2]; the
                                  * slots at offsets 7 and 15 are filled separately below. */
    1546           0 :           for (i = 0; i < 16; i += 8)
    1547             :             {
    1548             :               /* Use u64 to store pointers for x32 support (assembly function
    1549             :                * assumes 64-bit pointers). */
    1550           0 :               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1551           0 :               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1552           0 :               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1553           0 :               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
    1554           0 :               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1555           0 :               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
    1556           0 :               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
    1557             :             }
    1558             : 
    1559           0 :           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
                                 /* The remaining slot is the only one rewritten per chunk. */
    1560           0 :           l = &Ls[(15 + n) % 16];
    1561             : 
    1562             :           /* Process data in 16 block chunks. */
    1563           0 :           while (nblocks >= 16)
    1564             :             {
    1565           0 :               blkn += 16;
                                     /* Refresh the per-chunk slot for the new block count. */
    1566           0 :               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
    1567             : 
    1568           0 :               _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
    1569           0 :                                           c->u_mode.ocb.aad_sum, Ls);
    1570             : 
    1571           0 :               nblocks -= 16;
    1572           0 :               abuf += 16 * TWOFISH_BLOCKSIZE;
    1573           0 :               did_use_avx2 = 1;
    1574             :             }
    1575             :         }
    1576             : 
    1577           0 :       if (did_use_avx2)
    1578             :         {
    1579             :           /* twofish-avx2 assembly code does not use stack */
                                 /* burn_stack_depth is still 0 at this point, so the
                                  * assignment below has no effect as written. */
    1580           0 :           if (nblocks == 0)
    1581           0 :             burn_stack_depth = 0;
    1582             :         }
    1583             : 
    1584             :       /* Fewer than 16 blocks remain; the 3-block loop below handles them. */
    1585             :     }
    1586             : #endif
    1587             : 
    1588             :   {
    1589             :     /* Use u64 to store pointers for x32 support (assembly function
    1590             :      * assumes 64-bit pointers). */
    1591             :     u64 Ls[3];
    1592             : 
    1593             :     /* Process data in 3 block chunks. */
    1594     2155616 :     while (nblocks >= 3)
    1595             :       {
    1596     2152568 :         Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
    1597     2152568 :         Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
    1598     2152568 :         Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
    1599     2152568 :         blkn += 3;
    1600             : 
    1601     2152568 :         twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
    1602     2152568 :                               c->u_mode.ocb.aad_sum, Ls);
    1603             : 
    1604     2152568 :         nblocks -= 3;
    1605     2152568 :         abuf += 3 * TWOFISH_BLOCKSIZE;
    1606             : 
                               /* Record how much stack to wipe afterwards.
                                * NOTE(review): presumably sized to the stack usage of the
                                * amd64 assembly routine; confirm against the assembly. */
    1607     2152568 :         burn = 8 * sizeof(void*);
    1608     2152568 :         if (burn > burn_stack_depth)
    1609        1392 :           burn_stack_depth = burn;
    1610             :       }
    1611             : 
    1612             :     /* At most 2 blocks remain here; they are reported back to the caller. */
    1613             :   }
    1614             : 
                         /* Persist the advanced block counter for the next call. */
    1615        1524 :   c->u_mode.ocb.aad_nblocks = blkn;
    1616             : 
    1617        1524 :   if (burn_stack_depth)
    1618        1392 :     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
    1619             : #else
                         /* No assembly implementation compiled in: nothing is consumed. */
    1620             :   (void)c;
    1621             :   (void)abuf_arg;
    1622             : #endif
    1623             : 
    1624        1524 :   return nblocks;
    1625             : }
    1626             : 
    1627             : 
    1628             : 
    1629             : /* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR
    1630             :    encryption.  Returns NULL on success; otherwise the string returned
                          by _gcry_selftest_helper_ctr is passed through. */
    1631             : static const char *
    1632           4 : selftest_ctr (void)
    1633             : {
                         /* NOTE(review): 16+1 presumably covers one full 16-block bulk chunk
                          * plus one leftover block; confirm against the bulk code paths. */
    1634           4 :   const int nblocks = 16+1;
    1635           4 :   const int blocksize = TWOFISH_BLOCKSIZE;
    1636           4 :   const int context_size = sizeof(TWOFISH_context);
    1637             : 
    1638           4 :   return _gcry_selftest_helper_ctr("TWOFISH", &twofish_setkey,
    1639             :            &twofish_encrypt, &_gcry_twofish_ctr_enc, nblocks, blocksize,
    1640             :            context_size);
    1641             : }
    1642             : 
    1643             : /* Run the self-tests for TWOFISH-CBC, tests bulk CBC decryption.
    1644             :    Returns NULL on success; otherwise the string returned by
                          _gcry_selftest_helper_cbc is passed through. */
    1645             : static const char *
    1646           4 : selftest_cbc (void)
    1647             : {
                         /* NOTE(review): 16+2 presumably covers one full 16-block bulk chunk
                          * plus two leftover blocks; confirm against the bulk code paths. */
    1648           4 :   const int nblocks = 16+2;
    1649           4 :   const int blocksize = TWOFISH_BLOCKSIZE;
    1650           4 :   const int context_size = sizeof(TWOFISH_context);
    1651             : 
    1652           4 :   return _gcry_selftest_helper_cbc("TWOFISH", &twofish_setkey,
    1653             :            &twofish_encrypt, &_gcry_twofish_cbc_dec, nblocks, blocksize,
    1654             :            context_size);
    1655             : }
    1656             : 
    1657             : /* Run the self-tests for TWOFISH-CFB, tests bulk CFB decryption.
    1658             :    Returns NULL on success; otherwise the string returned by
                          _gcry_selftest_helper_cfb is passed through. */
    1659             : static const char *
    1660           4 : selftest_cfb (void)
    1661             : {
                         /* NOTE(review): 16+2 presumably covers one full 16-block bulk chunk
                          * plus two leftover blocks; confirm against the bulk code paths. */
    1662           4 :   const int nblocks = 16+2;
    1663           4 :   const int blocksize = TWOFISH_BLOCKSIZE;
    1664           4 :   const int context_size = sizeof(TWOFISH_context);
    1665             : 
    1666           4 :   return _gcry_selftest_helper_cfb("TWOFISH", &twofish_setkey,
    1667             :            &twofish_encrypt, &_gcry_twofish_cfb_dec, nblocks, blocksize,
    1668             :            context_size);
    1669             : }
    1670             : 
    1671             : 
    1672             : /* Test a single encryption and decryption with each key size, then run
                        * the CTR, CBC and CFB bulk self-tests.  Returns NULL when everything
                        * passes, otherwise a static string describing the first failure. */
    1673             : 
    1674             : static const char*
    1675           4 : selftest (void)
    1676             : {
    1677             :   TWOFISH_context ctx; /* Expanded key. */
    1678             :   byte scratch[16];     /* Encryption/decryption result buffer. */
    1679             :   const char *r;
    1680             : 
    1681             :   /* Test vectors for single encryption/decryption.  Note that I am using
    1682             :    * the vectors from the Twofish paper's "known answer test", I=3 for
    1683             :    * 128-bit and I=4 for 256-bit, instead of the all-0 vectors from the
    1684             :    * "intermediate value test", because an all-0 key would trigger all the
    1685             :    * special cases in the RS matrix multiply, leaving the math untested. */
    1686             :   static  byte plaintext[16] = {
    1687             :     0xD4, 0x91, 0xDB, 0x16, 0xE7, 0xB1, 0xC3, 0x9E,
    1688             :     0x86, 0xCB, 0x08, 0x6B, 0x78, 0x9F, 0x54, 0x19
    1689             :   };
    1690             :   static byte key[16] = {
    1691             :     0x9F, 0x58, 0x9F, 0x5C, 0xF6, 0x12, 0x2C, 0x32,
    1692             :     0xB6, 0xBF, 0xEC, 0x2F, 0x2A, 0xE8, 0xC3, 0x5A
    1693             :   };
    1694             :   static const byte ciphertext[16] = {
    1695             :     0x01, 0x9F, 0x98, 0x09, 0xDE, 0x17, 0x11, 0x85,
    1696             :     0x8F, 0xAA, 0xC3, 0xA3, 0xBA, 0x20, 0xFB, 0xC3
    1697             :   };
    1698             :   static byte plaintext_256[16] = {
    1699             :     0x90, 0xAF, 0xE9, 0x1B, 0xB2, 0x88, 0x54, 0x4F,
    1700             :     0x2C, 0x32, 0xDC, 0x23, 0x9B, 0x26, 0x35, 0xE6
    1701             :   };
    1702             :   static byte key_256[32] = {
    1703             :     0xD4, 0x3B, 0xB7, 0x55, 0x6E, 0xA3, 0x2E, 0x46,
    1704             :     0xF2, 0xA2, 0x82, 0xB7, 0xD4, 0x5B, 0x4E, 0x0D,
    1705             :     0x57, 0xFF, 0x73, 0x9D, 0x4D, 0xC9, 0x2C, 0x1B,
    1706             :     0xD7, 0xFC, 0x01, 0x70, 0x0C, 0xC8, 0x21, 0x6F
    1707             :   };
    1708             :   static const byte ciphertext_256[16] = {
    1709             :     0x6C, 0xB4, 0x56, 0x1C, 0x40, 0xBF, 0x0A, 0x97,
    1710             :     0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
    1711             :   };
    1712             : 
                         /* 128-bit key: encrypt into scratch, compare with the expected
                          * ciphertext, then decrypt in place and compare with the plaintext. */
    1713           4 :   twofish_setkey (&ctx, key, sizeof(key));
    1714           4 :   twofish_encrypt (&ctx, scratch, plaintext);
    1715           4 :   if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
    1716           0 :     return "Twofish-128 test encryption failed.";
    1717           4 :   twofish_decrypt (&ctx, scratch, scratch);
    1718           4 :   if (memcmp (scratch, plaintext, sizeof (plaintext)))
    1719           0 :     return "Twofish-128 test decryption failed.";
    1720             : 
                         /* Same round trip with the 256-bit key and its vectors. */
    1721           4 :   twofish_setkey (&ctx, key_256, sizeof(key_256));
    1722           4 :   twofish_encrypt (&ctx, scratch, plaintext_256);
    1723           4 :   if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
    1724           0 :     return "Twofish-256 test encryption failed.";
    1725           4 :   twofish_decrypt (&ctx, scratch, scratch);
    1726           4 :   if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
    1727           0 :     return "Twofish-256 test decryption failed.";
    1728             : 
                         /* Bulk-mode self-tests; the first non-NULL result is returned. */
    1729           4 :   if ((r = selftest_ctr()) != NULL)
    1730           0 :     return r;
    1731           4 :   if ((r = selftest_cbc()) != NULL)
    1732           0 :     return r;
    1733           4 :   if ((r = selftest_cfb()) != NULL)
    1734           0 :     return r;
    1735             : 
    1736           4 :   return NULL;
    1737             : }
    1738             : 
    1739             : /* More complete test program.  This does 1000 encryptions and decryptions
    1740             :  * with each of 250 128-bit keys and 2000 encryptions and decryptions with
    1741             :  * each of 125 256-bit keys, using a feedback scheme similar to a Feistel
    1742             :  * cipher, so as to be sure of testing all the table entries pretty
    1743             :  * thoroughly.  We keep changing the keys so as to get a more meaningful
    1744             :  * performance number, since the key setup is non-trivial for Twofish. */
    1745             : 
    1746             : #ifdef TEST
    1747             : 
    1748             : #include <stdio.h>
    1749             : #include <string.h>
    1750             : #include <time.h>
    1751             : 
    1752             : int
    1753             : main()
    1754             : {
                         /* Standalone timing/consistency test (built only when TEST is
                          * defined).  Runs the encryption loops, then the mirrored
                          * decryption loops, and prints whether each phase matched its
                          * expected table plus the elapsed CPU time.  Always returns 0. */
    1755             :   TWOFISH_context ctx;     /* Expanded key. */
    1756             :   int i, j;                 /* Loop counters. */
    1757             : 
    1758             :   const char *encrypt_msg; /* Message to print regarding encryption test;
    1759             :                             * the printf is done outside the loop to avoid
    1760             :                             * stuffing up the timing. */
    1761             :   clock_t timer; /* For computing elapsed time. */
    1762             : 
    1763             :   /* Test buffer. */
    1764             :   byte buffer[4][16] = {
    1765             :     {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
    1766             :      0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
    1767             :     {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
    1768             :      0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
    1769             :     {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    1770             :      0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
    1771             :     {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
    1772             :      0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
    1773             :   };
    1774             : 
    1775             :   /* Expected outputs for the million-operation test */
    1776             :   static const byte test_encrypt[4][16] = {
    1777             :     {0xC8, 0x23, 0xB8, 0xB7, 0x6B, 0xFE, 0x91, 0x13,
    1778             :      0x2F, 0xA7, 0x5E, 0xE6, 0x94, 0x77, 0x6F, 0x6B},
    1779             :     {0x90, 0x36, 0xD8, 0x29, 0xD5, 0x96, 0xC2, 0x8E,
    1780             :      0xE4, 0xFF, 0x76, 0xBC, 0xE5, 0x77, 0x88, 0x27},
    1781             :     {0xB8, 0x78, 0x69, 0xAF, 0x42, 0x8B, 0x48, 0x64,
    1782             :      0xF7, 0xE9, 0xF3, 0x9C, 0x42, 0x18, 0x7B, 0x73},
    1783             :     {0x7A, 0x88, 0xFB, 0xEB, 0x90, 0xA4, 0xB4, 0xA8,
    1784             :      0x43, 0xA3, 0x1D, 0xF1, 0x26, 0xC4, 0x53, 0x57}
    1785             :   };
    1786             :   static const byte test_decrypt[4][16] = {
    1787             :     {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
    1788             :      0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF},
    1789             :     {0x0F, 0x1E, 0x2D, 0x3C, 0x4B, 0x5A, 0x69, 0x78,
    1790             :      0x87, 0x96, 0xA5, 0xB4, 0xC3, 0xD2 ,0xE1, 0xF0},
    1791             :     {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
    1792             :      0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54 ,0x32, 0x10},
    1793             :     {0x01, 0x23, 0x45, 0x67, 0x76, 0x54 ,0x32, 0x10,
    1794             :      0x89, 0xAB, 0xCD, 0xEF, 0xFE, 0xDC, 0xBA, 0x98}
    1795             :   };
    1796             : 
    1797             :   /* Start the timer ticking. */
    1798             :   timer = clock ();
    1799             : 
    1800             :   /* Encryption test. */
    1801             :   for (i = 0; i < 125; i++)
    1802             :     {
    1803             :       twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
    1804             :       for (j = 0; j < 1000; j++)
    1805             :         twofish_encrypt (&ctx, buffer[2], buffer[2]);
    1806             :       twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]));
    1807             :       for (j = 0; j < 1000; j++)
    1808             :         twofish_encrypt (&ctx, buffer[3], buffer[3]);
                             /* 32-byte key: buffer[2] and buffer[3] are adjacent rows of the
                              * same array, so together they form the 256-bit key. */
    1809             :       twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2);
    1810             :       for (j = 0; j < 1000; j++) {
    1811             :         twofish_encrypt (&ctx, buffer[0], buffer[0]);
    1812             :         twofish_encrypt (&ctx, buffer[1], buffer[1]);
    1813             :       }
    1814             :     }
    1815             :   encrypt_msg = memcmp (buffer, test_encrypt, sizeof (test_encrypt)) ?
    1816             :     "encryption failure!\n" : "encryption OK!\n";
    1817             : 
    1818             :   /* Decryption test. */
                         /* Undoes the encryption loop in reverse order, so the buffer should
                          * end up equal to test_decrypt (the initial buffer contents). */
    1819             :   for (i = 0; i < 125; i++)
    1820             :     {
    1821             :       twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2);
    1822             :       for (j = 0; j < 1000; j++) {
    1823             :         twofish_decrypt (&ctx, buffer[0], buffer[0]);
    1824             :         twofish_decrypt (&ctx, buffer[1], buffer[1]);
    1825             :       }
    1826             :       twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]));
    1827             :       for (j = 0; j < 1000; j++)
    1828             :         twofish_decrypt (&ctx, buffer[3], buffer[3]);
    1829             :       twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
    1830             :       for (j = 0; j < 1000; j++)
    1831             :         twofish_decrypt (&ctx, buffer[2], buffer[2]);
    1832             :     }
    1833             : 
    1834             :   /* Stop the timer, and print results. */
    1835             :   timer = clock () - timer;
                         /* Print through an explicit "%s" so the message is never interpreted
                          * as a format string. */
    1836             :   printf ("%s", encrypt_msg);
    1837             :   printf (memcmp (buffer, test_decrypt, sizeof (test_decrypt)) ?
    1838             :           "decryption failure!\n" : "decryption OK!\n");
    1839             :   printf ("elapsed time: %.1f s.\n", (float) timer / CLOCKS_PER_SEC);
    1840             : 
    1841             :   return 0;
    1842             : }
    1843             : 
    1844             : #endif /* TEST */
    1845             : 
    1846             : 
    1847             : 
    1848             : gcry_cipher_spec_t _gcry_cipher_spec_twofish =
    1849             :   {
                           /* Cipher descriptor registered under the name "TWOFISH".
                            * NOTE(review): the numeric fields 16 and 256 are presumably the
                            * block size in bytes and the key length in bits; confirm against
                            * the gcry_cipher_spec_t definition. */
    1850             :     GCRY_CIPHER_TWOFISH, {0, 0},
    1851             :     "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context),
    1852             :     twofish_setkey, twofish_encrypt, twofish_decrypt
    1853             :   };
    1854             : 
    1855             : gcry_cipher_spec_t _gcry_cipher_spec_twofish128 =
    1856             :   {
                           /* Cipher descriptor registered under the name "TWOFISH128"; it
                            * shares the setkey/encrypt/decrypt functions and context size
                            * with the "TWOFISH" descriptor.
                            * NOTE(review): the numeric fields 16 and 128 are presumably the
                            * block size in bytes and the key length in bits; confirm against
                            * the gcry_cipher_spec_t definition. */
    1857             :     GCRY_CIPHER_TWOFISH128, {0, 0},
    1858             :     "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context),
    1859             :     twofish_setkey, twofish_encrypt, twofish_decrypt
    1860             :   };

Generated by: LCOV version 1.13