/*
 * The source for this file AUTO-GENERATED on:
 * Sat Dec  6 12:42:49 UTC 2025
 *
 * NOTE.  This file IS 100% auto-generated code.
 *
 * **DO NOT** edit any portion of this .c file. ANY change
 * made to this file WILL BE LOST, the next time the
 * 'make distclean' is run, or whenever the 'base' files
 * which generate this file are modified.
 *
 * If bugs are found, then they must be fixed in the file(s)
 * of dynamic_big_crypt_header.cin, which is the 'common'
 * functions, data and includes used by all hashes, OR
 * the bug must be fixed (carefully) in the file
 * dynamic_big_crypt_hash.cin   This second file is
 * a 100% common file.  Each of the hashes we have, MUST
 * be able to be run from this file, after it is run
 * through the dynamic_big_crypt_chopper.pl filter program
 * which was built specifically for this task. The
 * existing command line arguments which run
 * dynamic_big_crypt_chopper.pl are in the shell script
 * dynamic_big_crypt_generator.sh which is the script
 * that actually generated THIS file.
 */

/*****************************************************************************
 * This software was written by Jim Fougeron jfoug AT cox dot net
 * in 2013-2016. No copyright is claimed, and the software is hereby
 * placed in the public domain. In case this attempt to disclaim
 * copyright and place the software in the public domain is deemed
 * null and void, then the software is Copyright (c) 2013-2016 Jim Fougeron
 * and it is hereby released to the general public under the following
 * terms:
 *
 * This software may be modified, redistributed, and used for any
 * purpose, in source and binary forms, with or without modification.
 *
 * Generic 'scriptable' hash cracker for JtR.  These are the 'larger' crypt
 * items. They have been separated from dynamic_fmt.c, and placed into this
 * stand alone file.  In this code, there are a lot of lines of code, but
 * the code is very cookie cutter.
 *
 *  NOTE the code was so cookie cutter, that in summer 2015, it was simply
 *       DELETED from the version control.  Now the code (THIS FILE), is
 *       auto-generated by the ./configure script.  All hashes will use
 *       the same prototype code, so once that code is written correctly
 *       then all hashes should work FINE. Also, if there is an optimization
 *       or extension found, then simply editing the prototype code and
 *       rerunning configure will get all hashes fixed, or updated.
 *
 *  NOTE All other sections of code that need 'hand edited' in the dynamic
 *       set of files, when adding a new hash type can be found by searching
 *       for this string:   LARGE_HASH_EDIT_POINT
 *
 ****************************************************************************/

#if AC_BUILT
#include "autoconfig.h"
#endif
#ifndef DYNAMIC_DISABLED

#include <stdint.h>

#include "openssl_local_overrides.h"

#include "arch.h"

#if defined(SIMD_COEF_32) && !ARCH_LITTLE_ENDIAN
	#undef SIMD_COEF_32
	#undef SIMD_COEF_64
	#undef SIMD_PARA_MD5
	#undef SIMD_PARA_MD4
	#undef SIMD_PARA_SHA1
	#undef SIMD_PARA_SHA256
	#undef SIMD_PARA_SHA512
	#define BITS ARCH_BITS_STR
#endif

#if !FAST_FORMATS_OMP
#ifdef _OPENMP
  #define FORCE_THREAD_MD5_body
#endif
#undef _OPENMP
#endif

#include "misc.h"
#include "common.h"
#include "formats.h"
#include "sha.h"
#include "sha2.h"
#include "md5.h"
#include "md4.h"
#include "dynamic.h"
#include "johnswap.h"
#include "simd-intrinsics.h"
#include "dynamic_types.h"

/*
 * NOTE!!!!  All hash types must use a CTX structure, and the 'final' function
 * must be the same syntax as the oSSL interface.  final(uchar *output, ctx *)
 * Hashes with this order reversed, need a #define here, to 'fix' their order
 */

#include "gost.h"
#define john_gost_final(a,b) john_gost_final(b,a)

#include "sph_ripemd.h"
#define sph_ripemd128_close(a,b) sph_ripemd128_close(b,a)
#define sph_ripemd160_close(a,b) sph_ripemd160_close(b,a)
#define sph_ripemd256_close(a,b) sph_ripemd256_close(b,a)
#define sph_ripemd320_close(a,b) sph_ripemd320_close(b,a)

#include "sph_tiger.h"
#define sph_tiger_close(a,b) sph_tiger_close(b,a)

#include "sph_haval.h"
#define sph_haval128_3_close(a,b) sph_haval128_3_close(b,a)
#define sph_haval128_4_close(a,b) sph_haval128_4_close(b,a)
#define sph_haval128_5_close(a,b) sph_haval128_5_close(b,a)
#define sph_haval160_3_close(a,b) sph_haval160_3_close(b,a)
#define sph_haval160_4_close(a,b) sph_haval160_4_close(b,a)
#define sph_haval160_5_close(a,b) sph_haval160_5_close(b,a)
#define sph_haval192_3_close(a,b) sph_haval192_3_close(b,a)
#define sph_haval192_4_close(a,b) sph_haval192_4_close(b,a)
#define sph_haval192_5_close(a,b) sph_haval192_5_close(b,a)
#define sph_haval224_3_close(a,b) sph_haval224_3_close(b,a)
#define sph_haval224_4_close(a,b) sph_haval224_4_close(b,a)
#define sph_haval224_5_close(a,b) sph_haval224_5_close(b,a)
#define sph_haval256_3_close(a,b) sph_haval256_3_close(b,a)
#define sph_haval256_4_close(a,b) sph_haval256_4_close(b,a)
#define sph_haval256_5_close(a,b) sph_haval256_5_close(b,a)

#include "sph_md2.h"
#define sph_md2_close(a,b) sph_md2_close(b,a)

#include "sph_panama.h"
#define sph_panama_close(a,b) sph_panama_close(b,a)

#include "sph_skein.h"
#define sph_skein224_close(a,b) sph_skein224_close(b,a)
#define sph_skein256_close(a,b) sph_skein256_close(b,a)
#define sph_skein384_close(a,b) sph_skein384_close(b,a)
#define sph_skein512_close(a,b) sph_skein512_close(b,a)

#if HAVE_LIBCRYPTO
#include <openssl/opensslv.h>
#endif
#if (AC_BUILT && HAVE_WHIRLPOOL) ||	  \
   (!AC_BUILT && OPENSSL_VERSION_NUMBER >= 0x10000000 && !HAVE_NO_SSL_WHIRLPOOL)
#include <openssl/whrlpool.h>
#else
#include "sph_whirlpool.h"
#define WHIRLPOOL_CTX             sph_whirlpool_context
#define WHIRLPOOL_Init(a)         sph_whirlpool_init(a)
#define WHIRLPOOL_Update(a,b,c)   sph_whirlpool(a,b,c)
#define WHIRLPOOL_Final(a,b)      sph_whirlpool_close(b,a)
#endif

#include "KeccakHash.h"
#define KECCAK_CTX                  Keccak_HashInstance
#define KECCAK_Update(a,b,c)        Keccak_HashUpdate(a,b,(c)*8)
#define KECCAK_Final(a,b)           Keccak_HashFinal(b,a)
#define KECCAK_224_Init(hash)       Keccak_HashInitialize(hash, 1152,  448, 224, 0x01)
#define KECCAK_256_Init(hash)       Keccak_HashInitialize(hash, 1088,  512, 256, 0x01)
#define KECCAK_384_Init(hash)       Keccak_HashInitialize(hash,  832,  768, 384, 0x01)
#define KECCAK_512_Init(hash)       Keccak_HashInitialize(hash,  576, 1024, 512, 0x01)
// FIPS202 compliant
#define SHA3_224_Init(hash)         Keccak_HashInitialize(hash, 1152,  448, 224, 0x06)
#define SHA3_256_Init(hash)         Keccak_HashInitialize(hash, 1088,  512, 256, 0x06)
#define SHA3_384_Init(hash)         Keccak_HashInitialize(hash,  832,  768, 384, 0x06)
#define SHA3_512_Init(hash)         Keccak_HashInitialize(hash,  576, 1024, 512, 0x06)

#include "sm3.h"
#define sm3_final(a,b) sm3_final(b,a)

#ifdef _OPENMP
#include <omp.h>
#endif


#if !defined (_DEBUG)
#define m_count m_Dynamic_Count
#endif
extern unsigned int m_count;

#define eLargeOut dyna_eLargeOut
extern eLargeOut_t *eLargeOut;
#define nLargeOff dyna_nLargeOff
extern unsigned *nLargeOff;

extern MD5_OUT *crypt_key_X86;
extern MD5_OUT *crypt_key2_X86;
extern MD5_IN *input_buf_X86;
extern MD5_IN *input_buf2_X86;
extern unsigned int *total_len_X86;
extern unsigned int *total_len2_X86;
extern BIG_HASH_OUT dynamic_BHO[4];

extern const char *dynamic_itoa16;

#if !defined (_DEBUG)
#define curdat Dynamic_curdat
#endif
extern private_subformat_data curdat;

#if !defined (_DEBUG)
#define itoa16_w2 __Dynamic_itoa_w2
#define itoa16_w2_u __Dynamic_itoa_w2_u
#define itoa16_w2_l __Dynamic_itoa_w2_l
#endif
extern unsigned short itoa16_w2_u[256], *itoa16_w2;

/* Store the large-hash output encoding `what` in slot `tid` of eLargeOut. */
inline static void _eLargeOut_set(eLargeOut_t what, int tid)
{
	eLargeOut_t *slot = &eLargeOut[tid];

	*slot = what;
}

/* Read back the large-hash output encoding stored in slot `tid` (as an int). */
inline static int _eLargeOut_get(int tid)
{
	const int mode = eLargeOut[tid];

	return mode;
}

/* Store the large-hash output byte offset `what` in slot `tid` of nLargeOff. */
inline static void _nLargeOff_set(unsigned what, int tid)
{
	unsigned *slot = &nLargeOff[tid];

	*slot = what;
}

/* Read back the large-hash output byte offset stored in slot `tid` (converted to int). */
inline static int _nLargeOff_get(int tid)
{
	const int offset = nLargeOff[tid];

	return offset;
}

/*
 * Public wrappers around the _eLargeOut_* / _nLargeOff_* accessors above.
 * Without OpenMP the `tid` argument is discarded and slot 0 is always used,
 * so callers may pass an identifier that is not even declared in that build.
 * With OpenMP the caller's `tid` selects the slot.
 */
#if !defined (_OPENMP)
#define eLargeOut_set(what, tid)  _eLargeOut_set(what, 0)
#define eLargeOut_get(tid)        _eLargeOut_get(0)
#define nLargeOff_set(what, tid)  _nLargeOff_set(what, 0)
#define nLargeOff_get(tid)        _nLargeOff_get(0)
#else
#define eLargeOut_set(what, tid)  _eLargeOut_set(what, tid)
#define eLargeOut_get(tid)        _eLargeOut_get(tid)
#define nLargeOff_set(what, tid)  _nLargeOff_set(what, tid)
#define nLargeOff_get(tid)        _nLargeOff_get(tid)
#endif

/* These SIMPLE setter functions, change how the large hash output format is performed   */
/* Once set, it stays that way, until set a different way.  By DEFAULT (i.e. it is reset */
/* this way each time), when crypt_all is called, the large output is in eBase16 mode    */
// These MIGHT have problems in _OPENMP builds!!
void DynamicFunc__LargeHash_OUTMode_base16(DYNA_OMP_PARAMS)
{
	eLargeOut_set(eBase16,tid);
}

void DynamicFunc__LargeHash_OUTMode_base16u(DYNA_OMP_PARAMS)
{
	eLargeOut_set(eBase16u,tid);
}

void DynamicFunc__LargeHash_OUTMode_base64(DYNA_OMP_PARAMS)
{
	eLargeOut_set(eBase64,tid);
}

void DynamicFunc__LargeHash_OUTMode_base64c(DYNA_OMP_PARAMS)
{
	eLargeOut_set(eBase64c,tid);
}

void DynamicFunc__LargeHash_OUTMode_base64_nte(DYNA_OMP_PARAMS)
{
	eLargeOut_set(eBase64_nte,tid);
}

void DynamicFunc__LargeHash_OUTMode_raw(DYNA_OMP_PARAMS)
{
	eLargeOut_set(eBaseRaw,tid);
}


/* Record 16 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_16(DYNA_OMP_PARAMS)
{
	const unsigned offset = 16;

	nLargeOff_set(offset, tid);
}
/* Record 20 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_20(DYNA_OMP_PARAMS)
{
	const unsigned offset = 20;

	nLargeOff_set(offset, tid);
}
/* Record 24 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_24(DYNA_OMP_PARAMS)
{
	const unsigned offset = 24;

	nLargeOff_set(offset, tid);
}
/* Record 28 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_28(DYNA_OMP_PARAMS)
{
	const unsigned offset = 28;

	nLargeOff_set(offset, tid);
}
/* Record 32 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_32(DYNA_OMP_PARAMS)
{
	const unsigned offset = 32;

	nLargeOff_set(offset, tid);
}
/* Record 40 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_40(DYNA_OMP_PARAMS)
{
	const unsigned offset = 40;

	nLargeOff_set(offset, tid);
}
/* Record 48 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_48(DYNA_OMP_PARAMS)
{
	const unsigned offset = 48;

	nLargeOff_set(offset, tid);
}
/* Record 56 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_56(DYNA_OMP_PARAMS)
{
	const unsigned offset = 56;

	nLargeOff_set(offset, tid);
}
/* Record 64 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_64(DYNA_OMP_PARAMS)
{
	const unsigned offset = 64;

	nLargeOff_set(offset, tid);
}
/* Record 80 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_80(DYNA_OMP_PARAMS)
{
	const unsigned offset = 80;

	nLargeOff_set(offset, tid);
}
/* Record 96 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_96(DYNA_OMP_PARAMS)
{
	const unsigned offset = 96;

	nLargeOff_set(offset, tid);
}
/* Record 100 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_100(DYNA_OMP_PARAMS)
{
	const unsigned offset = 100;

	nLargeOff_set(offset, tid);
}
/* Record 112 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_112(DYNA_OMP_PARAMS)
{
	const unsigned offset = 112;

	nLargeOff_set(offset, tid);
}
/* Record 128 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_128(DYNA_OMP_PARAMS)
{
	const unsigned offset = 128;

	nLargeOff_set(offset, tid);
}
/* Record 160 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_160(DYNA_OMP_PARAMS)
{
	const unsigned offset = 160;

	nLargeOff_set(offset, tid);
}
/* Record 192 as the output offset later read by the *_at_offset_* primitives. */
void DynamicFunc__LargeHash_set_offset_192(DYNA_OMP_PARAMS)
{
	const unsigned offset = 192;

	nLargeOff_set(offset, tid);
}
extern int get_dynamic_fmt_saltlen();
/*
 * Use whatever get_dynamic_fmt_saltlen() returns as the output offset later
 * read by the *_at_offset_* primitives.
 */
void DynamicFunc__LargeHash_set_offset_saltlen(DYNA_OMP_PARAMS)
{
	const unsigned offset = get_dynamic_fmt_saltlen();

	nLargeOff_set(offset, tid);
}
/******************************************************************************
 *****  These helper functions are used by all of the 'LARGE' hash functions.
 *****  These are used to convert an 'out' into the proper format, and writing
 *****  it to the buffer.  Currently we handle base-16, base-16u, base-64 and
 *****  raw buffer writing. These functions do not return any count of bytes
 *****  nor deal with things like overwrite/appending.  That has to be done in
 *****  the calling function.  The caller will get the pointers setup, then call
 *****  these helpers.  Then the caller will update any length values if needed
 *****  based upon what the output pointer was, and what was returned by these
 *****  helpers.  Doing things like this will reduce the size of the large hash
 *****  primitive functions.
 ******************************************************************************/
/*
 * Write each of the in_byte_cnt bytes at cpi as two hex characters at cpo,
 * using itoa16_w2 (unaligned-capable builds) or dynamic_itoa16 (otherwise).
 * cpo must have room for in_byte_cnt*2 bytes; no terminator is written.
 * Returns the position just past the last character written.
 */
inline static unsigned char *hex_out_buf(unsigned char *cpi, unsigned char *cpo, int in_byte_cnt)
{
	unsigned int remaining = in_byte_cnt;

	while (remaining--) {
#if ARCH_ALLOWS_UNALIGNED
		/* one 16-bit store emits both hex digits of the byte */
		*((unsigned short*)cpo) = itoa16_w2[*cpi];
		cpo += 2;
#else
		*cpo++ = dynamic_itoa16[*cpi>>4];
		*cpo++ = dynamic_itoa16[*cpi&0xF];
#endif
		++cpi;
	}
	return cpo;
}

/*
 * Same contract as hex_out_buf, but through the itoa16_w2_u / itoa16u tables
 * (the output mode eBase16u maps here).
 * NOTE, cpo must be at least in_byte_cnt*2 bytes of buffer.
 * Returns the position just past the last character written.
 */
inline static unsigned char *hexu_out_buf(unsigned char *cpi, unsigned char *cpo, int in_byte_cnt)
{
	unsigned int remaining = in_byte_cnt;

	while (remaining--) {
#if ARCH_ALLOWS_UNALIGNED
		/* one 16-bit store emits both hex digits of the byte */
		*((unsigned short*)cpo) = itoa16_w2_u[*cpi];
		cpo += 2;
#else
		*cpo++ = itoa16u[*cpi>>4];
		*cpo++ = itoa16u[*cpi&0xF];
#endif
		++cpi;
	}
	return cpo;
}

/*
 * Copy in_byte_cnt raw bytes from cpi to cpo and return the position just
 * past the last byte written.
 * NOTE, cpo must be at least in_byte_cnt bytes of buffer.
 *
 * On builds that allow unaligned access the bulk is moved as 32-bit words.
 * Any 1..3 trailing bytes are then copied individually, so a count that is
 * not a multiple of 4 is no longer silently truncated and both build
 * variants copy the same number of bytes.  A count <= 0 copies nothing.
 */
inline static unsigned char *raw_out_buf(unsigned char *cpi, unsigned char *cpo, int in_byte_cnt)
{
	unsigned int j;
#if ARCH_ALLOWS_UNALIGNED
	uint32_t *pi = (uint32_t*)cpi;
	uint32_t *po = (uint32_t*)cpo;
	unsigned int words, tail;
#endif

	if (in_byte_cnt <= 0)
		return cpo;
#if ARCH_ALLOWS_UNALIGNED
	words = (unsigned int)in_byte_cnt >> 2;
	tail = (unsigned int)in_byte_cnt & 3;
	for (j = 0; j < words; ++j)
		*po++ = *pi++;
	/* finish whatever did not fill a whole 32-bit word */
	cpi = (unsigned char*)pi;
	cpo = (unsigned char*)po;
	for (j = 0; j < tail; ++j)
		*cpo++ = *cpi++;
	return cpo;
#else
	for (j = 0; j < (unsigned int)in_byte_cnt; ++j)
		*cpo++ = *cpi++;
	return cpo;
#endif
}

/*
 * 'Standard' MIME base-64 encoder.
 * Encodes in_byte_cnt bytes from cpi into cpo.  A trailing group of 1 or 2
 * bytes is emitted as 2 or 3 characters and, when add_eq is non-zero, padded
 * with '=' to 4 characters.  No terminator is written.
 * Returns the position just past the last character written.
 */
inline static unsigned char *base64_out_buf(unsigned char *cpi, unsigned char *cpo, int in_byte_cnt, int add_eq)
{
	static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	int left;

	/* whole 3-byte groups -> 4 characters each */
	for (left = in_byte_cnt; left >= 3; left -= 3, cpi += 3) {
		const unsigned int triple = ((unsigned int)cpi[0] << 16) | ((unsigned int)cpi[1] << 8) | cpi[2];

		cpo[0] = alphabet[triple >> 18];
		cpo[1] = alphabet[(triple >> 12) & 0x3f];
		cpo[2] = alphabet[(triple >> 6) & 0x3f];
		cpo[3] = alphabet[triple & 0x3f];
		cpo += 4;
	}

	switch (left) {
	case 2:
		*cpo++ = alphabet[cpi[0] >> 2];
		*cpo++ = alphabet[((cpi[0] & 0x03) << 4) | (cpi[1] >> 4)];
		*cpo++ = alphabet[(cpi[1] & 0x0f) << 2];
		if (add_eq)
			*cpo++ = '=';
		break;
	case 1:
		*cpo++ = alphabet[cpi[0] >> 2];
		*cpo++ = alphabet[(cpi[0] & 0x03) << 4];
		if (add_eq) {
			*cpo++ = '=';
			*cpo++ = '=';
		}
		break;
	default:
		break;
	}
	return cpo;
}

/*
 * 'crypt' charset base-64 encoder ("./0-9A-Za-z").
 * Bit grouping is the same as base64_out_buf; only the alphabet differs and
 * no '=' padding is ever written.  A trailing group of 1 or 2 bytes yields
 * 2 or 3 characters.  No terminator is written.
 * Returns the position just past the last character written.
 */
inline static unsigned char *base64c_out_buf(unsigned char *cpi, unsigned char *cpo, int in_byte_cnt)
{
	static const char alphabet[] = "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
	int left;

	/* whole 3-byte groups -> 4 characters each */
	for (left = in_byte_cnt; left >= 3; left -= 3, cpi += 3) {
		const unsigned int triple = ((unsigned int)cpi[0] << 16) | ((unsigned int)cpi[1] << 8) | cpi[2];

		cpo[0] = alphabet[triple >> 18];
		cpo[1] = alphabet[(triple >> 12) & 0x3f];
		cpo[2] = alphabet[(triple >> 6) & 0x3f];
		cpo[3] = alphabet[triple & 0x3f];
		cpo += 4;
	}

	switch (left) {
	case 2:
		*cpo++ = alphabet[cpi[0] >> 2];
		*cpo++ = alphabet[((cpi[0] & 0x03) << 4) | (cpi[1] >> 4)];
		*cpo++ = alphabet[(cpi[1] & 0x0f) << 2];
		break;
	case 1:
		*cpo++ = alphabet[cpi[0] >> 2];
		*cpo++ = alphabet[(cpi[0] & 0x03) << 4];
		break;
	default:
		break;
	}
	return cpo;
}

inline int large_hash_output(unsigned char *cpi, unsigned char *cpo, int in_byte_cnt, int tid)
{
	unsigned char *cpo2=cpo;
	switch(eLargeOut_get(tid)) {
		case eBase16:
			cpo2 = hex_out_buf(cpi, cpo, in_byte_cnt);
			break;
		case eBase16u:
			cpo2 = hexu_out_buf(cpi, cpo, in_byte_cnt);
			break;
		case eBase64:
			cpo2 = base64_out_buf(cpi, cpo, in_byte_cnt, 1);
			break;
		case eBase64_nte:
			cpo2 = base64_out_buf(cpi, cpo, in_byte_cnt, 0);
			break;
		case eBase64c:
			cpo2 = base64c_out_buf(cpi, cpo, in_byte_cnt);
			break;
		case eBaseRaw:
			cpo2 = raw_out_buf(cpi, cpo, in_byte_cnt);
			break;
		case eUNK:
		default:
			exit(fprintf(stderr, "Error, unknown 'output' state found in large_hash_output function, in %s\n", curdat.dynamic_WHICH_TYPE_SIG));
	}
	return cpo2-cpo;
}

#if SIMD_COEF_32
/*
 * Pad a flat message buffer in place for a hash with 64-byte blocks and a
 * length field stored in the last two 32-bit words of the final block.
 *
 *   input_buf  message bytes; must be writable up to ret*64 bytes
 *   total_len  number of message bytes currently in input_buf
 *   BE_HASH    non-zero: bit length is byte-swapped (JOHNSWAP) into the last
 *              word; zero: bit length is stored as-is in the second-to-last
 *              word
 *
 * Returns the number of 64-byte blocks the padded message occupies
 * (total_len/64 + 1, plus one more when the tail has no room for the
 * 0x80 marker and the 8-byte length, i.e. total_len%64 > 55).
 */
inline static uint32_t Do_FixBufferLen32(unsigned char *input_buf, int total_len, int BE_HASH)
{
	uint32_t *p;
	unsigned char *cp;
	unsigned int i;
	uint32_t ret = (total_len / 64) + 1;

	if (total_len % 64 > 55)
		++ret;
	cp = &(input_buf[total_len]);
	i = total_len;
	// first, get us to an even 32 bit boundary.
	while (i&3) {
		*cp++ = 0;
		++i;
	}
	// now switch to uint_32's
	p = (uint32_t *)cp;
	// this is how many 32 bit words max we will clean.
	i = ((ret<<6)-i)/4-2;
	while (i--) {
		*p++ = 0;
		// Stop early once the next two words are already zero.
		// NOTE(review): presumably relies on the remainder of the buffer
		// being clean from earlier use -- confirm against the callers.
		if (!p[0] && !p[1])
		break;
	}
	// The padding marker overwrites the first zero written above.
	input_buf[total_len] = 0x80;
	p = (uint32_t *)input_buf;
	// Message length in bits goes into the last 8 bytes of the final block.
	if (BE_HASH) {
		p[(ret*16)-2] = 0;
		p[(ret*16)-1] = JOHNSWAP(total_len<<3);
	} else {
		p[(ret*16)-2] = (total_len<<3);
		p[(ret*16)-1] = 0;
	}
	return ret;
}

/*
 * 64-bit-word counterpart of Do_FixBufferLen32: pad a flat message buffer in
 * place for a hash with 128-byte blocks whose length field occupies the last
 * two 64-bit words of the final block.
 *
 *   input_buf  message bytes; must be writable up to ret*128 bytes
 *   total_len  number of message bytes currently in input_buf
 *   BE_HASH    non-zero: bit length is byte-swapped (JOHNSWAP64) into the
 *              last word; zero: stored as-is in the second-to-last word
 *
 * Returns the number of 128-byte blocks the padded message occupies
 * (total_len/128 + 1, plus one more when total_len%128 > 111).
 */
inline static uint32_t Do_FixBufferLen64(unsigned char *input_buf, int total_len, int BE_HASH)
{
	uint64_t *p;
	unsigned char *cp;
	unsigned int i;
	unsigned int ret = (total_len / 128) + 1;

	if (total_len % 128 > 111)
		++ret;
	cp = &(input_buf[total_len]);
	i = total_len;
	// first, get us to an even 64 bit boundary.
	while (i&7) {
		*cp++ = 0;
		++i;
	}
	// now switch to uint_64's
	p = (uint64_t *)cp;
	// this is how many 64 bit words max we will clean.
	i = ((ret<<7)-i)/8-2;
	while (i--) {
		*p++ = 0;
		// Stop early once the next two words are already zero.
		if (!p[0] && !p[1])
			break;
	}
	// Extra scrubbing passes, chosen by where the loop above stopped.
	// NOTE(review): these look like they clear leftovers that a 64-byte-block
	// hash may have left at byte 56.. or 184.. of the same buffer -- confirm.
	if ( ( ((unsigned char*)p)-input_buf) < 56) {
		p = &(((uint64_t *)input_buf)[7]);
		i = 22; // 256 bytes - 64 bytes (for first 64 byte MD buffer) / 8 bytes per uint64_t - 2 (-2 is last 16 bytes of 256 buffer)
		do {
			*p++ = 0;
			if (!p[0] && !p[1])
				break;
		} while (i--);
	} else if ( ( ((unsigned char*)p)-input_buf) > 111 && ( ((unsigned char*)p)-input_buf) < 184) {
		p = &(((uint64_t *)input_buf)[23]);
		i = 6; // 256 bytes - 192 bytes (for first 3-32 byte MD buffers) / 8 bytes per uint64_t - 2 (-2 is last 16 bytes of 256 buffer)
		do {
			*p++ = 0;
			if (!p[0] && !p[1])
				break;
		} while (i--);
	}
	// The padding marker overwrites the first zero written above.
	input_buf[total_len] = 0x80;
	p = (uint64_t *)input_buf;

	// Message length in bits goes into the last 16 bytes of the final block.
	if (BE_HASH) {
		p[(ret*16)-2] = 0;
		p[(ret*16)-1] = JOHNSWAP64(total_len<<3);
	} else {
		p[(ret*16)-2] = (total_len<<3);
		p[(ret*16)-1] = 0;
	}

	return ret;
}
#endif

/*
 * Loop-bound prologue placed at the top of every DynamicFunc__* primitive.
 * It declares `i` (start index) and `til` (end index, exclusive).
 *  - OpenMP builds: the range is first..last; both macros are identical and
 *    declare no tid (first/last/tid are expected to come from
 *    DYNA_OMP_PARAMS -- defined outside this file).
 *  - Other builds: the range is 0..m_count, and PRELIM_W_TID additionally
 *    declares tid = 0 for code that names it.
 */
#ifdef _OPENMP
#define PRELIM_W_TID   uint32_t i=first, til=last
#define PRELIM_NO_TID  uint32_t i=first, til=last
#else
#define PRELIM_W_TID   uint32_t i=0, til=m_count, tid=0
#define PRELIM_NO_TID  uint32_t i=0, til=m_count
#endif

/****************************************************************************
 ****************************************************************************
 ** NOTE, all code after this point should NEVER be hand edited.           **
 ** if there are bugs found, then the code in dynamic_big_crypt_hash.cin   **
 ** is what needs to be fixed, and then the build procedure 'may' also     **
 ** need to be updated.  A perl script, and the dynamic_big_crypt_hash.cin **
 ** is used to generate all the code after this comment!                   **
 *****************************************************************************
 ****************************************************************************/

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=MD5 PARAHASH=MD5 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=MD5_CTX HASH_Init=MD5_Init HASH_Update=MD5_Update HASH_Final=MD5_Final SSEBody=SIMDmd5body SSE_LIMBS=4 SSE_ONLY_LIMBS=2BUF_ SSEFLAGS=  UNDEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  MD5 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_MD5
#define MD5_LOOPS (SIMD_COEF_32*SIMD_PARA_MD5)
static const uint32_t MD5_inc = MD5_LOOPS;

/*
 * SIMD MD5 of MD5_LOOPS flat input buffers, writing raw 16-byte digests.
 *
 *   in   MD5_LOOPS consecutive buffers, 64*4 bytes apart; each is padded in
 *        place by Do_FixBufferLen32
 *   len  message length of each buffer
 *   out  receives MD5_LOOPS digests as 4 consecutive uint32_t each
 *
 * SIMDmd5body is run once per 64-byte block position.  A lane's digest is
 * copied out of the interleaved state `a` in the round that equals that
 * lane's block count; rounds continue while any lane still has blocks left.
 */
static void DoMD5_crypt_f_sse(void *in, uint32_t len[MD5_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(16*MD5_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[MD5_LOOPS], bMore, cnt;
//	uint32_t max_cnt=0;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < MD5_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 0);
//		NOTE(review): if this disabled fast path is ever revived, loops[1] below should be loops[i].
//		if (max_cnt < loops[i]) max_cnt = loops[1];
		cp += 64*4;
	}
	cp = (unsigned char*)in;
//	if (max_cnt==1) { SIMDmd5body(cp, out, out, SSEi_FLAT_IN |SSEi_4BUF_INPUT|SSEi_FLAT_OUT); return; }
	bMore = 1;
	cnt = 1;
	while (bMore) {
		// First round starts from a fresh state; later rounds pass SSEi_RELOAD with `a` as both input state and output.
		SIMDmd5body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < MD5_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// offx: index of word 0 of lane i inside its SIMD_COEF_32-wide group of `a`.
				uint32_t offx = ((i/SIMD_COEF_32)*(16/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] =  a[(j*SIMD_COEF_32)+offx];
				}
			} else if (cnt < loops[i]) bMore = 1;
		}
		// advance every lane by one 64-byte block
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD MD5 of MD5_LOOPS flat input buffers, with each digest encoded through
 * large_hash_output and stored into a per-lane output buffer.
 *
 *   in       MD5_LOOPS consecutive buffers, 64*4 bytes apart; padded in place
 *   ilen     message length of each buffer
 *   out      per-lane destination buffers
 *   tot_len  per-lane write position in out[i]; advanced by the number of
 *            bytes large_hash_output reports as written
 *   tid      selects the output mode slot used by large_hash_output
 *
 * A lane's digest is emitted in the round that equals that lane's block
 * count; rounds continue while any lane still has blocks left.
 */
static void DoMD5_crypt_sse(void *in, uint32_t ilen[MD5_LOOPS], void *out[MD5_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(16*MD5_LOOPS)/sizeof(uint32_t)];
	// y: one lane's digest, gathered as words and handed on as bytes
	union yy { unsigned char u[16]; uint32_t a[16/sizeof(uint32_t)]; } y;
	uint32_t i, j, loops[MD5_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < MD5_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, ilen[i], 0);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	while (bMore) {
		// First round starts from a fresh state; later rounds pass SSEi_RELOAD with `a` as both input state and output.
		SIMDmd5body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < MD5_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// offx: index of word 0 of lane i inside its SIMD_COEF_32-wide group of `a`.
				uint32_t offx = ((i/SIMD_COEF_32)*16/sizeof(uint32_t)*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					y.a[j] =  a[(j*SIMD_COEF_32)+offx];
				}
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 16, tid);
			} else if (cnt < loops[i]) bMore = 1;
		}
		// advance every lane by one 64-byte block
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD MD5 of MD5_LOOPS flat input buffers, storing 4 words per lane in out.
 * Same loop as DoMD5_crypt_f_sse except that SIMDmd5body is additionally
 * passed SSEi_OUTPUT_AS_2BUF_INP_FMT.
 * NOTE(review): the exact layout that flag produces in `a` is defined by
 * SIMDmd5body, outside this file -- confirm before relying on the word order
 * of `out`.
 *
 *   in   MD5_LOOPS consecutive buffers, 64*4 bytes apart; padded in place
 *   len  message length of each buffer
 *   out  receives 4 consecutive uint32_t per lane
 */
inline static void DoMD5_sse_crypt_only(void *in, uint32_t len[MD5_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(16*MD5_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[MD5_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < MD5_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 0);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	while (bMore) {
		// First round starts from a fresh state; later rounds pass SSEi_RELOAD with `a` as both input state and output.
		SIMDmd5body(cp, a, a, SSEi_FLAT_IN |SSEi_OUTPUT_AS_2BUF_INP_FMT|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < MD5_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// offx: index of word 0 of lane i inside its SIMD_COEF_32-wide group of `a`.
				uint32_t offx = ((i/SIMD_COEF_32)*(16/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] =  a[(j*SIMD_COEF_32)+offx];
				}
			} else if (cnt < loops[i])
				bMore = 1;
		}
		// advance every lane by one 64-byte block
		cp += 32*2;
		++cnt;
	}
}

#else
#define MD5_LOOPS 1
static const uint32_t MD5_inc = 1;

/* One-shot MD5 of `len` bytes at `in`; the 16-byte raw digest is written to `out`. */
inline static void DoMD5_crypt_f(void *in, uint32_t len, void *out) {
	MD5_CTX md5;
	unsigned char *digest = (unsigned char*)out;

	MD5_Init(&md5);
	MD5_Update(&md5, in, len);
	MD5_Final(digest, &md5);
}

/*
 * MD5 `ilen` bytes at `in`, encode the digest in the output mode selected for
 * `tid`, and store it in `out` starting at byte *tot_len.  *tot_len is then
 * advanced by the number of bytes written.
 */
inline static void DoMD5_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[16];
	unsigned char *dst = &(((unsigned char*)out)[*tot_len]);
	MD5_CTX md5;

	MD5_Init(&md5);
	MD5_Update(&md5, in, ilen);
	MD5_Final(digest, &md5);

	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 16, tid);
		return;
	}
	// eBase16 is the usual mode: skip the large_hash_output dispatch and hex-encode directly.
	hex_out_buf(digest, dst, 16);
	*tot_len += 16*2;
}
/* One-shot MD5 of `ilen` bytes at `in`; the raw digest is written straight to `out`. */
inline static void DoMD5_crypt_only(void *in, uint32_t ilen, void *out)
{
	MD5_CTX md5;

	MD5_Init(&md5);
	MD5_Update(&md5, in, ilen);
	MD5_Final(out, &md5);
}
#endif

/*
 * For every candidate in range: MD5 the contents of input buffer 1 and append
 * the encoded digest to input buffer 2, growing total_len2 by the number of
 * bytes written.
 */
void DynamicFunc__MD5_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, src_len[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len_X86[idx];
			dst[lane] = input_buf2_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf2_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, &total_len2_X86[i], tid);
#else
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1) {
			/* odd candidates live in the second half of an X2 pair */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 2 and append
 * the encoded digest to input buffer 1, growing total_len by the number of
 * bytes written.
 */
void DynamicFunc__MD5_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, src_len[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len2_X86[idx];
			dst[lane] = input_buf_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, &total_len_X86[i], tid);
#else
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1) {
			/* odd candidates live in the second half of an X2 pair */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 1 and write
 * the encoded digest into input buffer 2 starting at the offset returned by
 * nLargeOff_get(tid).  total_len2 is left untouched.
 */
void DynamicFunc__MD5_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		const uint32_t start = nLargeOff_get(tid);
		uint32_t lane, src_len[MD5_LOOPS], pos[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len_X86[idx];
			pos[lane] = start;
			dst[lane] = input_buf2_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf2_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1) {
			/* odd candidates live in the second half of an X2 pair */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt(src, total_len_X86[i], dst, &pos, tid);
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 2 and write
 * the encoded digest into input buffer 1 starting at the offset returned by
 * nLargeOff_get(tid).  total_len is left untouched.
 */
void DynamicFunc__MD5_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		const uint32_t start = nLargeOff_get(tid);
		uint32_t lane, src_len[MD5_LOOPS], pos[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len2_X86[idx];
			pos[lane] = start;
			dst[lane] = input_buf_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1) {
			/* odd candidates live in the second half of an X2 pair */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt(src, total_len2_X86[i], dst, &pos, tid);
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 1 and write
 * the encoded digest back into input buffer 1 starting at the offset returned
 * by nLargeOff_get(tid).  total_len is left untouched.
 */
void DynamicFunc__MD5_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		const uint32_t start = nLargeOff_get(tid);
		uint32_t lane, src_len[MD5_LOOPS], pos[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len_X86[idx];
			pos[lane] = start;
			dst[lane] = input_buf_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;	/* odd candidates: second half of an X2 pair */
		#endif
		DoMD5_crypt(buf, total_len_X86[i], buf, &pos, tid);
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 2 and write
 * the encoded digest back into input buffer 2 starting at the offset returned
 * by nLargeOff_get(tid).  total_len2 is left untouched.
 */
void DynamicFunc__MD5_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		const uint32_t start = nLargeOff_get(tid);
		uint32_t lane, src_len[MD5_LOOPS], pos[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len2_X86[idx];
			pos[lane] = start;
			dst[lane] = input_buf2_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf2_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;	/* odd candidates: second half of an X2 pair */
		#endif
		DoMD5_crypt(buf, total_len2_X86[i], buf, &pos, tid);
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 1, write the
 * encoded digest at the start of input buffer 1, and set total_len to the
 * number of bytes written.
 */
void DynamicFunc__MD5_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, src_len[MD5_LOOPS], written[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len_X86[idx];
			written[lane] = 0;
			dst[lane] = input_buf_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, written, tid);
		for (lane = 0; lane < MD5_LOOPS; ++lane)
			total_len_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;	/* odd candidates: second half of an X2 pair */
		#endif
		DoMD5_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 1, write the
 * encoded digest at the start of input buffer 2, and set total_len2 to the
 * number of bytes written.
 */
void DynamicFunc__MD5_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, src_len[MD5_LOOPS], written[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len_X86[idx];
			written[lane] = 0;
			dst[lane] = input_buf2_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf2_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, written, tid);
		for (lane = 0; lane < MD5_LOOPS; ++lane)
			total_len2_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1) {
			/* odd candidates live in the second half of an X2 pair */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 2, write the
 * encoded digest at the start of input buffer 1, and set total_len to the
 * number of bytes written.
 */
void DynamicFunc__MD5_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, src_len[MD5_LOOPS], written[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len2_X86[idx];
			written[lane] = 0;
			dst[lane] = input_buf_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, written, tid);
		for (lane = 0; lane < MD5_LOOPS; ++lane)
			total_len_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1) {
			/* odd candidates live in the second half of an X2 pair */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
#endif
		i += MD5_inc;
	}
}

/*
 * For every candidate in range: MD5 the contents of input buffer 2, write the
 * encoded digest at the start of input buffer 2, and set total_len2 to the
 * number of bytes written.
 */
void DynamicFunc__MD5_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;

	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, src_len[MD5_LOOPS], written[MD5_LOOPS];
		void *dst[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; ++lane) {
			const uint32_t idx = i + lane;

			src_len[lane] = total_len2_X86[idx];
			written[lane] = 0;
			dst[lane] = input_buf2_X86[idx>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (lane & 1)
				dst[lane] = input_buf2_X86[idx>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, written, tid);
		for (lane = 0; lane < MD5_LOOPS; ++lane)
			total_len2_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;

		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;	/* odd candidates: second half of an X2 pair */
		#endif
		DoMD5_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
#endif
		i += MD5_inc;
	}
}

/*
 * Shared body of the DynamicFunc__MD5_crypt_inputN_to_outputM functions.
 *
 * X   selects the source: 1 = input_buf_X86/total_len_X86, any other
 *     value = input_buf2_X86/total_len2_X86.
 * Y   1-based index of the dynamic_BHO[] entry that receives the digests.
 * i   first candidate index, til = one past the last.
 *
 * Sets the entry's width to 16 up front and, per iteration, BE = 0,
 * bits = 32 and mixed_SIMD (1 in the SIMD build, 0 otherwise).
 */
inline static void _Dyna__MD5_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	const uint32_t slot = Y - 1;	// Y was 1 based for ease of reading.

	dynamic_BHO[slot].width = 16;
	while (i < til) {
#ifdef SIMD_PARA_MD5
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		uint32_t *src_len = &total_len2_X86[i];

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = &total_len_X86[i];
		}
		dynamic_BHO[slot].BE = 0;
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 1;
		DoMD5_sse_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
#else
		void *src;
		uint32_t src_len;

		dynamic_BHO[slot].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
		}
		DoMD5_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
#endif
		i += MD5_inc;
	}
}
/*
 * Public MD5 "input N -> output M" entry points.  Each one only forwards
 * to _Dyna__MD5_crypt_inputX_to_outputY() with X = N and Y = M.
 */
void DynamicFunc__MD5_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__MD5_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__MD5_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__MD5_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__MD5_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__MD5_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__MD5_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__MD5_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD5_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final MD5 step: hash input buffer 1 (length total_len_X86[]) and place
 * the result in crypt_key_X86[] for each candidate in [i, til).
 */
void DynamicFunc__MD5_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, in_len[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; lane++)
			in_len[lane] = total_len_X86[i+lane];
		DoMD5_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt_f(src, total_len_X86[i], dst);
#endif
		i += MD5_inc;
	}
}

/*
 * Final MD5 step: hash input buffer 2 (length total_len2_X86[]) and place
 * the result in crypt_key_X86[] for each candidate in [i, til).
 */
void DynamicFunc__MD5_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD5
		uint32_t lane, in_len[MD5_LOOPS];

		for (lane = 0; lane < MD5_LOOPS; lane++)
			in_len[lane] = total_len2_X86[i+lane];
		DoMD5_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD5_crypt_f(src, total_len2_X86[i], dst);
#endif
		i += MD5_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=MD4 PARAHASH=MD4 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=MD4_CTX HASH_Init=MD4_Init HASH_Update=MD4_Update HASH_Final=MD4_Final SSEBody=SIMDmd4body SSE_LIMBS=4 SSE_ONLY_LIMBS=2BUF_ SSEFLAGS=  UNDEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  MD4 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_MD4
/* SIMD build: number of buffers handled by one DoMD4_*_sse() call. */
#define MD4_LOOPS (SIMD_COEF_32*SIMD_PARA_MD4)
/* Candidate-index step used by the DynamicFunc__MD4_* loops. */
static const uint32_t MD4_inc = MD4_LOOPS;

/*
 * SIMD "final" MD4 over MD4_LOOPS buffers at once.
 *
 * in:  first input buffer; the code steps 64*4 bytes from one buffer to
 *      the next.
 * len: per-buffer input lengths, passed to Do_FixBufferLen32().
 * out: receives 16/sizeof(uint32_t) words per buffer, buffer i starting at
 *      word index i*4.
 */
static void DoMD4_crypt_f_sse(void *in, uint32_t len[MD4_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(16*MD4_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[MD4_LOOPS], bMore, cnt;
//	uint32_t max_cnt=0;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is later compared with the running call counter 'cnt', so it
	// is presumably the number of 64-byte blocks of buffer i after padding
	// -- TODO confirm against Do_FixBufferLen32().
	for (i = 0; i < MD4_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 0);
//		if (max_cnt < loops[i]) max_cnt = loops[1];
		cp += 64*4;
	}
	cp = (unsigned char*)in;
//	if (max_cnt==1) { SIMDmd4body(cp, out, out, SSEi_FLAT_IN |SSEi_4BUF_INPUT|SSEi_FLAT_OUT); return; }
	bMore = 1;
	cnt = 1;
	// One SIMDmd4body() call per pass; SSEi_RELOAD is added on every pass
	// after the first.  The loop ends once no buffer has loops[i] > cnt.
	while (bMore) {
		SIMDmd4body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < MD4_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Buffer i is complete on this pass: gather its words from
				// a[], where consecutive words of one buffer are
				// SIMD_COEF_32 entries apart.
				uint32_t offx = ((i/SIMD_COEF_32)*(16/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] =  a[(j*SIMD_COEF_32)+offx];
				}
			} else if (cnt < loops[i]) bMore = 1;
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD MD4 over MD4_LOOPS buffers, with each digest passed through
 * large_hash_output() into a per-buffer destination.
 *
 * in:      first input buffer; the code steps 64*4 bytes between buffers.
 * ilen:    per-buffer input lengths, passed to Do_FixBufferLen32().
 * out:     per-buffer destination pointers.
 * tot_len: per-buffer write offsets; output for buffer i is written at
 *          out[i] + tot_len[i], and tot_len[i] is then increased by the
 *          value large_hash_output() returns.
 * tid:     forwarded to large_hash_output().
 */
static void DoMD4_crypt_sse(void *in, uint32_t ilen[MD4_LOOPS], void *out[MD4_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(16*MD4_LOOPS)/sizeof(uint32_t)];
	union yy { unsigned char u[16]; uint32_t a[16/sizeof(uint32_t)]; } y;
	uint32_t i, j, loops[MD4_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the running call counter below; presumably
	// a block count -- TODO confirm against Do_FixBufferLen32().
	for (i = 0; i < MD4_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, ilen[i], 0);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// One SIMDmd4body() call per pass, SSEi_RELOAD on all but the first.
	while (bMore) {
		SIMDmd4body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < MD4_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Buffer i is complete: collect its 4 words from a[] into
				// y, then emit them in the configured output encoding.
				uint32_t offx = ((i/SIMD_COEF_32)*16/sizeof(uint32_t)*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					y.a[j] =  a[(j*SIMD_COEF_32)+offx];
				}
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 16, tid);
			} else if (cnt < loops[i]) bMore = 1;
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD MD4 over MD4_LOOPS buffers, storing the digest words directly
 * (no text encoding) into 'out'.
 *
 * in:  first input buffer; the code steps 64*4 bytes between buffers.
 * len: per-buffer input lengths, passed to Do_FixBufferLen32().
 * out: receives 16/sizeof(uint32_t) words per buffer, buffer i starting at
 *      word index i*4.
 *
 * Differs from DoMD4_crypt_f_sse() only in passing
 * SSEi_OUTPUT_AS_2BUF_INP_FMT to SIMDmd4body().
 */
inline static void DoMD4_sse_crypt_only(void *in, uint32_t len[MD4_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(16*MD4_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[MD4_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the running call counter below; presumably
	// a block count -- TODO confirm against Do_FixBufferLen32().
	for (i = 0; i < MD4_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 0);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// One SIMDmd4body() call per pass, SSEi_RELOAD on all but the first.
	while (bMore) {
		SIMDmd4body(cp, a, a, SSEi_FLAT_IN |SSEi_OUTPUT_AS_2BUF_INP_FMT|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < MD4_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Buffer i is complete on this pass: copy its words out.
				uint32_t offx = ((i/SIMD_COEF_32)*(16/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] =  a[(j*SIMD_COEF_32)+offx];
				}
			} else if (cnt < loops[i])
				bMore = 1;
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2;
		++cnt;
	}
}

#else
/* Non-SIMD build: one candidate is hashed per helper call. */
#define MD4_LOOPS 1
/* Candidate-index step used by the DynamicFunc__MD4_* loops. */
static const uint32_t MD4_inc = 1;

/*
 * One-shot MD4: hash 'len' bytes at 'in' and let MD4_Final() write the
 * digest straight to 'out'.
 */
inline static void DoMD4_crypt_f(void *in, uint32_t len, void *out) {
	MD4_CTX md4;

	MD4_Init(&md4);
	MD4_Update(&md4, in, len);
	MD4_Final((unsigned char*)out, &md4);
}

/*
 * MD4 of 'ilen' bytes at 'in'; the digest is encoded into 'out' starting at
 * byte offset *tot_len, and *tot_len is advanced by the encoded size.
 *
 * When the output mode for 'tid' is eBase16 the digest is hex-encoded
 * directly with hex_out_buf() (32 characters); every other mode goes
 * through large_hash_output(), whose return value is added to *tot_len.
 */
inline static void DoMD4_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[16];
	MD4_CTX md4;

	MD4_Init(&md4);
	MD4_Update(&md4, in, ilen);
	MD4_Final(digest, &md4);

	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, &(((unsigned char*)out)[*tot_len]), 16, tid);
		return;
	}
	// Hex is the common case, so it skips the large_hash_output() dispatch.
	hex_out_buf(digest, &(((unsigned char*)out)[*tot_len]), 16);
	*tot_len += 16*2;
}
/*
 * Plain MD4 of 'ilen' bytes at 'in'; the digest is written to 'out' by
 * MD4_Final() with no further encoding.
 */
inline static void DoMD4_crypt_only(void *in, uint32_t ilen, void *out)
{
	MD4_CTX md4;

	MD4_Init(&md4);
	MD4_Update(&md4, in, ilen);
	MD4_Final(out, &md4);
}
#endif

/*
 * MD4 of input buffer 1, appended to input buffer 2.
 * total_len2_X86[] itself is handed to the helper as the write offset, so
 * the encoded digest lands after the current content of buffer 2 and the
 * length grows by whatever the helper adds.
 */
void DynamicFunc__MD4_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
		}
		DoMD4_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, &(total_len2_X86[i]), tid);
#else
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 2, appended to input buffer 1.
 * total_len_X86[] itself is handed to the helper as the write offset, so
 * the encoded digest lands after the current content of buffer 1 and the
 * length grows by whatever the helper adds.
 */
void DynamicFunc__MD4_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
		}
		DoMD4_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, &(total_len_X86[i]), tid);
#else
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 1, written into input buffer 2 at the byte offset
 * returned by nLargeOff_get(tid).  The offset is kept in a local counter,
 * so total_len2_X86[] is left untouched.
 */
void DynamicFunc__MD4_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], pos[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoMD4_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt(src, total_len_X86[i], dst, &pos, tid);
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 2, written into input buffer 1 at the byte offset
 * returned by nLargeOff_get(tid).  The offset is kept in a local counter,
 * so total_len_X86[] is left untouched.
 */
void DynamicFunc__MD4_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], pos[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoMD4_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt(src, total_len2_X86[i], dst, &pos, tid);
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 1, written back into input buffer 1 at the byte
 * offset returned by nLargeOff_get(tid).  The offset is kept in a local
 * counter, so total_len_X86[] is left untouched.
 */
void DynamicFunc__MD4_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], pos[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoMD4_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoMD4_crypt(buf, total_len_X86[i], buf, &pos, tid);
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 2, written back into input buffer 2 at the byte
 * offset returned by nLargeOff_get(tid).  The offset is kept in a local
 * counter, so total_len2_X86[] is left untouched.
 */
void DynamicFunc__MD4_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], pos[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoMD4_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoMD4_crypt(buf, total_len2_X86[i], buf, &pos, tid);
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 1, written back over input buffer 1 from offset 0.
 * The source length is captured before the call; the number of bytes the
 * helper reports as written then replaces total_len_X86[].
 */
void DynamicFunc__MD4_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], wrote[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			wrote[lane] = 0;
		}
		DoMD4_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, wrote, tid);
		for (lane = 0; lane < MD4_LOOPS; lane++)
			total_len_X86[i+lane] = wrote[lane];
#else
		uint32_t wrote = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoMD4_crypt(buf, total_len_X86[i], buf, &wrote, tid);
		total_len_X86[i] = wrote;
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 1, written over input buffer 2 from offset 0.
 * The number of bytes the helper reports as written becomes the new
 * total_len2_X86[] value.
 */
void DynamicFunc__MD4_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], wrote[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			wrote[lane] = 0;
		}
		DoMD4_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, wrote, tid);
		for (lane = 0; lane < MD4_LOOPS; lane++)
			total_len2_X86[i+lane] = wrote[lane];
#else
		uint32_t wrote = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt(src, total_len_X86[i], dst, &wrote, tid);
		total_len2_X86[i] = wrote;
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 2, written over input buffer 1 from offset 0.
 * The number of bytes the helper reports as written becomes the new
 * total_len_X86[] value.
 */
void DynamicFunc__MD4_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], wrote[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			wrote[lane] = 0;
		}
		DoMD4_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, wrote, tid);
		for (lane = 0; lane < MD4_LOOPS; lane++)
			total_len_X86[i+lane] = wrote[lane];
#else
		uint32_t wrote = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt(src, total_len2_X86[i], dst, &wrote, tid);
		total_len_X86[i] = wrote;
#endif
		i += MD4_inc;
	}
}

/*
 * MD4 of input buffer 2, written back over input buffer 2 from offset 0.
 * The source length is captured before the call; the number of bytes the
 * helper reports as written then replaces total_len2_X86[].
 */
void DynamicFunc__MD4_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS], wrote[MD4_LOOPS];
		void *dst[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			wrote[lane] = 0;
		}
		DoMD4_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, wrote, tid);
		for (lane = 0; lane < MD4_LOOPS; lane++)
			total_len2_X86[i+lane] = wrote[lane];
#else
		uint32_t wrote = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoMD4_crypt(buf, total_len2_X86[i], buf, &wrote, tid);
		total_len2_X86[i] = wrote;
#endif
		i += MD4_inc;
	}
}

/*
 * Shared body of the DynamicFunc__MD4_crypt_inputN_to_outputM functions.
 *
 * X   selects the source: 1 = input_buf_X86/total_len_X86, any other
 *     value = input_buf2_X86/total_len2_X86.
 * Y   1-based index of the dynamic_BHO[] entry that receives the digests.
 * i   first candidate index, til = one past the last.
 *
 * Sets the entry's width to 16 up front and, per iteration, BE = 0,
 * bits = 32 and mixed_SIMD (1 in the SIMD build, 0 otherwise).
 */
inline static void _Dyna__MD4_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	const uint32_t slot = Y - 1;	// Y was 1 based for ease of reading.

	dynamic_BHO[slot].width = 16;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		uint32_t *src_len = &total_len2_X86[i];

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = &total_len_X86[i];
		}
		dynamic_BHO[slot].BE = 0;
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 1;
		DoMD4_sse_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
#else
		void *src;
		uint32_t src_len;

		dynamic_BHO[slot].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
		}
		DoMD4_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
#endif
		i += MD4_inc;
	}
}
/*
 * Public MD4 "input N -> output M" entry points.  Each one only forwards
 * to _Dyna__MD4_crypt_inputX_to_outputY() with X = N and Y = M.
 */
void DynamicFunc__MD4_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__MD4_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__MD4_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__MD4_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__MD4_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__MD4_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__MD4_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__MD4_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD4_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final MD4 step: hash input buffer 1 (length total_len_X86[]) and place
 * the digest in crypt_key_X86[] for each candidate in [i, til).
 */
void DynamicFunc__MD4_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++)
			in_len[lane] = total_len_X86[i+lane];
		DoMD4_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt_f(src, total_len_X86[i], dst);
#endif
		i += MD4_inc;
	}
}

/*
 * Final MD4 step: hash input buffer 2 (length total_len2_X86[]) and place
 * the digest in crypt_key_X86[] for each candidate in [i, til).
 */
void DynamicFunc__MD4_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_MD4
		uint32_t lane, in_len[MD4_LOOPS];

		for (lane = 0; lane < MD4_LOOPS; lane++)
			in_len[lane] = total_len2_X86[i+lane];
		DoMD4_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoMD4_crypt_f(src, total_len2_X86[i], dst);
#endif
		i += MD4_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SHA1 PARAHASH=SHA1 BIN_SZ=20 BIN_REAL_SZ=20 BE_HASH=1 JSWAPH=JOHNSWAP( JSWAPT=); HASH_CTX=SHA_CTX HASH_Init=SHA1_Init HASH_Update=SHA1_Update HASH_Final=SHA1_Final SSEBody=SIMDSHA1body SSE_LIMBS=4 SSE_ONLY_LIMBS=2BUF_ SSEFLAGS=  DEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  SHA1 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_SHA1
/* SIMD build: number of buffers handled by one DoSHA1_*_sse() call. */
#define SHA1_LOOPS (SIMD_COEF_32*SIMD_PARA_SHA1)
/* Candidate-index step used by the DynamicFunc__SHA1_* loops. */
static const uint32_t SHA1_inc = SHA1_LOOPS;

/*
 * SIMD "final" SHA1 over SHA1_LOOPS buffers at once.  Only the first
 * 16 bytes (4 words) of each 20-byte digest are stored.
 *
 * in:  first input buffer; the code steps 64*4 bytes between buffers.
 * len: per-buffer input lengths, passed to Do_FixBufferLen32().
 * out: receives 4 words per buffer, buffer i starting at word index i*4;
 *      each word goes through JOHNSWAP() first.
 */
static void DoSHA1_crypt_f_sse(void *in, uint32_t len[SHA1_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(20*SHA1_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[SHA1_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the running call counter below; presumably
	// a block count -- TODO confirm against Do_FixBufferLen32().
	for (i = 0; i < SHA1_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in;
	bMore = 1;
	cnt = 1;
	// One SIMDSHA1body() call per pass, SSEi_RELOAD on all but the first.
	while (bMore) {
		SIMDSHA1body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA1_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// offx uses the full 5-word state size to locate buffer i
				// in a[], even though only 4 words are copied out.
				uint32_t offx = ((i/SIMD_COEF_32)*(20/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				// only 16 bytes in the 'final'
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
			} else if (cnt < loops[i]) bMore = 1;
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD SHA1 over SHA1_LOOPS buffers, with each 20-byte digest passed
 * through large_hash_output() into a per-buffer destination.
 *
 * in:      first input buffer; the code steps 64*4 bytes between buffers.
 * ilen:    per-buffer input lengths, passed to Do_FixBufferLen32().
 * out:     per-buffer destination pointers.
 * tot_len: per-buffer write offsets; output for buffer i is written at
 *          out[i] + tot_len[i], and tot_len[i] is then increased by the
 *          value large_hash_output() returns.
 * tid:     forwarded to large_hash_output().
 */
static void DoSHA1_crypt_sse(void *in, uint32_t ilen[SHA1_LOOPS], void *out[SHA1_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(20*SHA1_LOOPS)/sizeof(uint32_t)];
	union yy { unsigned char u[20]; uint32_t a[20/sizeof(uint32_t)]; } y;
	uint32_t i, j, loops[SHA1_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the running call counter below; presumably
	// a block count -- TODO confirm against Do_FixBufferLen32().
	for (i = 0; i < SHA1_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, ilen[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// One SIMDSHA1body() call per pass, SSEi_RELOAD on all but the first.
	while (bMore) {
		SIMDSHA1body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA1_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Buffer i is complete: collect its 5 words (JOHNSWAP'd)
				// into y, then emit them in the configured encoding.
				uint32_t offx = ((i/SIMD_COEF_32)*20/sizeof(uint32_t)*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 20/sizeof(uint32_t); ++j) {
					y.a[j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 20, tid);
			} else if (cnt < loops[i]) bMore = 1;
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD SHA1 over SHA1_LOOPS buffers, storing the JOHNSWAP'd digest words
 * directly (no text encoding) into 'out'.
 *
 * in:  first input buffer; the code steps 64*4 bytes between buffers.
 * len: per-buffer input lengths, passed to Do_FixBufferLen32().
 * out: receives 20/sizeof(uint32_t) = 5 words per buffer, buffer i
 *      starting at word index i*4.
 *
 * NOTE(review): five words are written per buffer but consecutive buffers
 * start only four words apart in 'out', so word 4 of buffer i and word 0 of
 * buffer i+1 share the same slot.  Confirm against the caller's 'out'
 * layout whether this stride is intended.
 */
inline static void DoSHA1_sse_crypt_only(void *in, uint32_t len[SHA1_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(20*SHA1_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[SHA1_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the running call counter below; presumably
	// a block count -- TODO confirm against Do_FixBufferLen32().
	for (i = 0; i < SHA1_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// One SIMDSHA1body() call per pass, SSEi_RELOAD on all but the first.
	while (bMore) {
		SIMDSHA1body(cp, a, a, SSEi_FLAT_IN |SSEi_OUTPUT_AS_2BUF_INP_FMT|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA1_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Buffer i is complete on this pass: copy its words out.
				uint32_t offx = ((i/SIMD_COEF_32)*(20/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 20/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
			} else if (cnt < loops[i])
				bMore = 1;
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2;
		++cnt;
	}
}

#else
/* Non-SIMD build: one candidate is hashed per helper call. */
#define SHA1_LOOPS 1
/* Candidate-index step used by the DynamicFunc__SHA1_* loops. */
static const uint32_t SHA1_inc = 1;

/*
 * One-shot SHA1 for the "final" step: hash 'len' bytes at 'in' and copy
 * only the first 16 bytes of the 20-byte digest to 'out'.
 */
inline static void DoSHA1_crypt_f(void *in, uint32_t len, void *out) {
	// The digest is produced in a scratch union and then truncated.
	union xx { unsigned char u[20]; uint32_t a[20/sizeof(uint32_t)]; } full;
	SHA_CTX sha;

	SHA1_Init(&sha);
	SHA1_Update(&sha, in, len);
	SHA1_Final(full.u, &sha);
	memcpy(out, full.u, 16);
}

/*
 * SHA1 of 'ilen' bytes at 'in'; the digest is encoded into 'out' starting
 * at byte offset *tot_len, and *tot_len is advanced by the encoded size.
 *
 * When the output mode for 'tid' is eBase16 the digest is hex-encoded
 * directly with hex_out_buf() (40 characters); every other mode goes
 * through large_hash_output(), whose return value is added to *tot_len.
 */
inline static void DoSHA1_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[20];
	SHA_CTX sha;

	SHA1_Init(&sha);
	SHA1_Update(&sha, in, ilen);
	SHA1_Final(digest, &sha);

	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, &(((unsigned char*)out)[*tot_len]), 20, tid);
		return;
	}
	// Hex is the common case, so it skips the large_hash_output() dispatch.
	hex_out_buf(digest, &(((unsigned char*)out)[*tot_len]), 20);
	*tot_len += 20*2;
}
/*
 * Plain SHA1 of 'ilen' bytes at 'in'; the digest is written to 'out' by
 * SHA1_Final() with no further encoding.
 */
inline static void DoSHA1_crypt_only(void *in, uint32_t ilen, void *out)
{
	SHA_CTX sha;

	SHA1_Init(&sha);
	SHA1_Update(&sha, in, ilen);
	SHA1_Final(out, &sha);
}
#endif

/*
 * SHA1 of input buffer 1, appended to input buffer 2.
 * total_len2_X86[] itself is handed to the helper as the write offset, so
 * the encoded digest lands after the current content of buffer 2 and the
 * length grows by whatever the helper adds.
 */
void DynamicFunc__SHA1_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
		}
		DoSHA1_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, &(total_len2_X86[i]), tid);
#else
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoSHA1_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 2, appended to input buffer 1.
 * total_len_X86[] itself is handed to the helper as the write offset, so
 * the encoded digest lands after the current content of buffer 1 and the
 * length grows by whatever the helper adds.
 */
void DynamicFunc__SHA1_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
		}
		DoSHA1_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, &(total_len_X86[i]), tid);
#else
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoSHA1_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 1, written into input buffer 2 at the byte offset
 * returned by nLargeOff_get(tid).  The offset is kept in a local counter,
 * so total_len2_X86[] is left untouched.
 */
void DynamicFunc__SHA1_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS], pos[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoSHA1_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoSHA1_crypt(src, total_len_X86[i], dst, &pos, tid);
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 2, written into input buffer 1 at the byte offset
 * returned by nLargeOff_get(tid).  The offset is kept in a local counter,
 * so total_len_X86[] is left untouched.
 */
void DynamicFunc__SHA1_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS], pos[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoSHA1_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoSHA1_crypt(src, total_len2_X86[i], dst, &pos, tid);
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 1, written back into input buffer 1 at the byte
 * offset returned by nLargeOff_get(tid).  The offset is kept in a local
 * counter, so total_len_X86[] is left untouched.
 */
void DynamicFunc__SHA1_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS], pos[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoSHA1_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoSHA1_crypt(buf, total_len_X86[i], buf, &pos, tid);
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 2, written back into input buffer 2 at the byte
 * offset returned by nLargeOff_get(tid).  The offset is kept in a local
 * counter, so total_len2_X86[] is left untouched.
 */
void DynamicFunc__SHA1_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS], pos[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len2_X86[i+lane];
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			pos[lane] = nLargeOff_get(tid);
		}
		DoSHA1_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, in_len, dst, pos, tid);
#else
		uint32_t pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoSHA1_crypt(buf, total_len2_X86[i], buf, &pos, tid);
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 1, written back over input buffer 1 from offset 0.
 * The source length is captured before the call; the number of bytes the
 * helper reports as written then replaces total_len_X86[].
 */
void DynamicFunc__SHA1_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		uint32_t lane, in_len[SHA1_LOOPS], wrote[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];

		for (lane = 0; lane < SHA1_LOOPS; lane++) {
			in_len[lane] = total_len_X86[i+lane];
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#if (MD5_X2)
			// odd lanes live in the second half of a paired buffer
			if (lane&1)
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			#endif
			wrote[lane] = 0;
		}
		DoSHA1_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, in_len, dst, wrote, tid);
		for (lane = 0; lane < SHA1_LOOPS; lane++)
			total_len_X86[i+lane] = wrote[lane];
#else
		uint32_t wrote = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		// odd candidates live in the second half of a paired buffer
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// source and destination are the same buffer
		DoSHA1_crypt(buf, total_len_X86[i], buf, &wrote, tid);
		total_len_X86[i] = wrote;
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 1, with the result directed into input buffer 2
 * starting from offset 0.  total_len2_X86[] is then replaced with the offset
 * value the hash helper leaves behind (presumably the length it wrote).
 */
void DynamicFunc__SHA1_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		/* one helper call covers SHA1_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA1_LOOPS], new_len[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA1_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			total_len2_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA1_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA1_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA1_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len2_X86[i] = new_len;
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 2, with the result directed into input buffer 1
 * starting from offset 0.  total_len_X86[] is then replaced with the offset
 * value the hash helper leaves behind (presumably the length it wrote).
 */
void DynamicFunc__SHA1_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		/* one helper call covers SHA1_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA1_LOOPS], new_len[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA1_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			total_len_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA1_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA1_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA1_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len_X86[i] = new_len;
#endif
		i += SHA1_inc;
	}
}

/*
 * SHA1 of input buffer 2, with the result directed back into input buffer 2
 * starting from offset 0.  total_len2_X86[] is then replaced with the offset
 * value the hash helper leaves behind (presumably the length it wrote).
 */
void DynamicFunc__SHA1_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		/* one helper call covers SHA1_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA1_LOOPS], new_len[SHA1_LOOPS];
		void *dst[SHA1_LOOPS];
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA1_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			total_len2_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA1_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA1_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA1_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len2_X86[i] = new_len;
#endif
		i += SHA1_inc;
	}
}

/*
 * Shared worker behind the DynamicFunc__SHA1_crypt_inputN_to_outputM wrappers.
 *   X      - source selector: 1 uses input_buf_X86/total_len_X86, any other
 *            value uses input_buf2_X86/total_len2_X86.
 *   Y      - 1-based number of the dynamic_BHO[] slot receiving the results.
 *   i, til - half-open range of entries to process.
 */
inline static void _Dyna__SHA1_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1; /* callers pass Y 1-based; dynamic_BHO[] is indexed from 0 */
	dynamic_BHO[Y].width = 20;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		dynamic_BHO[Y].mixed_SIMD = 1;
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].BE = 1;
		/* the helper reads SHA1_LOOPS lengths starting at entry i */
		if (X != 1) {
			DoSHA1_sse_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
		} else {
			DoSHA1_sse_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], dynamic_BHO[Y].dat[i].b);
		}
#else
		dynamic_BHO[Y].mixed_SIMD = 0;
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].BE = 0;	/* CTX requires no swapping. */
		if (X != 1) {
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (i & 1) {
				DoSHA1_crypt_only(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			} else {
				DoSHA1_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			}
			#else
			DoSHA1_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			#endif
		} else {
			#if (MD5_X2)
			if (i & 1) {
				DoSHA1_crypt_only(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			} else {
				DoSHA1_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			}
			#else
			DoSHA1_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			#endif
		}
#endif
		i += SHA1_inc;
	}
}
/*
 * Public entry points for SHA1 "input N to output M".  Each one only forwards
 * its input selector (1 or 2) and 1-based output slot (1..4) to
 * _Dyna__SHA1_crypt_inputX_to_outputY.
 */
void DynamicFunc__SHA1_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SHA1_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SHA1_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SHA1_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SHA1_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SHA1_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SHA1_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SHA1_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA1_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final-step SHA1: hashes input buffer 1 and has the helper store the result
 * in crypt_key_X86[].
 */
void DynamicFunc__SHA1_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		/* gather the lengths of the SHA1_LOOPS entries handled by one call */
		uint32_t lane, src_len[SHA1_LOOPS];
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
		}
		DoSHA1_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		#if (MD5_X2)
		/* odd-numbered entries use the .x2.b2 member of the shared slot */
		if (i & 1) {
			DoSHA1_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else {
			DoSHA1_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		#else
		DoSHA1_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		#endif
#endif
		i += SHA1_inc;
	}
}

/*
 * Final-step SHA1: hashes input buffer 2 and has the helper store the result
 * in crypt_key_X86[].
 */
void DynamicFunc__SHA1_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA1
		/* gather the lengths of the SHA1_LOOPS entries handled by one call */
		uint32_t lane, src_len[SHA1_LOOPS];
		for (lane = 0; lane < SHA1_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
		}
		DoSHA1_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		#if (MD5_X2)
		/* odd-numbered entries use the .x2.b2 member of the shared slot */
		if (i & 1) {
			DoSHA1_crypt_f(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else {
			DoSHA1_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		#else
		DoSHA1_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		#endif
#endif
		i += SHA1_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SHA224 PARAHASH=SHA256 BIN_SZ=32 BIN_REAL_SZ=28 BE_HASH=1 JSWAPH=JOHNSWAP( JSWAPT=); HASH_CTX=SHA256_CTX HASH_Init=SHA224_Init HASH_Update=SHA256_Update HASH_Final=SHA256_Final SSEBody=SIMDSHA256body SSE_LIMBS=4 SSE_ONLY_LIMBS=2BUF_ SSEFLAGS=|SSEi_CRYPT_SHA224 DEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  SHA224 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_SHA256
/*
 * SIMD build: SHA224_LOOPS entries are handled by each SIMD helper call, and
 * SHA224_inc is the matching step used by the per-entry loops further down.
 */
#define SHA224_LOOPS (SIMD_COEF_32*SIMD_PARA_SHA256)
static const uint32_t SHA224_inc = SHA224_LOOPS;

/*
 * SIMD SHA224 used by the *_FINAL functions: processes SHA224_LOOPS buffers
 * together and keeps only the first 16 bytes of each result.
 *   in  - first source buffer; this routine steps 64*4 bytes to reach each
 *         following lane's buffer.
 *   len - byte length of each lane's data.
 *   out - receives 4 uint32_t words per lane, lane i at word index i*4.
 */
static void DoSHA224_crypt_f_sse(void *in, uint32_t len[SHA224_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(32*SHA224_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[SHA224_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the pass counter cnt below; presumably
	// Do_FixBufferLen32 prepares lane i's buffer and returns the number of
	// passes that lane needs -- confirm against its definition.
	for (i = 0; i < SHA224_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in;
	bMore = 1;
	cnt = 1;
	// One SIMDSHA256body call per pass; every pass after the first adds
	// SSEi_RELOAD and reuses 'a' as both input state and output.
	while (bMore) {
		SIMDSHA256body(cp, a, a, SSEi_FLAT_IN|SSEi_CRYPT_SHA224|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA224_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Lane i is done on this pass.  offx locates its first word in
				// 'a': group (i/SIMD_COEF_32) of 8*SIMD_COEF_32 words, plus the
				// lane's position inside the group; word j is SIMD_COEF_32*j on.
				uint32_t offx = ((i/SIMD_COEF_32)*(32/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				// only 16 bytes in the 'final'
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
			} else if (cnt < loops[i]) bMore = 1; // some lane still needs another pass
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD SHA224 for the append/at_offset/overwrite functions: processes
 * SHA224_LOOPS buffers together and emits each result via large_hash_output.
 *   in      - first source buffer; lanes are 64*4 bytes apart.
 *   ilen    - byte length of each lane's data.
 *   out     - per-lane destination buffers.
 *   tot_len - per-lane write offsets; tot_len[i] is where lane i's output
 *             starts and is increased by large_hash_output's return value.
 *   tid     - passed through to large_hash_output.
 */
static void DoSHA224_crypt_sse(void *in, uint32_t ilen[SHA224_LOOPS], void *out[SHA224_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(32*SHA224_LOOPS)/sizeof(uint32_t)];
	union yy { unsigned char u[32]; uint32_t a[32/sizeof(uint32_t)]; } y;
	uint32_t i, j, loops[SHA224_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the pass counter cnt below; presumably
	// Do_FixBufferLen32 prepares lane i's buffer and returns the number of
	// passes that lane needs -- confirm against its definition.
	for (i = 0; i < SHA224_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, ilen[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// One SIMDSHA256body call per pass; every pass after the first adds
	// SSEi_RELOAD and reuses 'a' as both input state and output.
	while (bMore) {
		SIMDSHA256body(cp, a, a, SSEi_FLAT_IN|SSEi_CRYPT_SHA224|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA224_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Lane i is done on this pass: copy its 8 words out of the
				// interleaved 'a' into y, then emit 28 bytes of it.
				uint32_t offx = ((i/SIMD_COEF_32)*32/sizeof(uint32_t)*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 32/sizeof(uint32_t); ++j) {
					y.a[j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 28, tid);
			} else if (cnt < loops[i]) bMore = 1; // some lane still needs another pass
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD SHA224 used by _Dyna__SHA224_crypt_inputX_to_outputY: processes
 * SHA224_LOOPS buffers together and stores the byte-swapped result words
 * straight into 'out'.
 *   in  - first source buffer; lanes are 64*4 bytes apart.
 *   len - byte length of each lane's data.
 *   out - destination word array; lane i is written starting at word i*4.
 *
 * NOTE(review): each lane stores 32/sizeof(uint32_t) = 8 words, yet lanes
 * start only 4 words apart, so lane i+1's store covers words 4..7 written
 * for lane i.  Confirm this stride against the layout of the caller's
 * output buffer.
 */
inline static void DoSHA224_sse_crypt_only(void *in, uint32_t len[SHA224_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(32*SHA224_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[SHA224_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the pass counter cnt below; presumably
	// Do_FixBufferLen32 prepares lane i's buffer and returns the number of
	// passes that lane needs -- confirm against its definition.
	for (i = 0; i < SHA224_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// Same pass loop as the other SIMD helpers, with the extra
	// SSEi_OUTPUT_AS_2BUF_INP_FMT flag handed to SIMDSHA256body.
	while (bMore) {
		SIMDSHA256body(cp, a, a, SSEi_FLAT_IN|SSEi_CRYPT_SHA224|SSEi_OUTPUT_AS_2BUF_INP_FMT|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA224_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Lane i is done on this pass; offx is its first word in 'a'.
				uint32_t offx = ((i/SIMD_COEF_32)*(32/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 32/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
			} else if (cnt < loops[i])
				bMore = 1; // some lane still needs another pass
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2;
		++cnt;
	}
}

#else
/* SIMD_PARA_SHA256 is not defined: entries are hashed one at a time. */
#define SHA224_LOOPS 1
static const uint32_t SHA224_inc = 1;

/*
 * Scalar SHA224 for the *_FINAL functions: hashes len bytes at 'in' and
 * copies the first 16 bytes of the result to 'out'.
 */
inline static void DoSHA224_crypt_f(void *in, uint32_t len, void *out) {
	/* the uint32_t member is never read; presumably present for alignment */
	union xx { unsigned char u[32]; uint32_t a[32/sizeof(uint32_t)]; } digest;
	SHA256_CTX hash_ctx;

	SHA224_Init(&hash_ctx);
	SHA256_Update(&hash_ctx, in, len);
	SHA256_Final(digest.u, &hash_ctx);

	/* only the leading 16 bytes are kept for the final result */
	memcpy(out, digest.u, 16);
}

/*
 * Scalar SHA224 for the append/at_offset/overwrite functions.
 *   in, ilen - data to hash.
 *   out      - destination buffer; writing starts at out + *tot_len.
 *   tot_len  - in: write offset; out: advanced by the amount written.
 *   tid      - selects the output encoding through eLargeOut_get().
 */
inline static void DoSHA224_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	SHA256_CTX hash_ctx;

	SHA224_Init(&hash_ctx);
	SHA256_Update(&hash_ctx, in, ilen);
	SHA256_Final(digest, &hash_ctx);

	if (eLargeOut_get(tid) != eBase16) {
		/* every other encoding goes through the generic formatter */
		*tot_len += large_hash_output(digest, &(((unsigned char*)out)[*tot_len]), 28, tid);
		return;
	}
	/* base-16 is the usual case, so skip large_hash_output and hex-encode directly */
	hex_out_buf(digest, &(((unsigned char*)out)[*tot_len]), 28);
	*tot_len += 28*2;
}
/*
 * Scalar SHA224 for _Dyna__SHA224_crypt_inputX_to_outputY: hashes ilen bytes
 * at 'in' and finalizes directly into 'out', with no encoding step.
 */
inline static void DoSHA224_crypt_only(void *in, uint32_t ilen, void *out)
{
	SHA256_CTX hash_ctx;

	SHA224_Init(&hash_ctx);
	SHA256_Update(&hash_ctx, in, ilen);
	SHA256_Final(out, &hash_ctx);
}
#endif

/*
 * SHA224 of input buffer 1, appended to input buffer 2.  The hash helper
 * writes at the current total_len2_X86[] offset and advances that length.
 */
void DynamicFunc__SHA224_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
		}
		DoSHA224_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, &(total_len2_X86[i]), tid);
#else
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &(total_len2_X86[i]), tid);
		} else {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &(total_len2_X86[i]), tid);
		}
		#else
		DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &(total_len2_X86[i]), tid);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 2, appended to input buffer 1.  The hash helper
 * writes at the current total_len_X86[] offset and advances that length.
 */
void DynamicFunc__SHA224_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
		}
		DoSHA224_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, &(total_len_X86[i]), tid);
#else
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &(total_len_X86[i]), tid);
		} else {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &(total_len_X86[i]), tid);
		}
		#else
		DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &(total_len_X86[i]), tid);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 1, written into input buffer 2 at the offset from
 * nLargeOff_get(tid).  The offset is carried in a local only, so this
 * function does not touch total_len2_X86[].
 */
void DynamicFunc__SHA224_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], dst_off[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			dst_off[lane] = nLargeOff_get(tid);
		}
		DoSHA224_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, dst_off, tid);
#else
		uint32_t dst_off = nLargeOff_get(tid);
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &dst_off, tid);
		} else {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		}
		#else
		DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 2, written into input buffer 1 at the offset from
 * nLargeOff_get(tid).  The offset is carried in a local only, so this
 * function does not touch total_len_X86[].
 */
void DynamicFunc__SHA224_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], dst_off[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			dst_off[lane] = nLargeOff_get(tid);
		}
		DoSHA224_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, dst_off, tid);
#else
		uint32_t dst_off = nLargeOff_get(tid);
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &dst_off, tid);
		} else {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		}
		#else
		DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 1, written back into input buffer 1 at the offset
 * from nLargeOff_get(tid).  The offset is carried in a local only, so this
 * function does not touch total_len_X86[].
 */
void DynamicFunc__SHA224_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], dst_off[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			dst_off[lane] = nLargeOff_get(tid);
		}
		DoSHA224_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, dst_off, tid);
#else
		uint32_t dst_off = nLargeOff_get(tid);
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &dst_off, tid);
		} else {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		}
		#else
		DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 2, written back into input buffer 2 at the offset
 * from nLargeOff_get(tid).  The offset is carried in a local only, so this
 * function does not touch total_len2_X86[].
 */
void DynamicFunc__SHA224_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], dst_off[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			dst_off[lane] = nLargeOff_get(tid);
		}
		DoSHA224_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, dst_off, tid);
#else
		uint32_t dst_off = nLargeOff_get(tid);
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &dst_off, tid);
		} else {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		}
		#else
		DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 1, written back over input buffer 1 from offset 0.
 * total_len_X86[] is then set to the offset the hash helper ends at, i.e.
 * the amount it wrote.
 */
void DynamicFunc__SHA224_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], new_len[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA224_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			total_len_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len_X86[i] = new_len;
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 1, written over input buffer 2 from offset 0.
 * total_len2_X86[] is then set to the offset the hash helper ends at, i.e.
 * the amount it wrote.
 */
void DynamicFunc__SHA224_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], new_len[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA224_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			total_len2_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA224_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len2_X86[i] = new_len;
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 2, written over input buffer 1 from offset 0.
 * total_len_X86[] is then set to the offset the hash helper ends at, i.e.
 * the amount it wrote.
 */
void DynamicFunc__SHA224_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], new_len[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA224_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			total_len_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len_X86[i] = new_len;
#endif
		i += SHA224_inc;
	}
}

/*
 * SHA224 of input buffer 2, written back over input buffer 2 from offset 0.
 * total_len2_X86[] is then set to the offset the hash helper ends at, i.e.
 * the amount it wrote.
 */
void DynamicFunc__SHA224_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA224_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA224_LOOPS], new_len[SHA224_LOOPS];
		void *dst[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			new_len[lane] = 0;
		}
		DoSHA224_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, new_len, tid);
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			total_len2_X86[i+lane] = new_len[lane];
		}
#else
		uint32_t new_len = 0;
		#if (MD5_X2)
		if (i & 1) {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else {
			DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		#else
		DoSHA224_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		#endif
		total_len2_X86[i] = new_len;
#endif
		i += SHA224_inc;
	}
}

/*
 * Shared worker behind the DynamicFunc__SHA224_crypt_inputN_to_outputM wrappers.
 *   X      - source selector: 1 uses input_buf_X86/total_len_X86, any other
 *            value uses input_buf2_X86/total_len2_X86.
 *   Y      - 1-based number of the dynamic_BHO[] slot receiving the results.
 *   i, til - half-open range of entries to process.
 */
inline static void _Dyna__SHA224_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1; /* callers pass Y 1-based; dynamic_BHO[] is indexed from 0 */
	dynamic_BHO[Y].width = 28;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		dynamic_BHO[Y].mixed_SIMD = 1;
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].BE = 1;
		/* the helper reads SHA224_LOOPS lengths starting at entry i */
		if (X != 1) {
			DoSHA224_sse_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
		} else {
			DoSHA224_sse_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], dynamic_BHO[Y].dat[i].b);
		}
#else
		dynamic_BHO[Y].mixed_SIMD = 0;
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].BE = 0;	/* CTX requires no swapping. */
		if (X != 1) {
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (i & 1) {
				DoSHA224_crypt_only(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			} else {
				DoSHA224_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			}
			#else
			DoSHA224_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			#endif
		} else {
			#if (MD5_X2)
			if (i & 1) {
				DoSHA224_crypt_only(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			} else {
				DoSHA224_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			}
			#else
			DoSHA224_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			#endif
		}
#endif
		i += SHA224_inc;
	}
}
/*
 * Public entry points for SHA224 "input N to output M".  Each one only
 * forwards its input selector (1 or 2) and 1-based output slot (1..4) to
 * _Dyna__SHA224_crypt_inputX_to_outputY.
 */
void DynamicFunc__SHA224_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SHA224_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SHA224_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SHA224_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SHA224_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SHA224_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SHA224_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SHA224_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA224_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final-step SHA224: hashes input buffer 1 and stores the first 16 bytes of
 * each result in crypt_key_X86[].
 */
void DynamicFunc__SHA224_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* gather the lengths of the SHA224_LOOPS entries handled by one call */
		uint32_t lane, src_len[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
		}
		DoSHA224_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		#if (MD5_X2)
		/* odd-numbered entries use the .x2.b2 member of the shared slot */
		if (i & 1) {
			DoSHA224_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else {
			DoSHA224_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		#else
		DoSHA224_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		#endif
#endif
		i += SHA224_inc;
	}
}

/*
 * Final-step SHA224: hashes input buffer 2 and stores the first 16 bytes of
 * each result in crypt_key_X86[].
 */
void DynamicFunc__SHA224_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* gather the lengths of the SHA224_LOOPS entries handled by one call */
		uint32_t lane, src_len[SHA224_LOOPS];
		for (lane = 0; lane < SHA224_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
		}
		DoSHA224_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		#if (MD5_X2)
		/* odd-numbered entries use the .x2.b2 member of the shared slot */
		if (i & 1) {
			DoSHA224_crypt_f(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else {
			DoSHA224_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		#else
		DoSHA224_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		#endif
#endif
		i += SHA224_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SHA256 PARAHASH=SHA256 BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=1 JSWAPH=JOHNSWAP( JSWAPT=); HASH_CTX=SHA256_CTX HASH_Init=SHA256_Init HASH_Update=SHA256_Update HASH_Final=SHA256_Final SSEBody=SIMDSHA256body SSE_LIMBS=4 SSE_ONLY_LIMBS=2BUF_ SSEFLAGS=  DEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  SHA256 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_SHA256
/*
 * SIMD build: SHA256_LOOPS entries are handled by each SIMD helper call, and
 * SHA256_inc is the matching step used by the per-entry loops further down.
 */
#define SHA256_LOOPS (SIMD_COEF_32*SIMD_PARA_SHA256)
static const uint32_t SHA256_inc = SHA256_LOOPS;

/*
 * SIMD SHA256 taking the same arguments as the SHA224 *_f_sse helper:
 * processes SHA256_LOOPS buffers together and keeps only the first 16 bytes
 * of each result.
 *   in  - first source buffer; this routine steps 64*4 bytes to reach each
 *         following lane's buffer.
 *   len - byte length of each lane's data.
 *   out - receives 4 uint32_t words per lane, lane i at word index i*4.
 */
static void DoSHA256_crypt_f_sse(void *in, uint32_t len[SHA256_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(32*SHA256_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[SHA256_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the pass counter cnt below; presumably
	// Do_FixBufferLen32 prepares lane i's buffer and returns the number of
	// passes that lane needs -- confirm against its definition.
	for (i = 0; i < SHA256_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in;
	bMore = 1;
	cnt = 1;
	// One SIMDSHA256body call per pass; every pass after the first adds
	// SSEi_RELOAD and reuses 'a' as both input state and output.
	while (bMore) {
		SIMDSHA256body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA256_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Lane i is done on this pass.  offx locates its first word in
				// 'a': group (i/SIMD_COEF_32) of 8*SIMD_COEF_32 words, plus the
				// lane's position inside the group; word j is SIMD_COEF_32*j on.
				uint32_t offx = ((i/SIMD_COEF_32)*(32/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				// only 16 bytes in the 'final'
				for (j = 0; j < 16/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
			} else if (cnt < loops[i]) bMore = 1; // some lane still needs another pass
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD SHA256 for the append/at_offset functions below: processes
 * SHA256_LOOPS buffers together and emits each result via large_hash_output.
 *   in      - first source buffer; lanes are 64*4 bytes apart.
 *   ilen    - byte length of each lane's data.
 *   out     - per-lane destination buffers.
 *   tot_len - per-lane write offsets; tot_len[i] is where lane i's output
 *             starts and is increased by large_hash_output's return value.
 *   tid     - passed through to large_hash_output.
 */
static void DoSHA256_crypt_sse(void *in, uint32_t ilen[SHA256_LOOPS], void *out[SHA256_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(32*SHA256_LOOPS)/sizeof(uint32_t)];
	union yy { unsigned char u[32]; uint32_t a[32/sizeof(uint32_t)]; } y;
	uint32_t i, j, loops[SHA256_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the pass counter cnt below; presumably
	// Do_FixBufferLen32 prepares lane i's buffer and returns the number of
	// passes that lane needs -- confirm against its definition.
	for (i = 0; i < SHA256_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, ilen[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// One SIMDSHA256body call per pass; every pass after the first adds
	// SSEi_RELOAD and reuses 'a' as both input state and output.
	while (bMore) {
		SIMDSHA256body(cp, a, a, SSEi_FLAT_IN |SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA256_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Lane i is done on this pass: copy its 8 words out of the
				// interleaved 'a' into y, then emit all 32 bytes.
				uint32_t offx = ((i/SIMD_COEF_32)*32/sizeof(uint32_t)*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 32/sizeof(uint32_t); ++j) {
					y.a[j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 32, tid);
			} else if (cnt < loops[i]) bMore = 1; // some lane still needs another pass
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2; ++cnt;
	}
}

/*
 * SIMD SHA256 that stores the byte-swapped result words straight into 'out'
 * (same shape as the SHA224 *_sse_crypt_only helper): processes SHA256_LOOPS
 * buffers together.
 *   in  - first source buffer; lanes are 64*4 bytes apart.
 *   len - byte length of each lane's data.
 *   out - destination word array; lane i is written starting at word i*4.
 *
 * NOTE(review): each lane stores 32/sizeof(uint32_t) = 8 words, yet lanes
 * start only 4 words apart, so lane i+1's store covers words 4..7 written
 * for lane i.  Confirm this stride against the layout of the caller's
 * output buffer.
 */
inline static void DoSHA256_sse_crypt_only(void *in, uint32_t len[SHA256_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint32_t a[(32*SHA256_LOOPS)/sizeof(uint32_t)];
	uint32_t i, j, loops[SHA256_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	// loops[i] is compared with the pass counter cnt below; presumably
	// Do_FixBufferLen32 prepares lane i's buffer and returns the number of
	// passes that lane needs -- confirm against its definition.
	for (i = 0; i < SHA256_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen32(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	// Same pass loop as the other SIMD helpers, with the extra
	// SSEi_OUTPUT_AS_2BUF_INP_FMT flag handed to SIMDSHA256body.
	while (bMore) {
		SIMDSHA256body(cp, a, a, SSEi_FLAT_IN |SSEi_OUTPUT_AS_2BUF_INP_FMT|SSEi_4BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA256_LOOPS; ++i) {
			if (cnt == loops[i]) {
				// Lane i is done on this pass; offx is its first word in 'a'.
				uint32_t offx = ((i/SIMD_COEF_32)*(32/sizeof(uint32_t))*SIMD_COEF_32)+(i&(SIMD_COEF_32-1));
				for (j = 0; j < 32/sizeof(uint32_t); ++j) {
					((uint32_t*)out)[(i*4)+j] = JOHNSWAP(a[(j*SIMD_COEF_32)+offx]);
				}
			} else if (cnt < loops[i])
				bMore = 1; // some lane still needs another pass
		}
		// advance the input pointer by 64 bytes for the next pass
		cp += 32*2;
		++cnt;
	}
}

#else
/* SIMD_PARA_SHA256 is not defined: entries are hashed one at a time. */
#define SHA256_LOOPS 1
static const uint32_t SHA256_inc = 1;

/*
 * Scalar SHA256 with the same shape as the SHA224 *_crypt_f helper: hashes
 * len bytes at 'in' and copies the first 16 bytes of the digest to 'out'.
 */
inline static void DoSHA256_crypt_f(void *in, uint32_t len, void *out) {
	/* the uint32_t member is never read; presumably present for alignment */
	union xx { unsigned char u[32]; uint32_t a[32/sizeof(uint32_t)]; } digest;
	SHA256_CTX hash_ctx;

	SHA256_Init(&hash_ctx);
	SHA256_Update(&hash_ctx, in, len);
	SHA256_Final(digest.u, &hash_ctx);

	/* only the leading 16 bytes are kept for the final result */
	memcpy(out, digest.u, 16);
}

/*
 * Scalar SHA256 for the append/at_offset functions below.
 *   in, ilen - data to hash.
 *   out      - destination buffer; writing starts at out + *tot_len.
 *   tot_len  - in: write offset; out: advanced by the amount written.
 *   tid      - selects the output encoding through eLargeOut_get().
 */
inline static void DoSHA256_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	SHA256_CTX hash_ctx;

	SHA256_Init(&hash_ctx);
	SHA256_Update(&hash_ctx, in, ilen);
	SHA256_Final(digest, &hash_ctx);

	if (eLargeOut_get(tid) != eBase16) {
		/* every other encoding goes through the generic formatter */
		*tot_len += large_hash_output(digest, &(((unsigned char*)out)[*tot_len]), 32, tid);
		return;
	}
	/* base-16 is the usual case, so skip large_hash_output and hex-encode directly */
	hex_out_buf(digest, &(((unsigned char*)out)[*tot_len]), 32);
	*tot_len += 32*2;
}
/*
 * Scalar SHA256 with no encoding step: hashes ilen bytes at 'in' and
 * finalizes directly into 'out'.
 */
inline static void DoSHA256_crypt_only(void *in, uint32_t ilen, void *out)
{
	SHA256_CTX hash_ctx;

	SHA256_Init(&hash_ctx);
	SHA256_Update(&hash_ctx, in, ilen);
	SHA256_Final(out, &hash_ctx);
}
#endif

/*
 * SHA256 of input buffer 1, appended to input buffer 2.  The hash helper
 * writes at the current total_len2_X86[] offset and advances that length.
 */
void DynamicFunc__SHA256_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA256_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA256_LOOPS];
		void *dst[SHA256_LOOPS];
		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
		}
		DoSHA256_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, &(total_len2_X86[i]), tid);
#else
		#if (MD5_X2)
		if (i & 1) {
			DoSHA256_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &(total_len2_X86[i]), tid);
		} else {
			DoSHA256_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &(total_len2_X86[i]), tid);
		}
		#else
		DoSHA256_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &(total_len2_X86[i]), tid);
		#endif
#endif
		i += SHA256_inc;
	}
}

/*
 * SHA256 of input buffer 2, appended to input buffer 1.  The hash helper
 * writes at the current total_len_X86[] offset and advances that length.
 */
void DynamicFunc__SHA256_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA256_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA256_LOOPS];
		void *dst[SHA256_LOOPS];
		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
		}
		DoSHA256_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst, &(total_len_X86[i]), tid);
#else
		#if (MD5_X2)
		if (i & 1) {
			DoSHA256_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &(total_len_X86[i]), tid);
		} else {
			DoSHA256_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &(total_len_X86[i]), tid);
		}
		#else
		DoSHA256_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &(total_len_X86[i]), tid);
		#endif
#endif
		i += SHA256_inc;
	}
}

/*
 * SHA256 of input buffer 1, written into input buffer 2 at the offset from
 * nLargeOff_get(tid).  The offset is carried in a local only, so this
 * function does not touch total_len2_X86[].
 */
void DynamicFunc__SHA256_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		/* one helper call covers SHA256_LOOPS consecutive entries */
		uint32_t lane, src_len[SHA256_LOOPS], dst_off[SHA256_LOOPS];
		void *dst[SHA256_LOOPS];
		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			#if (MD5_X2)
			/* odd-numbered entries use the .x2.b2 member of the shared slot */
			if (lane&1) {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
			} else {
				dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			}
			#else
			dst[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
			#endif
			dst_off[lane] = nLargeOff_get(tid);
		}
		DoSHA256_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst, dst_off, tid);
#else
		uint32_t dst_off = nLargeOff_get(tid);
		#if (MD5_X2)
		if (i & 1) {
			DoSHA256_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &dst_off, tid);
		} else {
			DoSHA256_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		}
		#else
		DoSHA256_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &dst_off, tid);
		#endif
#endif
		i += SHA256_inc;
	}
}

/*
 * Feed input2 to the SHA256 worker with input1 as the output buffer.
 * nLargeOff_get(tid) is passed as the initial output offset; the offset
 * updated by the worker is discarded (total_len_X86[] is not touched).
 */
void DynamicFunc__SHA256_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA256_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA256_crypt(src_buf, total_len2_X86[i], dst_buf, &out_off, tid);
#endif
		i += SHA256_inc;
	}
}

/*
 * In-place variant: input1 is both the source and the output buffer of the
 * SHA256 worker. nLargeOff_get(tid) is passed as the initial output offset;
 * the offset updated by the worker is discarded.
 */
void DynamicFunc__SHA256_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA256_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA256_crypt(buf, total_len_X86[i], buf, &out_off, tid);
#endif
		i += SHA256_inc;
	}
}

/*
 * In-place variant: input2 is both the source and the output buffer of the
 * SHA256 worker. nLargeOff_get(tid) is passed as the initial output offset;
 * the offset updated by the worker is discarded.
 */
void DynamicFunc__SHA256_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA256_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA256_crypt(buf, total_len2_X86[i], buf, &out_off, tid);
#endif
		i += SHA256_inc;
	}
}

/*
 * In-place overwrite: input1 is hashed by the SHA256 worker and the result
 * is written back into input1 starting at offset 0. The offset reported
 * back by the worker becomes the new total_len_X86[] entry.
 */
void DynamicFunc__SHA256_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA256_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA256_LOOPS; ++lane)
			total_len_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA256_crypt(buf, total_len_X86[i], buf, &out_off, tid);
		total_len_X86[i] = out_off;
#endif
		i += SHA256_inc;
	}
}

/*
 * Overwrite: input1 is hashed by the SHA256 worker and the result is
 * written into input2 starting at offset 0. The offset reported back by
 * the worker becomes the new total_len2_X86[] entry.
 */
void DynamicFunc__SHA256_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA256_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA256_LOOPS; ++lane)
			total_len2_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *src_buf = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA256_crypt(src_buf, total_len_X86[i], dst_buf, &out_off, tid);
		total_len2_X86[i] = out_off;
#endif
		i += SHA256_inc;
	}
}

/*
 * Overwrite: input2 is hashed by the SHA256 worker and the result is
 * written into input1 starting at offset 0. The offset reported back by
 * the worker becomes the new total_len_X86[] entry.
 */
void DynamicFunc__SHA256_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA256_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA256_LOOPS; ++lane)
			total_len_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA256_crypt(src_buf, total_len2_X86[i], dst_buf, &out_off, tid);
		total_len_X86[i] = out_off;
#endif
		i += SHA256_inc;
	}
}

/*
 * In-place overwrite: input2 is hashed by the SHA256 worker and the result
 * is written back into input2 starting at offset 0. The offset reported
 * back by the worker becomes the new total_len2_X86[] entry.
 */
void DynamicFunc__SHA256_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS], out_off[SHA256_LOOPS];
		void *dst_buf[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA256_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA256_LOOPS; ++lane)
			total_len2_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA256_crypt(buf, total_len2_X86[i], buf, &out_off, tid);
		total_len2_X86[i] = out_off;
#endif
		i += SHA256_inc;
	}
}

/*
 * Shared worker behind the SHA256 "inputX_to_outputY" primitives.
 *   X      - 1 selects input_buf_X86/total_len_X86; any other value selects
 *            input_buf2_X86/total_len2_X86.
 *   Y      - 1-based index into dynamic_BHO[].
 *   i, til - candidate range [i, til), stepped by SHA256_inc.
 * width is always set; BE/bits/mixed_SIMD are written on every loop pass
 * and therefore stay untouched when the range is empty.
 */
inline static void _Dyna__SHA256_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	const uint32_t slot = Y - 1;	/* callers pass a 1-based Y */

	dynamic_BHO[slot].width = 32;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		dynamic_BHO[slot].BE = 1;
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 1;
		if (X != 1)
			DoSHA256_sse_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], dynamic_BHO[slot].dat[i].b);
		else
			DoSHA256_sse_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], dynamic_BHO[slot].dat[i].b);
#else
		void *src_buf;
		uint32_t src_len;

		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X == 1) {
			src_len = total_len_X86[i];
			src_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src_len = total_len2_X86[i];
			src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoSHA256_crypt_only(src_buf, src_len, dynamic_BHO[slot].dat[i].b);
#endif
		i += SHA256_inc;
	}
}
/* SHA256 of input1 into dynamic_BHO[0] (X=1, Y=1). */
void DynamicFunc__SHA256_crypt_input1_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(1, 1, i, til);
}
/* SHA256 of input1 into dynamic_BHO[1] (X=1, Y=2). */
void DynamicFunc__SHA256_crypt_input1_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(1, 2, i, til);
}
/* SHA256 of input1 into dynamic_BHO[2] (X=1, Y=3). */
void DynamicFunc__SHA256_crypt_input1_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(1, 3, i, til);
}
/* SHA256 of input1 into dynamic_BHO[3] (X=1, Y=4). */
void DynamicFunc__SHA256_crypt_input1_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(1, 4, i, til);
}
/* SHA256 of input2 into dynamic_BHO[0] (X=2, Y=1). */
void DynamicFunc__SHA256_crypt_input2_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(2, 1, i, til);
}
/* SHA256 of input2 into dynamic_BHO[1] (X=2, Y=2). */
void DynamicFunc__SHA256_crypt_input2_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(2, 2, i, til);
}
/* SHA256 of input2 into dynamic_BHO[2] (X=2, Y=3). */
void DynamicFunc__SHA256_crypt_input2_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(2, 3, i, til);
}
/* SHA256 of input2 into dynamic_BHO[3] (X=2, Y=4). */
void DynamicFunc__SHA256_crypt_input2_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA256_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: run the SHA256 "_f" worker over input1 for every candidate,
 * with the matching crypt_key_X86[] slot as the output buffer.
 */
void DynamicFunc__SHA256_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane)
			src_len[lane] = total_len_X86[i+lane];
		DoSHA256_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src_buf = input_buf_X86[i>>MD5_X2].x1.b;
		void *key_buf = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
			key_buf = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA256_crypt_f(src_buf, total_len_X86[i], key_buf);
#endif
		i += SHA256_inc;
	}
}

/*
 * Final step: run the SHA256 "_f" worker over input2 for every candidate,
 * with the matching crypt_key_X86[] slot as the output buffer.
 */
void DynamicFunc__SHA256_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA256
		uint32_t src_len[SHA256_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA256_LOOPS; ++lane)
			src_len[lane] = total_len2_X86[i+lane];
		DoSHA256_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key_buf = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			key_buf = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA256_crypt_f(src_buf, total_len2_X86[i], key_buf);
#endif
		i += SHA256_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=SHA384 PARAHASH=SHA512 BIN_SZ=64 BIN_REAL_SZ=48 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=SHA512_CTX HASH_Init=SHA384_Init HASH_Update=SHA512_Update HASH_Final=SHA512_Final SSEBody=SIMDSHA512body SSE_LIMBS=2 SSE_ONLY_LIMBS= SSEFLAGS=|SSEi_CRYPT_SHA384 DEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  SHA384 functions. This code was generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_SHA512
/* Number of candidates handled by one call of the SHA384 SIMD workers below. */
#define SHA384_LOOPS (SIMD_COEF_64*SIMD_PARA_SHA512)
/* Loop stride used by the DynamicFunc__SHA384_* drivers. */
static const uint32_t SHA384_inc = SHA384_LOOPS;

/*
 * SIMD "final" SHA384 worker: processes SHA384_LOOPS input buffers in one go
 * and stores the first two 64-bit result words (16 bytes) of each lane.
 *   in  - first input buffer; lane k's buffer starts 64*4 bytes after lane
 *         k-1's. Every buffer is passed through Do_FixBufferLen64 first
 *         (NOTE(review): presumably pads in place and returns the number of
 *         passes the lane needs - confirm against its definition).
 *   len - input length of each lane.
 *   out - receives 2 uint64_t words per lane, lane k at word index k*2.
 */
static void DoSHA384_crypt_f_sse(void *in, uint32_t len[SHA384_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint64_t a[(64*SHA384_LOOPS)/sizeof(uint64_t)];
	uint32_t i, j, loops[SHA384_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < SHA384_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen64(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in;
	bMore = 1;
	cnt = 1;
	/* one SIMD pass per iteration; keep going while any lane needs more passes */
	while (bMore) {
		/* only passes after the first one carry SSEi_RELOAD */
		SIMDSHA512body(cp, a, a, SSEi_FLAT_IN|SSEi_CRYPT_SHA384|SSEi_2BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA384_LOOPS; ++i) {
			/* a lane's result is harvested on the pass that equals its loop count */
			if (cnt == loops[i]) {
				/* index of lane i's word 0 inside the interleaved a[] array */
				uint32_t offx = ((i/SIMD_COEF_64)*(64/sizeof(uint64_t))*SIMD_COEF_64)+(i&(SIMD_COEF_64-1));
				// only 16 bytes in the 'final'
				for (j = 0; j < 16/sizeof(uint64_t); ++j) {
					((uint64_t*)out)[(i*2)+j] = JOHNSWAP64(a[(j*SIMD_COEF_64)+offx]);
				}
			} else if (cnt < loops[i]) bMore = 1;
		}
		/* step the input pointer by 128 bytes for the next pass */
		cp += 64*2; ++cnt;
	}
}

/*
 * SIMD SHA384 worker that emits each lane's result through
 * large_hash_output().
 *   in      - first input buffer; lane k's buffer starts 64*4 bytes after
 *             lane k-1's. Every buffer is passed through Do_FixBufferLen64
 *             first (NOTE(review): presumably pads in place and returns the
 *             number of passes the lane needs - confirm).
 *   ilen    - input length of each lane.
 *   out     - per-lane output buffers.
 *   tot_len - per-lane output offsets; lane k's result is written at
 *             out[k] + tot_len[k] and tot_len[k] is advanced by the value
 *             large_hash_output() returns.
 *   tid     - forwarded to large_hash_output().
 */
static void DoSHA384_crypt_sse(void *in, uint32_t ilen[SHA384_LOOPS], void *out[SHA384_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint64_t a[(64*SHA384_LOOPS)/sizeof(uint64_t)];
	union yy { unsigned char u[64]; uint64_t a[64/sizeof(uint64_t)]; } y;
	uint32_t i, j, loops[SHA384_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < SHA384_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen64(cp, ilen[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	/* one SIMD pass per iteration; keep going while any lane needs more passes */
	while (bMore) {
		/* only passes after the first one carry SSEi_RELOAD */
		SIMDSHA512body(cp, a, a, SSEi_FLAT_IN|SSEi_CRYPT_SHA384|SSEi_2BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA384_LOOPS; ++i) {
			/* a lane's result is harvested on the pass that equals its loop count */
			if (cnt == loops[i]) {
				/* index of lane i's word 0 inside the interleaved a[] array */
				uint32_t offx = ((i/SIMD_COEF_64)*64/sizeof(uint64_t)*SIMD_COEF_64)+(i&(SIMD_COEF_64-1));
				/* de-interleave all 8 words of the lane into y, byte-swapping each */
				for (j = 0; j < 64/sizeof(uint64_t); ++j) {
					y.a[j] = JOHNSWAP64(a[(j*SIMD_COEF_64)+offx]);
				}
				/* only the first 48 bytes are handed on for SHA384 */
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 48, tid);
			} else if (cnt < loops[i]) bMore = 1;
		}
		/* step the input pointer by 128 bytes for the next pass */
		cp += 64*2; ++cnt;
	}
}

/*
 * SIMD SHA384 worker that copies each lane's eight 64-bit result words
 * (byte-swapped) straight into 'out', with no text encoding.
 *   in  - first input buffer; lane k's buffer starts 64*4 bytes after lane
 *         k-1's. Every buffer is passed through Do_FixBufferLen64 first
 *         (NOTE(review): presumably pads in place and returns the number of
 *         passes the lane needs - confirm).
 *   len - input length of each lane.
 *   out - destination; lane k is written to uint64_t words [k*2, k*2+8).
 *
 * NOTE(review): the destination stride is 2 words per lane while 8 words are
 * written, so the ranges of adjacent lanes overlap. Whether that matches the
 * layout of the caller's buffer cannot be told from this function - confirm.
 */
inline static void DoSHA384_sse_crypt_only(void *in, uint32_t len[SHA384_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint64_t a[(64*SHA384_LOOPS)/sizeof(uint64_t)];
	uint32_t i, j, loops[SHA384_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < SHA384_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen64(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	/* one SIMD pass per iteration; keep going while any lane needs more passes */
	while (bMore) {
		/* unlike the other SHA384 SIMD workers, this one also sets SSEi_OUTPUT_AS_INP_FMT */
		SIMDSHA512body(cp, a, a, SSEi_FLAT_IN|SSEi_CRYPT_SHA384|SSEi_OUTPUT_AS_INP_FMT|SSEi_2BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA384_LOOPS; ++i) {
			/* a lane's result is harvested on the pass that equals its loop count */
			if (cnt == loops[i]) {
				/* index of lane i's word 0 inside the interleaved a[] array */
				uint32_t offx = ((i/SIMD_COEF_64)*(64/sizeof(uint64_t))*SIMD_COEF_64)+(i&(SIMD_COEF_64-1));
				for (j = 0; j < 64/sizeof(uint64_t); ++j) {
					((uint64_t*)out)[(i*2)+j] = JOHNSWAP64(a[(j*SIMD_COEF_64)+offx]);
				}
			} else if (cnt < loops[i])
				bMore = 1;
		}
		/* step the input pointer by 128 bytes for the next pass */
		cp += 64*2;
		++cnt;
	}
}

#else
/* Non-SIMD build: one candidate per call, so the drivers step by 1. */
#define SHA384_LOOPS 1
static const uint32_t SHA384_inc = 1;

/*
 * Non-SIMD "final" SHA384: hash 'len' bytes at 'in' and copy the first
 * 16 bytes of the digest to 'out'.
 */
inline static void DoSHA384_crypt_f(void *in, uint32_t len, void *out) {
	/* the uint64_t member gives the digest scratch area 64-bit alignment */
	union { uint64_t words[64/sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	SHA512_CTX hash_ctx;

	SHA384_Init(&hash_ctx);
	SHA512_Update(&hash_ctx, in, len);
	SHA512_Final(digest.bytes, &hash_ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Non-SIMD SHA384: hash 'ilen' bytes at 'in' and write the encoded 48-byte
 * digest to out + *tot_len, advancing *tot_len by the amount written.
 * eBase16 output goes straight to hex_out_buf() (96 characters); every other
 * format is delegated to large_hash_output().
 */
inline static void DoSHA384_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	SHA512_CTX hash_ctx;

	SHA384_Init(&hash_ctx);
	SHA512_Update(&hash_ctx, in, ilen);
	SHA512_Final(digest, &hash_ctx);

	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, (unsigned char*)out + *tot_len, 48, tid);
		return;
	}
	/* hex is the usual case, so skip the large_hash_output() overhead */
	hex_out_buf(digest, (unsigned char*)out + *tot_len, 48);
	*tot_len += 48*2;
}
/* Non-SIMD SHA384: hash 'ilen' bytes at 'in' and let SHA512_Final write the digest directly to 'out'. */
inline static void DoSHA384_crypt_only(void *in, uint32_t ilen, void *out) {
	SHA512_CTX hash_ctx;

	SHA384_Init(&hash_ctx);
	SHA512_Update(&hash_ctx, in, ilen);
	SHA512_Final(out, &hash_ctx);
}
#endif

/*
 * SHA384 of input1, appended to input2: total_len2_X86[] is passed by
 * address as the output offset, so the worker advances it in place.
 */
void DynamicFunc__SHA384_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		/* gather the source length and destination buffer of each lane */
		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
		}
		DoSHA384_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, &total_len2_X86[i], tid);
#else
		void *src_buf = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt(src_buf, total_len_X86[i], dst_buf, &total_len2_X86[i], tid);
#endif
		i += SHA384_inc;
	}
}

/*
 * SHA384 of input2, appended to input1: total_len_X86[] is passed by
 * address as the output offset, so the worker advances it in place.
 */
void DynamicFunc__SHA384_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		/* gather the source length and destination buffer of each lane */
		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
		}
		DoSHA384_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, &total_len_X86[i], tid);
#else
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt(src_buf, total_len2_X86[i], dst_buf, &total_len_X86[i], tid);
#endif
		i += SHA384_inc;
	}
}

/*
 * SHA384 of input1, written into input2 at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so
 * total_len2_X86[] is left unchanged.
 */
void DynamicFunc__SHA384_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA384_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *src_buf = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt(src_buf, total_len_X86[i], dst_buf, &out_off, tid);
#endif
		i += SHA384_inc;
	}
}

/*
 * SHA384 of input2, written into input1 at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so
 * total_len_X86[] is left unchanged.
 */
void DynamicFunc__SHA384_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA384_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt(src_buf, total_len2_X86[i], dst_buf, &out_off, tid);
#endif
		i += SHA384_inc;
	}
}

/*
 * In-place variant: SHA384 of input1, written back into input1 at the
 * offset returned by nLargeOff_get(tid). The advanced offset is discarded.
 */
void DynamicFunc__SHA384_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA384_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA384_crypt(buf, total_len_X86[i], buf, &out_off, tid);
#endif
		i += SHA384_inc;
	}
}

/*
 * In-place variant: SHA384 of input2, written back into input2 at the
 * offset returned by nLargeOff_get(tid). The advanced offset is discarded.
 */
void DynamicFunc__SHA384_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = nLargeOff_get(tid);
		}
		DoSHA384_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
#else
		uint32_t out_off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA384_crypt(buf, total_len2_X86[i], buf, &out_off, tid);
#endif
		i += SHA384_inc;
	}
}

/*
 * In-place overwrite: SHA384 of input1 is written back into input1 from
 * offset 0, and total_len_X86[] is set to the resulting length.
 */
void DynamicFunc__SHA384_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA384_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA384_LOOPS; ++lane)
			total_len_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA384_crypt(buf, total_len_X86[i], buf, &out_off, tid);
		total_len_X86[i] = out_off;
#endif
		i += SHA384_inc;
	}
}

/*
 * Overwrite: SHA384 of input1 is written into input2 from offset 0, and
 * total_len2_X86[] is set to the resulting length.
 */
void DynamicFunc__SHA384_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA384_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA384_LOOPS; ++lane)
			total_len2_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *src_buf = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt(src_buf, total_len_X86[i], dst_buf, &out_off, tid);
		total_len2_X86[i] = out_off;
#endif
		i += SHA384_inc;
	}
}

/*
 * Overwrite: SHA384 of input2 is written into input1 from offset 0, and
 * total_len_X86[] is set to the resulting length.
 */
void DynamicFunc__SHA384_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA384_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA384_LOOPS; ++lane)
			total_len_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst_buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt(src_buf, total_len2_X86[i], dst_buf, &out_off, tid);
		total_len_X86[i] = out_off;
#endif
		i += SHA384_inc;
	}
}

/*
 * In-place overwrite: SHA384 of input2 is written back into input2 from
 * offset 0, and total_len2_X86[] is set to the resulting length.
 */
void DynamicFunc__SHA384_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS], out_off[SHA384_LOOPS];
		void *dst_buf[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane) {
			src_len[lane] = total_len2_X86[i+lane];
			dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#if (MD5_X2)
			if (lane & 1)
				dst_buf[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x2.b2;
#endif
			out_off[lane] = 0;
		}
		DoSHA384_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, dst_buf, out_off, tid);
		/* publish the new per-lane lengths */
		for (lane = 0; lane < SHA384_LOOPS; ++lane)
			total_len2_X86[i+lane] = out_off[lane];
#else
		uint32_t out_off = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA384_crypt(buf, total_len2_X86[i], buf, &out_off, tid);
		total_len2_X86[i] = out_off;
#endif
		i += SHA384_inc;
	}
}

/*
 * Shared worker behind the SHA384 "inputX_to_outputY" primitives.
 *   X      - 1 selects input_buf_X86/total_len_X86; any other value selects
 *            input_buf2_X86/total_len2_X86.
 *   Y      - 1-based index into dynamic_BHO[].
 *   i, til - candidate range [i, til), stepped by SHA384_inc.
 * width (48) is always set; BE/bits/mixed_SIMD are written on every loop
 * pass and therefore stay untouched when the range is empty.
 */
inline static void _Dyna__SHA384_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	const uint32_t slot = Y - 1;	/* callers pass a 1-based Y */

	dynamic_BHO[slot].width = 48;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		dynamic_BHO[slot].BE = 1;
		dynamic_BHO[slot].bits = 64;
		dynamic_BHO[slot].mixed_SIMD = 1;
		if (X != 1)
			DoSHA384_sse_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], dynamic_BHO[slot].dat[i].b);
		else
			DoSHA384_sse_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], dynamic_BHO[slot].dat[i].b);
#else
		void *src_buf;
		uint32_t src_len;

		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 64;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X == 1) {
			src_len = total_len_X86[i];
			src_buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src_len = total_len2_X86[i];
			src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoSHA384_crypt_only(src_buf, src_len, dynamic_BHO[slot].dat[i].b);
#endif
		i += SHA384_inc;
	}
}
/* SHA384 of input1 into dynamic_BHO[0] (X=1, Y=1). */
void DynamicFunc__SHA384_crypt_input1_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(1, 1, i, til);
}
/* SHA384 of input1 into dynamic_BHO[1] (X=1, Y=2). */
void DynamicFunc__SHA384_crypt_input1_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(1, 2, i, til);
}
/* SHA384 of input1 into dynamic_BHO[2] (X=1, Y=3). */
void DynamicFunc__SHA384_crypt_input1_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(1, 3, i, til);
}
/* SHA384 of input1 into dynamic_BHO[3] (X=1, Y=4). */
void DynamicFunc__SHA384_crypt_input1_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(1, 4, i, til);
}
/* SHA384 of input2 into dynamic_BHO[0] (X=2, Y=1). */
void DynamicFunc__SHA384_crypt_input2_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(2, 1, i, til);
}
/* SHA384 of input2 into dynamic_BHO[1] (X=2, Y=2). */
void DynamicFunc__SHA384_crypt_input2_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(2, 2, i, til);
}
/* SHA384 of input2 into dynamic_BHO[2] (X=2, Y=3). */
void DynamicFunc__SHA384_crypt_input2_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(2, 3, i, til);
}
/* SHA384 of input2 into dynamic_BHO[3] (X=2, Y=4). */
void DynamicFunc__SHA384_crypt_input2_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__SHA384_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: SHA384 of input1 for every candidate; the first 16 digest
 * bytes are stored in the matching crypt_key_X86[] slot.
 */
void DynamicFunc__SHA384_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane)
			src_len[lane] = total_len_X86[i+lane];
		DoSHA384_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src_buf = input_buf_X86[i>>MD5_X2].x1.b;
		void *key_buf = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf_X86[i>>MD5_X2].x2.b2;
			key_buf = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt_f(src_buf, total_len_X86[i], key_buf);
#endif
		i += SHA384_inc;
	}
}

/*
 * Final step: SHA384 of input2 for every candidate; the first 16 digest
 * bytes are stored in the matching crypt_key_X86[] slot.
 */
void DynamicFunc__SHA384_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#ifdef SIMD_PARA_SHA512
		uint32_t src_len[SHA384_LOOPS];
		uint32_t lane;

		for (lane = 0; lane < SHA384_LOOPS; ++lane)
			src_len[lane] = total_len2_X86[i+lane];
		DoSHA384_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, src_len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src_buf = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key_buf = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* odd candidates use the x2 member of the shared slot */
		if (i & 1) {
			src_buf = input_buf2_X86[i>>MD5_X2].x2.b2;
			key_buf = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA384_crypt_f(src_buf, total_len2_X86[i], key_buf);
#endif
		i += SHA384_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=SHA512 PARAHASH=SHA512 BIN_SZ=64 BIN_REAL_SZ=64 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=SHA512_CTX HASH_Init=SHA512_Init HASH_Update=SHA512_Update HASH_Final=SHA512_Final SSEBody=SIMDSHA512body SSE_LIMBS=2 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16
 ***********************************************************************/


/*****************************************************************************
 ****  SHA512 functions. This code was generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
#ifdef SIMD_PARA_SHA512
/* Number of candidates handled by one call of the SHA512 SIMD workers below. */
#define SHA512_LOOPS (SIMD_COEF_64*SIMD_PARA_SHA512)
/* Loop stride used by the DynamicFunc__SHA512_* drivers. */
static const uint32_t SHA512_inc = SHA512_LOOPS;

/*
 * SIMD "final" SHA512 worker: processes SHA512_LOOPS input buffers in one go
 * and stores the first two 64-bit result words (16 bytes) of each lane.
 *   in  - first input buffer; lane k's buffer starts 64*4 bytes after lane
 *         k-1's. Every buffer is passed through Do_FixBufferLen64 first
 *         (NOTE(review): presumably pads in place and returns the number of
 *         passes the lane needs - confirm against its definition).
 *   len - input length of each lane.
 *   out - receives 2 uint64_t words per lane, lane k at word index k*2.
 */
static void DoSHA512_crypt_f_sse(void *in, uint32_t len[SHA512_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint64_t a[(64*SHA512_LOOPS)/sizeof(uint64_t)];
	uint32_t i, j, loops[SHA512_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < SHA512_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen64(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in;
	bMore = 1;
	cnt = 1;
	/* one SIMD pass per iteration; keep going while any lane needs more passes */
	while (bMore) {
		/* only passes after the first one carry SSEi_RELOAD */
		SIMDSHA512body(cp, a, a, SSEi_FLAT_IN |SSEi_2BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA512_LOOPS; ++i) {
			/* a lane's result is harvested on the pass that equals its loop count */
			if (cnt == loops[i]) {
				/* index of lane i's word 0 inside the interleaved a[] array */
				uint32_t offx = ((i/SIMD_COEF_64)*(64/sizeof(uint64_t))*SIMD_COEF_64)+(i&(SIMD_COEF_64-1));
				// only 16 bytes in the 'final'
				for (j = 0; j < 16/sizeof(uint64_t); ++j) {
					((uint64_t*)out)[(i*2)+j] = JOHNSWAP64(a[(j*SIMD_COEF_64)+offx]);
				}
			} else if (cnt < loops[i]) bMore = 1;
		}
		/* step the input pointer by 128 bytes for the next pass */
		cp += 64*2; ++cnt;
	}
}

/*
 * SIMD SHA512 worker that emits each lane's result through
 * large_hash_output().
 *   in      - first input buffer; lane k's buffer starts 64*4 bytes after
 *             lane k-1's. Every buffer is passed through Do_FixBufferLen64
 *             first (NOTE(review): presumably pads in place and returns the
 *             number of passes the lane needs - confirm).
 *   ilen    - input length of each lane.
 *   out     - per-lane output buffers.
 *   tot_len - per-lane output offsets; lane k's result is written at
 *             out[k] + tot_len[k] and tot_len[k] is advanced by the value
 *             large_hash_output() returns.
 *   tid     - forwarded to large_hash_output().
 */
static void DoSHA512_crypt_sse(void *in, uint32_t ilen[SHA512_LOOPS], void *out[SHA512_LOOPS], uint32_t *tot_len, uint32_t tid) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint64_t a[(64*SHA512_LOOPS)/sizeof(uint64_t)];
	union yy { unsigned char u[64]; uint64_t a[64/sizeof(uint64_t)]; } y;
	uint32_t i, j, loops[SHA512_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < SHA512_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen64(cp, ilen[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	/* one SIMD pass per iteration; keep going while any lane needs more passes */
	while (bMore) {
		/* only passes after the first one carry SSEi_RELOAD */
		SIMDSHA512body(cp, a, a, SSEi_FLAT_IN |SSEi_2BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA512_LOOPS; ++i) {
			/* a lane's result is harvested on the pass that equals its loop count */
			if (cnt == loops[i]) {
				/* index of lane i's word 0 inside the interleaved a[] array */
				uint32_t offx = ((i/SIMD_COEF_64)*64/sizeof(uint64_t)*SIMD_COEF_64)+(i&(SIMD_COEF_64-1));
				/* de-interleave all 8 words of the lane into y, byte-swapping each */
				for (j = 0; j < 64/sizeof(uint64_t); ++j) {
					y.a[j] = JOHNSWAP64(a[(j*SIMD_COEF_64)+offx]);
				}
				/* all 64 digest bytes are handed on for SHA512 */
				*(tot_len+i) += large_hash_output(y.u, &(((unsigned char*)out[i])[*(tot_len+i)]), 64, tid);
			} else if (cnt < loops[i]) bMore = 1;
		}
		/* step the input pointer by 128 bytes for the next pass */
		cp += 64*2; ++cnt;
	}
}

/*
 * SIMD SHA512 worker that copies each lane's eight 64-bit result words
 * (byte-swapped) straight into 'out', with no text encoding.
 *   in  - first input buffer; lane k's buffer starts 64*4 bytes after lane
 *         k-1's. Every buffer is passed through Do_FixBufferLen64 first
 *         (NOTE(review): presumably pads in place and returns the number of
 *         passes the lane needs - confirm).
 *   len - input length of each lane.
 *   out - destination; lane k is written to uint64_t words [k*2, k*2+8).
 *
 * NOTE(review): the destination stride is 2 words per lane while 8 words are
 * written, so the ranges of adjacent lanes overlap. Whether that matches the
 * layout of the caller's buffer cannot be told from this function - confirm.
 */
inline static void DoSHA512_sse_crypt_only(void *in, uint32_t len[SHA512_LOOPS], void *out) {
	JTR_ALIGN(MEM_ALIGN_SIMD) uint64_t a[(64*SHA512_LOOPS)/sizeof(uint64_t)];
	uint32_t i, j, loops[SHA512_LOOPS], bMore, cnt;
	unsigned char *cp = (unsigned char*)in;
	for (i = 0; i < SHA512_LOOPS; ++i) {
		loops[i] = Do_FixBufferLen64(cp, len[i], 1);
		cp += 64*4;
	}
	cp = (unsigned char*)in; bMore = 1; cnt = 1;
	/* one SIMD pass per iteration; keep going while any lane needs more passes */
	while (bMore) {
		/* unlike the other SHA512 SIMD workers, this one also sets SSEi_OUTPUT_AS_INP_FMT */
		SIMDSHA512body(cp, a, a, SSEi_FLAT_IN |SSEi_OUTPUT_AS_INP_FMT|SSEi_2BUF_INPUT_FIRST_BLK|(cnt==1?0:SSEi_RELOAD));
		bMore = 0;
		for (i = 0; i < SHA512_LOOPS; ++i) {
			/* a lane's result is harvested on the pass that equals its loop count */
			if (cnt == loops[i]) {
				/* index of lane i's word 0 inside the interleaved a[] array */
				uint32_t offx = ((i/SIMD_COEF_64)*(64/sizeof(uint64_t))*SIMD_COEF_64)+(i&(SIMD_COEF_64-1));
				for (j = 0; j < 64/sizeof(uint64_t); ++j) {
					((uint64_t*)out)[(i*2)+j] = JOHNSWAP64(a[(j*SIMD_COEF_64)+offx]);
				}
			} else if (cnt < loops[i])
				bMore = 1;
		}
		/* step the input pointer by 128 bytes for the next pass */
		cp += 64*2;
		++cnt;
	}
}

#else
/* Non-SIMD build: one candidate per call, so the drivers step by 1. */
#define SHA512_LOOPS 1
static const uint32_t SHA512_inc = 1;

/*
 * SHA512 of 'len' bytes at 'in'; only the first 16 bytes of the 64 byte
 * digest are copied to 'out'.
 */
inline static void DoSHA512_crypt_f(void *in, uint32_t len, void *out) {
	union xx { unsigned char u[64]; uint64_t a[64/sizeof(uint64_t)]; } full;
	SHA512_CTX hasher;

	SHA512_Init(&hasher);
	SHA512_Update(&hasher, in, len);
	SHA512_Final(full.u, &hasher);
	memcpy(out, full.u, 16);
}

/*
 * SHA512 of 'ilen' bytes at 'in'.  The 64 byte digest is written at
 * out + *tot_len: through hex_out_buf() when the thread's large output mode
 * is eBase16 (*tot_len grows by 64*2), otherwise through large_hash_output()
 * (*tot_len grows by its return value).
 */
inline static void DoSHA512_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char md[64];
	unsigned char *dst;
	SHA512_CTX hasher;

	SHA512_Init(&hasher);
	SHA512_Update(&hasher, in, ilen);
	SHA512_Final(md, &hasher);

	dst = (unsigned char*)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(md, dst, 64, tid);
		return;
	}
	/* Usual case: skip the large_hash_output() overhead and hex encode directly. */
	hex_out_buf(md, dst, 64);
	*tot_len += 64*2;
}
/* SHA512 of 'ilen' bytes at 'in'; SHA512_Final() writes the digest straight to 'out'. */
inline static void DoSHA512_crypt_only(void *in, uint32_t ilen, void *out) {
	SHA512_CTX hasher;

	SHA512_Init(&hasher);
	SHA512_Update(&hasher, in, ilen);
	SHA512_Final(out, &hasher);
}
#endif

/*
 * Hashes each input1 buffer with SHA512 and appends the encoded result to the
 * matching input2 buffer at its current length; total_len2_X86 is advanced.
 */
void DynamicFunc__SHA512_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf2_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#endif
			len[lane] = total_len_X86[i+lane];
		}
		DoSHA512_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, len, out, &total_len2_X86[i], tid);
#else
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
#endif
	}
}

/*
 * Hashes each input2 buffer with SHA512 and appends the encoded result to the
 * matching input1 buffer at its current length; total_len_X86 is advanced.
 */
void DynamicFunc__SHA512_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#endif
			len[lane] = total_len2_X86[i+lane];
		}
		DoSHA512_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, len, out, &total_len_X86[i], tid);
#else
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
#endif
	}
}

/*
 * Hashes each input1 buffer with SHA512 and writes the encoded result into
 * the matching input2 buffer at offset nLargeOff_get(tid).  The stored
 * lengths are not modified (the running offset lives in a local).
 */
void DynamicFunc__SHA512_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], off[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			len[lane] = total_len_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf2_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#endif
			off[lane] = nLargeOff_get(tid);
		}
		DoSHA512_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, len, out, off, tid);
#else
		uint32_t off = nLargeOff_get(tid);
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt(src, total_len_X86[i], dst, &off, tid);
#endif
	}
}

/*
 * Hashes each input2 buffer with SHA512 and writes the encoded result into
 * the matching input1 buffer at offset nLargeOff_get(tid).  The stored
 * lengths are not modified (the running offset lives in a local).
 */
void DynamicFunc__SHA512_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], off[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			len[lane] = total_len2_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#endif
			off[lane] = nLargeOff_get(tid);
		}
		DoSHA512_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, len, out, off, tid);
#else
		uint32_t off = nLargeOff_get(tid);
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt(src, total_len2_X86[i], dst, &off, tid);
#endif
	}
}

/*
 * Hashes each input1 buffer with SHA512 and writes the encoded result back
 * into the same buffer at offset nLargeOff_get(tid).  The stored lengths are
 * not modified (the running offset lives in a local).
 */
void DynamicFunc__SHA512_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], off[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			len[lane] = total_len_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#endif
			off[lane] = nLargeOff_get(tid);
		}
		DoSHA512_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, len, out, off, tid);
#else
		uint32_t off = nLargeOff_get(tid);
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSHA512_crypt(buf, total_len_X86[i], buf, &off, tid);
#endif
	}
}

/*
 * Hashes each input2 buffer with SHA512 and writes the encoded result back
 * into the same buffer at offset nLargeOff_get(tid).  The stored lengths are
 * not modified (the running offset lives in a local).
 */
void DynamicFunc__SHA512_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], off[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			len[lane] = total_len2_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf2_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#endif
			off[lane] = nLargeOff_get(tid);
		}
		DoSHA512_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, len, out, off, tid);
#else
		uint32_t off = nLargeOff_get(tid);
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSHA512_crypt(buf, total_len2_X86[i], buf, &off, tid);
#endif
	}
}

/*
 * Hashes each input1 buffer with SHA512 and writes the encoded result over
 * the start of that same buffer; total_len_X86 becomes the encoded length.
 */
void DynamicFunc__SHA512_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], written[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			written[lane] = 0;
			len[lane] = total_len_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#endif
		}
		DoSHA512_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, len, out, written, tid);
		for (lane = 0; lane < SHA512_LOOPS; ++lane)
			total_len_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSHA512_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
#endif
	}
}

/*
 * Hashes each input1 buffer with SHA512 and writes the encoded result at the
 * start of the matching input2 buffer; total_len2_X86 becomes the encoded
 * length.
 */
void DynamicFunc__SHA512_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], written[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			written[lane] = 0;
			len[lane] = total_len_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf2_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#endif
		}
		DoSHA512_crypt_sse(input_buf_X86[i>>MD5_X2].x1.b, len, out, written, tid);
		for (lane = 0; lane < SHA512_LOOPS; ++lane)
			total_len2_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
#endif
	}
}

/*
 * Hashes each input2 buffer with SHA512 and writes the encoded result at the
 * start of the matching input1 buffer; total_len_X86 becomes the encoded
 * length.
 */
void DynamicFunc__SHA512_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], written[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			written[lane] = 0;
			len[lane] = total_len2_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf_X86[(i+lane)>>MD5_X2].x1.b;
#endif
		}
		DoSHA512_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, len, out, written, tid);
		for (lane = 0; lane < SHA512_LOOPS; ++lane)
			total_len_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
#endif
	}
}

/*
 * Hashes each input2 buffer with SHA512 and writes the encoded result over
 * the start of that same buffer; total_len2_X86 becomes the encoded length.
 */
void DynamicFunc__SHA512_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS], written[SHA512_LOOPS];
		void *out[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane) {
			written[lane] = 0;
			len[lane] = total_len2_X86[i+lane];
#if (MD5_X2)
			out[lane] = (lane&1) ? (void*)input_buf2_X86[(i+lane)>>MD5_X2].x2.b2
			                     : (void*)input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#else
			out[lane] = input_buf2_X86[(i+lane)>>MD5_X2].x1.b;
#endif
		}
		DoSHA512_crypt_sse(input_buf2_X86[i>>MD5_X2].x1.b, len, out, written, tid);
		for (lane = 0; lane < SHA512_LOOPS; ++lane)
			total_len2_X86[i+lane] = written[lane];
#else
		uint32_t written = 0;
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSHA512_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
#endif
	}
}

/*
 * Stores the raw SHA512 digest of input buffer X (1 selects input1, anything
 * else input2) into dynamic_BHO[Y-1].dat[i].b for every i in [i, til).
 * Y is 1 based on entry.  width is set unconditionally; BE, bits and
 * mixed_SIMD are (re)assigned on every loop iteration.
 */
inline static void _Dyna__SHA512_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y;	// Y was 1 based for ease of reading.
	dynamic_BHO[Y].width = 64;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		void *src;
		uint32_t *lens;

		dynamic_BHO[Y].BE = 1;
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD=1;
		if (X==1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			lens = &total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			lens = &total_len2_X86[i];
		}
		DoSHA512_sse_crypt_only(src, lens, dynamic_BHO[Y].dat[i].b);
#else
		void *src;
		uint32_t len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD=0;
		if (X==1) {
			len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf_X86[i>>MD5_X2].x1.b;
		} else {
			len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt_only(src, len, dynamic_BHO[Y].dat[i].b);
#endif
	}
}
/*
 * Public entry points: raw SHA512 of input1/input2 into output slot 1..4.
 * Each forwards (X, Y) to _Dyna__SHA512_crypt_inputX_to_outputY().
 */
void DynamicFunc__SHA512_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__SHA512_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__SHA512_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__SHA512_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__SHA512_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__SHA512_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__SHA512_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__SHA512_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA512_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: SHA512 of each input1 buffer, stored in the matching
 * crypt_key_X86 entry through DoSHA512_crypt_f (or its _sse variant).
 */
void DynamicFunc__SHA512_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane)
			len[lane] = total_len_X86[i+lane];
		DoSHA512_crypt_f_sse(input_buf_X86[i>>MD5_X2].x1.b, len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt_f(src, total_len_X86[i], dst);
#endif
	}
}

/*
 * Final step: SHA512 of each input2 buffer, stored in the matching
 * crypt_key_X86 entry through DoSHA512_crypt_f (or its _sse variant).
 */
void DynamicFunc__SHA512_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += SHA512_inc) {
#ifdef SIMD_PARA_SHA512
		uint32_t lane, len[SHA512_LOOPS];

		for (lane = 0; lane < SHA512_LOOPS; ++lane)
			len[lane] = total_len2_X86[i+lane];
		DoSHA512_crypt_f_sse(input_buf2_X86[i>>MD5_X2].x1.b, len, crypt_key_X86[i>>MD5_X2].x1.b);
#else
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSHA512_crypt_f(src, total_len2_X86[i], dst);
#endif
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=SHA3_224 PARAHASH=SHA3_224 BIN_SZ=64 BIN_REAL_SZ=28 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=SHA3_224_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SHA3_224
 ***********************************************************************/


/*****************************************************************************
 ****  SHA3_224 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* SHA3_224 has only a one-at-a-time path here: the DynamicFunc loops step i by 1. */
#define SHA3_224_LOOPS 1
static const uint32_t SHA3_224_inc = 1;

/*
 * SHA3_224 of 'len' bytes at 'in'; only the first 16 bytes of the digest are
 * copied to 'out'.
 */
inline static void DoSHA3_224_crypt_f(void *in, uint32_t len, void *out) {
	union xx { unsigned char u[64]; uint64_t a[64/sizeof(uint64_t)]; } full;
	KECCAK_CTX hasher;

	SHA3_224_Init(&hasher);
	KECCAK_Update(&hasher, in, len);
	KECCAK_Final(full.u, &hasher);
	memcpy(out, full.u, 16);
}

/*
 * SHA3_224 of 'ilen' bytes at 'in'.  The 28 byte digest is written at
 * out + *tot_len: through hex_out_buf() when the thread's large output mode
 * is eBase16 (*tot_len grows by 28*2), otherwise through large_hash_output()
 * (*tot_len grows by its return value).
 */
inline static void DoSHA3_224_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char md[64];
	unsigned char *dst;
	KECCAK_CTX hasher;

	SHA3_224_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(md, &hasher);

	dst = (unsigned char*)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(md, dst, 28, tid);
		return;
	}
	/* Usual case: skip the large_hash_output() overhead and hex encode directly. */
	hex_out_buf(md, dst, 28);
	*tot_len += 28*2;
}
/* SHA3_224 of 'ilen' bytes at 'in'; KECCAK_Final() writes the digest straight to 'out'. */
inline static void DoSHA3_224_crypt_only(void *in, uint32_t ilen, void *out) {
	KECCAK_CTX hasher;

	SHA3_224_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(out, &hasher);
}

/*
 * Hashes each input1 buffer with SHA3_224 and appends the encoded result to
 * the matching input2 buffer; total_len2_X86 is advanced.
 */
void DynamicFunc__SHA3_224_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
	}
}

/*
 * Hashes each input2 buffer with SHA3_224 and appends the encoded result to
 * the matching input1 buffer; total_len_X86 is advanced.
 */
void DynamicFunc__SHA3_224_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
	}
}

/*
 * Hashes each input1 buffer with SHA3_224 and writes the encoded result into
 * the matching input2 buffer at offset nLargeOff_get(tid).  Stored lengths
 * are not modified.
 */
void DynamicFunc__SHA3_224_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt(src, total_len_X86[i], dst, &off, tid);
	}
}

/*
 * Hashes each input2 buffer with SHA3_224 and writes the encoded result into
 * the matching input1 buffer at offset nLargeOff_get(tid).  Stored lengths
 * are not modified.
 */
void DynamicFunc__SHA3_224_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt(src, total_len2_X86[i], dst, &off, tid);
	}
}

/*
 * Hashes each input1 buffer with SHA3_224 and writes the encoded result back
 * into the same buffer at offset nLargeOff_get(tid).  Stored lengths are not
 * modified.
 */
void DynamicFunc__SHA3_224_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSHA3_224_crypt(buf, total_len_X86[i], buf, &off, tid);
	}
}

/*
 * Hashes each input2 buffer with SHA3_224 and writes the encoded result back
 * into the same buffer at offset nLargeOff_get(tid).  Stored lengths are not
 * modified.
 */
void DynamicFunc__SHA3_224_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSHA3_224_crypt(buf, total_len2_X86[i], buf, &off, tid);
	}
}

/*
 * Hashes each input1 buffer with SHA3_224 and writes the encoded result over
 * the start of that same buffer; total_len_X86 becomes the encoded length.
 */
void DynamicFunc__SHA3_224_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t written = 0;
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSHA3_224_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
	}
}

/*
 * Hashes each input1 buffer with SHA3_224 and writes the encoded result at
 * the start of the matching input2 buffer; total_len2_X86 becomes the encoded
 * length.
 */
void DynamicFunc__SHA3_224_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t written = 0;
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
	}
}

/*
 * Hashes each input2 buffer with SHA3_224 and writes the encoded result at
 * the start of the matching input1 buffer; total_len_X86 becomes the encoded
 * length.
 */
void DynamicFunc__SHA3_224_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t written = 0;
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
	}
}

/*
 * Hashes each input2 buffer with SHA3_224 and writes the encoded result over
 * the start of that same buffer; total_len2_X86 becomes the encoded length.
 */
void DynamicFunc__SHA3_224_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_224_inc) {
		uint32_t written = 0;
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSHA3_224_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
	}
}

/*
 * Stores the raw SHA3_224 digest of input buffer X (1 selects input1,
 * anything else input2) into dynamic_BHO[Y-1].dat[i].b for every i in
 * [i, til).  Y is 1 based on entry.  width is set unconditionally; BE, bits
 * and mixed_SIMD are (re)assigned on every loop iteration.
 */
inline static void _Dyna__SHA3_224_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y;	// Y was 1 based for ease of reading.
	dynamic_BHO[Y].width = 28;
	for (; i < til; i += SHA3_224_inc) {
		void *src;
		uint32_t len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD=0;
		if (X==1) {
			len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf_X86[i>>MD5_X2].x1.b;
		} else {
			len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt_only(src, len, dynamic_BHO[Y].dat[i].b);
	}
}
/*
 * Public entry points: raw SHA3_224 of input1/input2 into output slot 1..4.
 * Each forwards (X, Y) to _Dyna__SHA3_224_crypt_inputX_to_outputY().
 */
void DynamicFunc__SHA3_224_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__SHA3_224_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__SHA3_224_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__SHA3_224_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__SHA3_224_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__SHA3_224_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__SHA3_224_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__SHA3_224_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_224_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: SHA3_224 of each input1 buffer, stored in the matching
 * crypt_key_X86 entry through DoSHA3_224_crypt_f.
 */
void DynamicFunc__SHA3_224_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += SHA3_224_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt_f(src, total_len_X86[i], dst);
	}
}

/*
 * Final step: SHA3_224 of each input2 buffer, stored in the matching
 * crypt_key_X86 entry through DoSHA3_224_crypt_f.
 */
void DynamicFunc__SHA3_224_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += SHA3_224_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_224_crypt_f(src, total_len2_X86[i], dst);
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=SHA3_256 PARAHASH=SHA3_256 BIN_SZ=64 BIN_REAL_SZ=32 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=SHA3_256_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SHA3_256
 ***********************************************************************/


/*****************************************************************************
 ****  SHA3_256 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* SHA3_256 has only a one-at-a-time path here: the DynamicFunc loops step i by 1. */
#define SHA3_256_LOOPS 1
static const uint32_t SHA3_256_inc = 1;

/*
 * SHA3_256 of 'len' bytes at 'in'; only the first 16 bytes of the digest are
 * copied to 'out'.
 */
inline static void DoSHA3_256_crypt_f(void *in, uint32_t len, void *out) {
	union xx { unsigned char u[64]; uint64_t a[64/sizeof(uint64_t)]; } full;
	KECCAK_CTX hasher;

	SHA3_256_Init(&hasher);
	KECCAK_Update(&hasher, in, len);
	KECCAK_Final(full.u, &hasher);
	memcpy(out, full.u, 16);
}

/*
 * SHA3_256 of 'ilen' bytes at 'in'.  The 32 byte digest is written at
 * out + *tot_len: through hex_out_buf() when the thread's large output mode
 * is eBase16 (*tot_len grows by 32*2), otherwise through large_hash_output()
 * (*tot_len grows by its return value).
 */
inline static void DoSHA3_256_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char md[64];
	unsigned char *dst;
	KECCAK_CTX hasher;

	SHA3_256_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(md, &hasher);

	dst = (unsigned char*)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(md, dst, 32, tid);
		return;
	}
	/* Usual case: skip the large_hash_output() overhead and hex encode directly. */
	hex_out_buf(md, dst, 32);
	*tot_len += 32*2;
}
/* SHA3_256 of 'ilen' bytes at 'in'; KECCAK_Final() writes the digest straight to 'out'. */
inline static void DoSHA3_256_crypt_only(void *in, uint32_t ilen, void *out) {
	KECCAK_CTX hasher;

	SHA3_256_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(out, &hasher);
}

/*
 * Hashes each input1 buffer with SHA3_256 and appends the encoded result to
 * the matching input2 buffer; total_len2_X86 is advanced.
 */
void DynamicFunc__SHA3_256_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
	}
}

/*
 * Hashes each input2 buffer with SHA3_256 and appends the encoded result to
 * the matching input1 buffer; total_len_X86 is advanced.
 */
void DynamicFunc__SHA3_256_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
	}
}

/*
 * Hashes each input1 buffer with SHA3_256 and writes the encoded result into
 * the matching input2 buffer at offset nLargeOff_get(tid).  Stored lengths
 * are not modified.
 */
void DynamicFunc__SHA3_256_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt(src, total_len_X86[i], dst, &off, tid);
	}
}

/*
 * Hashes each input2 buffer with SHA3_256 and writes the encoded result into
 * the matching input1 buffer at offset nLargeOff_get(tid).  Stored lengths
 * are not modified.
 */
void DynamicFunc__SHA3_256_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt(src, total_len2_X86[i], dst, &off, tid);
	}
}

/*
 * Hashes each input1 buffer with SHA3_256 and writes the encoded result back
 * into the same buffer at offset nLargeOff_get(tid).  Stored lengths are not
 * modified.
 */
void DynamicFunc__SHA3_256_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSHA3_256_crypt(buf, total_len_X86[i], buf, &off, tid);
	}
}

/*
 * Hashes each input2 buffer with SHA3_256 and writes the encoded result back
 * into the same buffer at offset nLargeOff_get(tid).  Stored lengths are not
 * modified.
 */
void DynamicFunc__SHA3_256_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t off = nLargeOff_get(tid);
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSHA3_256_crypt(buf, total_len2_X86[i], buf, &off, tid);
	}
}

/*
 * Hashes each input1 buffer with SHA3_256 and writes the encoded result over
 * the start of that same buffer; total_len_X86 becomes the encoded length.
 */
void DynamicFunc__SHA3_256_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t written = 0;
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSHA3_256_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
	}
}

/*
 * Hashes each input1 buffer with SHA3_256 and writes the encoded result at
 * the start of the matching input2 buffer; total_len2_X86 becomes the encoded
 * length.
 */
void DynamicFunc__SHA3_256_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t written = 0;
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
	}
}

/*
 * Hashes each input2 buffer with SHA3_256 and writes the encoded result at
 * the start of the matching input1 buffer; total_len_X86 becomes the encoded
 * length.
 */
void DynamicFunc__SHA3_256_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t written = 0;
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
	}
}

/*
 * Hashes each input2 buffer with SHA3_256 and writes the encoded result over
 * the start of that same buffer; total_len2_X86 becomes the encoded length.
 */
void DynamicFunc__SHA3_256_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += SHA3_256_inc) {
		uint32_t written = 0;
		void *buf;

#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSHA3_256_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
	}
}

/*
 * Stores the raw SHA3_256 digest of input buffer X (1 selects input1,
 * anything else input2) into dynamic_BHO[Y-1].dat[i].b for every i in
 * [i, til).  Y is 1 based on entry.  width is set unconditionally; BE, bits
 * and mixed_SIMD are (re)assigned on every loop iteration.
 */
inline static void _Dyna__SHA3_256_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y;	// Y was 1 based for ease of reading.
	dynamic_BHO[Y].width = 32;
	for (; i < til; i += SHA3_256_inc) {
		void *src;
		uint32_t len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD=0;
		if (X==1) {
			len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf_X86[i>>MD5_X2].x1.b;
		} else {
			len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt_only(src, len, dynamic_BHO[Y].dat[i].b);
	}
}
/*
 * Public entry points: raw SHA3_256 of input1/input2 into output slot 1..4.
 * Each forwards (X, Y) to _Dyna__SHA3_256_crypt_inputX_to_outputY().
 */
void DynamicFunc__SHA3_256_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__SHA3_256_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__SHA3_256_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__SHA3_256_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__SHA3_256_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__SHA3_256_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__SHA3_256_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__SHA3_256_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_256_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: SHA3_256 of each input1 buffer, stored in the matching
 * crypt_key_X86 entry through DoSHA3_256_crypt_f.
 */
void DynamicFunc__SHA3_256_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += SHA3_256_inc) {
		void *src, *dst;

#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSHA3_256_crypt_f(src, total_len_X86[i], dst);
	}
}

/*
 * For every index in [i, til), run DoSHA3_256_crypt_f over the input 2
 * buffer, targeting the matching crypt_key_X86 slot.
 */
void DynamicFunc__SHA3_256_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_256_crypt_f(src, total_len2_X86[i], key);
		i += SHA3_256_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=SHA3_384 PARAHASH=SHA3_384 BIN_SZ=64 BIN_REAL_SZ=48 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=SHA3_384_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SHA3_384
 ***********************************************************************/


/*****************************************************************************
 ****  SHA3_384 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): SHA3_384_LOOPS is not referenced by the nearby SHA3_384 code; presumably a per-call batch size -- confirm in dynamic_big_crypt_hash.cin. */
#define SHA3_384_LOOPS 1
/* Step added to the index on every pass of the SHA3_384 loops in this section. */
static const uint32_t SHA3_384_inc = 1;

/*
 * One-shot SHA3_384 over 'len' bytes at 'in'.  Only the first 16 bytes of
 * the result are copied to 'out' (this section was generated with
 * TRUNC_TO16).
 */
static inline void DoSHA3_384_crypt_f(void *in, uint32_t len, void *out)
{
	/* 64-byte scratch; the uint64_t member is never read here (presumably kept for alignment). */
	union { uint64_t words[64 / sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	KECCAK_CTX hasher;

	SHA3_384_Init(&hasher);
	KECCAK_Update(&hasher, in, len);
	KECCAK_Final(digest.bytes, &hasher);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash 'ilen' bytes at 'in' with SHA3_384 and write the encoded 48-byte
 * digest into 'out' starting at byte offset *tot_len.  *tot_len grows by
 * 96 for hex output, otherwise by whatever large_hash_output returns.
 * 'in' and 'out' may be the same buffer: the digest is finished in a local
 * array before 'out' is written.
 */
static inline void DoSHA3_384_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[64];
	unsigned char *dst;
	KECCAK_CTX hasher;

	SHA3_384_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(digest, &hasher);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) == eBase16) {
		/* Hex is the usual case, so skip large_hash_output and encode directly. */
		hex_out_buf(digest, dst, 48);
		*tot_len += 48 * 2;
	} else {
		*tot_len += large_hash_output(digest, dst, 48, tid);
	}
}
/* One-shot SHA3_384 over 'ilen' bytes at 'in'; KECCAK_Final writes straight into 'out'. */
static inline void DoSHA3_384_crypt_only(void *in, uint32_t ilen, void *out)
{
	KECCAK_CTX hasher;

	SHA3_384_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(out, &hasher);
}

/*
 * For every index in [i, til): SHA3_384 the input 1 buffer and append the
 * encoded digest to input 2, advancing total_len2_X86[i].
 */
void DynamicFunc__SHA3_384_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 2 buffer and append the
 * encoded digest to input 1, advancing total_len_X86[i].
 */
void DynamicFunc__SHA3_384_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 1 buffer and write the
 * encoded digest into input 2 at the offset given by nLargeOff_get(tid).
 * The running length lives in a throwaway local, so total_len2_X86 is not
 * modified.
 */
void DynamicFunc__SHA3_384_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt(src, total_len_X86[i], dst, &cursor, tid);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 2 buffer and write the
 * encoded digest into input 1 at the offset given by nLargeOff_get(tid).
 * The running length lives in a throwaway local, so total_len_X86 is not
 * modified.
 */
void DynamicFunc__SHA3_384_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt(src, total_len2_X86[i], dst, &cursor, tid);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 1 buffer and write the
 * encoded digest back into that same buffer at the offset given by
 * nLargeOff_get(tid).  total_len_X86 is not modified.
 */
void DynamicFunc__SHA3_384_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		/* Source and destination are the same buffer. */
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_384_crypt(buf, total_len_X86[i], buf, &cursor, tid);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 2 buffer and write the
 * encoded digest back into that same buffer at the offset given by
 * nLargeOff_get(tid).  total_len2_X86 is not modified.
 */
void DynamicFunc__SHA3_384_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		/* Source and destination are the same buffer. */
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_384_crypt(buf, total_len2_X86[i], buf, &cursor, tid);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 1 buffer, write the
 * encoded digest over the start of that same buffer, and store the new
 * length in total_len_X86[i].
 */
void DynamicFunc__SHA3_384_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_384_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 1 buffer, write the
 * encoded digest over the start of input 2, and store the new length in
 * total_len2_X86[i].
 */
void DynamicFunc__SHA3_384_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 2 buffer, write the
 * encoded digest over the start of input 1, and store the new length in
 * total_len_X86[i].
 */
void DynamicFunc__SHA3_384_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 2 buffer, write the
 * encoded digest over the start of that same buffer, and store the new
 * length in total_len2_X86[i].
 */
void DynamicFunc__SHA3_384_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_384_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += SHA3_384_inc;
	}
}

/*
 * Store the raw SHA3_384 digest of input X (1 selects input 1, any other
 * value selects input 2) into dynamic_BHO[Y-1].dat[i].b for each index in
 * [i, til).  Y is 1-based at the call sites.
 */
static inline void _Dyna__SHA3_384_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til)
{
	const uint32_t slot = Y - 1;

	dynamic_BHO[slot].width = 48;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Stored on every pass, as generated; left untouched when the range is empty. */
		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 64;
		dynamic_BHO[slot].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
			src_len = total_len2_X86[i];
		}
		DoSHA3_384_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
		i += SHA3_384_inc;
	}
}
/*
 * Thin entry points.  Each one only fixes the (input X, output Y) pair that
 * is forwarded to _Dyna__SHA3_384_crypt_inputX_to_outputY together with the
 * index range [i, til).
 */
void DynamicFunc__SHA3_384_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SHA3_384_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SHA3_384_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SHA3_384_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SHA3_384_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SHA3_384_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SHA3_384_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SHA3_384_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_384_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index in [i, til): SHA3_384 the input 1 buffer and place the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__SHA3_384_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt_f(src, total_len_X86[i], key);
		i += SHA3_384_inc;
	}
}

/*
 * For every index in [i, til): SHA3_384 the input 2 buffer and place the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__SHA3_384_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_384_crypt_f(src, total_len2_X86[i], key);
		i += SHA3_384_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=SHA3_512 PARAHASH=SHA3_512 BIN_SZ=64 BIN_REAL_SZ=64 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=SHA3_512_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SHA3_512
 ***********************************************************************/


/*****************************************************************************
 ****  SHA3_512 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): SHA3_512_LOOPS is not referenced by the nearby SHA3_512 code; presumably a per-call batch size -- confirm in dynamic_big_crypt_hash.cin. */
#define SHA3_512_LOOPS 1
/* Step added to the index on every pass of the SHA3_512 loops in this section. */
static const uint32_t SHA3_512_inc = 1;

/*
 * One-shot SHA3_512 over 'len' bytes at 'in'.  Only the first 16 bytes of
 * the result are copied to 'out' (this section was generated with
 * TRUNC_TO16).
 */
static inline void DoSHA3_512_crypt_f(void *in, uint32_t len, void *out)
{
	/* 64-byte scratch; the uint64_t member is never read here (presumably kept for alignment). */
	union { uint64_t words[64 / sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	KECCAK_CTX hasher;

	SHA3_512_Init(&hasher);
	KECCAK_Update(&hasher, in, len);
	KECCAK_Final(digest.bytes, &hasher);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash 'ilen' bytes at 'in' with SHA3_512 and write the encoded 64-byte
 * digest into 'out' starting at byte offset *tot_len.  *tot_len grows by
 * 128 for hex output, otherwise by whatever large_hash_output returns.
 * 'in' and 'out' may be the same buffer: the digest is finished in a local
 * array before 'out' is written.
 */
static inline void DoSHA3_512_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[64];
	unsigned char *dst;
	KECCAK_CTX hasher;

	SHA3_512_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(digest, &hasher);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) == eBase16) {
		/* Hex is the usual case, so skip large_hash_output and encode directly. */
		hex_out_buf(digest, dst, 64);
		*tot_len += 64 * 2;
	} else {
		*tot_len += large_hash_output(digest, dst, 64, tid);
	}
}
/* One-shot SHA3_512 over 'ilen' bytes at 'in'; KECCAK_Final writes straight into 'out'. */
static inline void DoSHA3_512_crypt_only(void *in, uint32_t ilen, void *out)
{
	KECCAK_CTX hasher;

	SHA3_512_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(out, &hasher);
}

/*
 * For every index in [i, til): SHA3_512 the input 1 buffer and append the
 * encoded digest to input 2, advancing total_len2_X86[i].
 */
void DynamicFunc__SHA3_512_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 2 buffer and append the
 * encoded digest to input 1, advancing total_len_X86[i].
 */
void DynamicFunc__SHA3_512_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 1 buffer and write the
 * encoded digest into input 2 at the offset given by nLargeOff_get(tid).
 * The running length lives in a throwaway local, so total_len2_X86 is not
 * modified.
 */
void DynamicFunc__SHA3_512_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt(src, total_len_X86[i], dst, &cursor, tid);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 2 buffer and write the
 * encoded digest into input 1 at the offset given by nLargeOff_get(tid).
 * The running length lives in a throwaway local, so total_len_X86 is not
 * modified.
 */
void DynamicFunc__SHA3_512_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt(src, total_len2_X86[i], dst, &cursor, tid);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 1 buffer and write the
 * encoded digest back into that same buffer at the offset given by
 * nLargeOff_get(tid).  total_len_X86 is not modified.
 */
void DynamicFunc__SHA3_512_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		/* Source and destination are the same buffer. */
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_512_crypt(buf, total_len_X86[i], buf, &cursor, tid);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 2 buffer and write the
 * encoded digest back into that same buffer at the offset given by
 * nLargeOff_get(tid).  total_len2_X86 is not modified.
 */
void DynamicFunc__SHA3_512_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		/* Source and destination are the same buffer. */
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_512_crypt(buf, total_len2_X86[i], buf, &cursor, tid);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 1 buffer, write the
 * encoded digest over the start of that same buffer, and store the new
 * length in total_len_X86[i].
 */
void DynamicFunc__SHA3_512_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_512_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 1 buffer, write the
 * encoded digest over the start of input 2, and store the new length in
 * total_len2_X86[i].
 */
void DynamicFunc__SHA3_512_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 2 buffer, write the
 * encoded digest over the start of input 1, and store the new length in
 * total_len_X86[i].
 */
void DynamicFunc__SHA3_512_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 2 buffer, write the
 * encoded digest over the start of that same buffer, and store the new
 * length in total_len2_X86[i].
 */
void DynamicFunc__SHA3_512_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoSHA3_512_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += SHA3_512_inc;
	}
}

/*
 * Store the raw SHA3_512 digest of input X (1 selects input 1, any other
 * value selects input 2) into dynamic_BHO[Y-1].dat[i].b for each index in
 * [i, til).  Y is 1-based at the call sites.
 */
static inline void _Dyna__SHA3_512_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til)
{
	const uint32_t slot = Y - 1;

	dynamic_BHO[slot].width = 64;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Stored on every pass, as generated; left untouched when the range is empty. */
		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 64;
		dynamic_BHO[slot].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
			src_len = total_len2_X86[i];
		}
		DoSHA3_512_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
		i += SHA3_512_inc;
	}
}
/*
 * Thin entry points.  Each one only fixes the (input X, output Y) pair that
 * is forwarded to _Dyna__SHA3_512_crypt_inputX_to_outputY together with the
 * index range [i, til).
 */
void DynamicFunc__SHA3_512_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SHA3_512_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SHA3_512_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SHA3_512_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SHA3_512_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SHA3_512_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SHA3_512_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SHA3_512_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SHA3_512_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index in [i, til): SHA3_512 the input 1 buffer and place the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__SHA3_512_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt_f(src, total_len_X86[i], key);
		i += SHA3_512_inc;
	}
}

/*
 * For every index in [i, til): SHA3_512 the input 2 buffer and place the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__SHA3_512_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSHA3_512_crypt_f(src, total_len2_X86[i], key);
		i += SHA3_512_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=KECCAK_224 PARAHASH=KECCAK_224 BIN_SZ=64 BIN_REAL_SZ=28 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=KECCAK_224_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_KECCAK_224
 ***********************************************************************/


/*****************************************************************************
 ****  KECCAK_224 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): KECCAK_224_LOOPS is not referenced by the nearby KECCAK_224 code; presumably a per-call batch size -- confirm in dynamic_big_crypt_hash.cin. */
#define KECCAK_224_LOOPS 1
/* Step added to the index on every pass of the KECCAK_224 loops in this section. */
static const uint32_t KECCAK_224_inc = 1;

/*
 * One-shot KECCAK_224 over 'len' bytes at 'in'.  Only the first 16 bytes of
 * the result are copied to 'out' (this section was generated with
 * TRUNC_TO16).
 */
static inline void DoKECCAK_224_crypt_f(void *in, uint32_t len, void *out)
{
	/* 64-byte scratch; the uint64_t member is never read here (presumably kept for alignment). */
	union { uint64_t words[64 / sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	KECCAK_CTX hasher;

	KECCAK_224_Init(&hasher);
	KECCAK_Update(&hasher, in, len);
	KECCAK_Final(digest.bytes, &hasher);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash 'ilen' bytes at 'in' with KECCAK_224 and write the encoded 28-byte
 * digest into 'out' starting at byte offset *tot_len.  *tot_len grows by
 * 56 for hex output, otherwise by whatever large_hash_output returns.
 * 'in' and 'out' may be the same buffer: the digest is finished in a local
 * array before 'out' is written.
 */
static inline void DoKECCAK_224_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[64];
	unsigned char *dst;
	KECCAK_CTX hasher;

	KECCAK_224_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(digest, &hasher);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) == eBase16) {
		/* Hex is the usual case, so skip large_hash_output and encode directly. */
		hex_out_buf(digest, dst, 28);
		*tot_len += 28 * 2;
	} else {
		*tot_len += large_hash_output(digest, dst, 28, tid);
	}
}
/* One-shot KECCAK_224 over 'ilen' bytes at 'in'; KECCAK_Final writes straight into 'out'. */
static inline void DoKECCAK_224_crypt_only(void *in, uint32_t ilen, void *out)
{
	KECCAK_CTX hasher;

	KECCAK_224_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(out, &hasher);
}

/*
 * For every index in [i, til): KECCAK_224 the input 1 buffer and append the
 * encoded digest to input 2, advancing total_len2_X86[i].
 */
void DynamicFunc__KECCAK_224_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 2 buffer and append the
 * encoded digest to input 1, advancing total_len_X86[i].
 */
void DynamicFunc__KECCAK_224_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 1 buffer and write the
 * encoded digest into input 2 at the offset given by nLargeOff_get(tid).
 * The running length lives in a throwaway local, so total_len2_X86 is not
 * modified.
 */
void DynamicFunc__KECCAK_224_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt(src, total_len_X86[i], dst, &cursor, tid);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 2 buffer and write the
 * encoded digest into input 1 at the offset given by nLargeOff_get(tid).
 * The running length lives in a throwaway local, so total_len_X86 is not
 * modified.
 */
void DynamicFunc__KECCAK_224_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt(src, total_len2_X86[i], dst, &cursor, tid);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 1 buffer and write the
 * encoded digest back into that same buffer at the offset given by
 * nLargeOff_get(tid).  total_len_X86 is not modified.
 */
void DynamicFunc__KECCAK_224_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		/* Source and destination are the same buffer. */
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_224_crypt(buf, total_len_X86[i], buf, &cursor, tid);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 2 buffer and write the
 * encoded digest back into that same buffer at the offset given by
 * nLargeOff_get(tid).  total_len2_X86 is not modified.
 */
void DynamicFunc__KECCAK_224_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t cursor = nLargeOff_get(tid);
		/* Source and destination are the same buffer. */
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_224_crypt(buf, total_len2_X86[i], buf, &cursor, tid);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 1 buffer, write the
 * encoded digest over the start of that same buffer, and store the new
 * length in total_len_X86[i].
 */
void DynamicFunc__KECCAK_224_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_224_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 1 buffer, write the
 * encoded digest over the start of input 2, and store the new length in
 * total_len2_X86[i].
 */
void DynamicFunc__KECCAK_224_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 2 buffer, write the
 * encoded digest over the start of input 1, and store the new length in
 * total_len_X86[i].
 */
void DynamicFunc__KECCAK_224_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 2 buffer, write the
 * encoded digest over the start of that same buffer, and store the new
 * length in total_len2_X86[i].
 */
void DynamicFunc__KECCAK_224_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;	/* output starts at offset 0 */
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_224_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_224_inc;
	}
}

/*
 * Store the raw KECCAK_224 digest of input X (1 selects input 1, any other
 * value selects input 2) into dynamic_BHO[Y-1].dat[i].b for each index in
 * [i, til).  Y is 1-based at the call sites.
 */
static inline void _Dyna__KECCAK_224_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til)
{
	const uint32_t slot = Y - 1;

	dynamic_BHO[slot].width = 28;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Stored on every pass, as generated; left untouched when the range is empty. */
		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 64;
		dynamic_BHO[slot].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
			src_len = total_len2_X86[i];
		}
		DoKECCAK_224_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
		i += KECCAK_224_inc;
	}
}
/*
 * Thin entry points.  Each one only fixes the (input X, output Y) pair that
 * is forwarded to _Dyna__KECCAK_224_crypt_inputX_to_outputY together with
 * the index range [i, til).
 */
void DynamicFunc__KECCAK_224_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__KECCAK_224_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_224_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index in [i, til): KECCAK_224 the input 1 buffer and place the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__KECCAK_224_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt_f(src, total_len_X86[i], key);
		i += KECCAK_224_inc;
	}
}

/*
 * For every index in [i, til): KECCAK_224 the input 2 buffer and place the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__KECCAK_224_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_224_crypt_f(src, total_len2_X86[i], key);
		i += KECCAK_224_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=KECCAK_256 PARAHASH=KECCAK_256 BIN_SZ=64 BIN_REAL_SZ=32 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=KECCAK_256_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_KECCAK_256
 ***********************************************************************/


/*****************************************************************************
 ****  KECCAK_256 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): KECCAK_256_LOOPS is not referenced by the nearby KECCAK_256 code; presumably a per-call batch size -- confirm in dynamic_big_crypt_hash.cin. */
#define KECCAK_256_LOOPS 1
/* Step added to the index on every pass of the KECCAK_256 loops in this section. */
static const uint32_t KECCAK_256_inc = 1;

/*
 * One-shot KECCAK_256 over 'len' bytes at 'in'.  Only the first 16 bytes of
 * the result are copied to 'out' (this section was generated with
 * TRUNC_TO16).
 */
static inline void DoKECCAK_256_crypt_f(void *in, uint32_t len, void *out)
{
	/* 64-byte scratch; the uint64_t member is never read here (presumably kept for alignment). */
	union { uint64_t words[64 / sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	KECCAK_CTX hasher;

	KECCAK_256_Init(&hasher);
	KECCAK_Update(&hasher, in, len);
	KECCAK_Final(digest.bytes, &hasher);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash 'ilen' bytes at 'in' with KECCAK_256 and write the encoded 32-byte
 * digest into 'out' starting at byte offset *tot_len.  *tot_len grows by
 * 64 for hex output, otherwise by whatever large_hash_output returns.
 * 'in' and 'out' may be the same buffer: the digest is finished in a local
 * array before 'out' is written.
 */
static inline void DoKECCAK_256_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[64];
	unsigned char *dst;
	KECCAK_CTX hasher;

	KECCAK_256_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(digest, &hasher);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) == eBase16) {
		/* Hex is the usual case, so skip large_hash_output and encode directly. */
		hex_out_buf(digest, dst, 32);
		*tot_len += 32 * 2;
	} else {
		*tot_len += large_hash_output(digest, dst, 32, tid);
	}
}
/* One-shot KECCAK_256 over 'ilen' bytes at 'in'; KECCAK_Final writes straight into 'out'. */
static inline void DoKECCAK_256_crypt_only(void *in, uint32_t ilen, void *out)
{
	KECCAK_CTX hasher;

	KECCAK_256_Init(&hasher);
	KECCAK_Update(&hasher, in, ilen);
	KECCAK_Final(out, &hasher);
}

/*
 * For every index in [i, til): KECCAK_256 the input 1 buffer and append the
 * encoded digest to input 2, advancing total_len2_X86[i].
 */
void DynamicFunc__KECCAK_256_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the second half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "append": for each index from i up to (not including) til, in
 * steps of KECCAK_256_inc, hash total_len2_X86[i] bytes of input buffer 2 and
 * append the encoded digest to input buffer 1 at its current length
 * total_len_X86[i], which DoKECCAK_256_crypt() then advances.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "at offset": for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest into input buffer 2 starting at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so total_len2_X86[i]
 * is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt(src, total_len_X86[i], dst, &off, tid);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "at offset": for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest into input buffer 1 starting at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so total_len_X86[i]
 * is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "at offset", in place: for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoKECCAK_256_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "at offset", in place: for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len2_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoKECCAK_256_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "overwrite", in place: for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len_X86[i] bytes of input buffer 1, write the
 * encoded digest at the start of that same buffer and set total_len_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_256_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "overwrite": for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len_X86[i] bytes of input buffer 1, write the
 * encoded digest at the start of input buffer 2 and set total_len2_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "overwrite": for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len2_X86[i] bytes of input buffer 2, write the
 * encoded digest at the start of input buffer 1 and set total_len_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 "overwrite", in place: for each index in [i, til), stepping by
 * KECCAK_256_inc, hash total_len2_X86[i] bytes of input buffer 2, write the
 * encoded digest at the start of that same buffer and set total_len2_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_256_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_256_inc;
	}
}

/*
 * Shared body of the DynamicFunc__KECCAK_256_crypt_inputX_to_outputY wrappers.
 *
 * X selects the source: 1 means input buffer 1, any other value input buffer 2.
 * Y is the 1-based number of the dynamic_BHO entry that receives the results.
 * For each index in [i, til), stepping by KECCAK_256_inc, the unencoded
 * KECCAK_256 result is stored in that entry's dat[index].b.
 */
static inline void _Dyna__KECCAK_256_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y;	/* Y arrives 1-based; turn it into an array index. */
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* These fields are written on every iteration, so an empty range leaves them alone. */
		dynamic_BHO[Y].BE = 0;
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoKECCAK_256_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += KECCAK_256_inc;
	}
}
/*
 * Entry points for KECCAK_256 "inputX to outputY". Each one runs PRELIM_NO_TID
 * and forwards i and til to _Dyna__KECCAK_256_crypt_inputX_to_outputY() together
 * with the input number (1 or 2) and the 1-based output number (1..4) that
 * appear in its name.
 */
void DynamicFunc__KECCAK_256_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__KECCAK_256_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_256_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * KECCAK_256 final step from input buffer 1: for each index in [i, til),
 * stepping by KECCAK_256_inc, hash total_len_X86[i] bytes of input buffer 1
 * and let DoKECCAK_256_crypt_f() store the leading 16 digest bytes in the
 * matching crypt_key_X86 slot.
 * (i and til: presumably set up by PRELIM_NO_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt_f(src, total_len_X86[i], key);
		i += KECCAK_256_inc;
	}
}

/*
 * KECCAK_256 final step from input buffer 2: for each index in [i, til),
 * stepping by KECCAK_256_inc, hash total_len2_X86[i] bytes of input buffer 2
 * and let DoKECCAK_256_crypt_f() store the leading 16 digest bytes in the
 * matching crypt_key_X86 slot.
 * (i and til: presumably set up by PRELIM_NO_TID - confirm.)
 */
void DynamicFunc__KECCAK_256_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_256_crypt_f(src, total_len2_X86[i], key);
		i += KECCAK_256_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=KECCAK_384 PARAHASH=KECCAK_384 BIN_SZ=64 BIN_REAL_SZ=48 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=KECCAK_384_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_KECCAK_384
 ***********************************************************************/


/*****************************************************************************
 ****  KECCAK_384 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): KECCAK_384_LOOPS is not referenced in the code visible here; presumably consumed elsewhere - confirm. */
#define KECCAK_384_LOOPS 1
/* Step by which the KECCAK_384 loops in this section advance their index. */
static const uint32_t KECCAK_384_inc = 1;

/*
 * KECCAK_384 helper used by the *_FINAL functions: hashes `len` bytes starting
 * at `in` and copies only the leading 16 bytes of the digest to `out`.
 */
static inline void DoKECCAK_384_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint64_t member is never read. NOTE(review): presumably present for alignment - confirm. */
	union { uint64_t words[64/sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	KECCAK_CTX ctx;

	KECCAK_384_Init(&ctx);
	KECCAK_Update(&ctx, in, len);
	KECCAK_Final(digest.bytes, &ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hashes `ilen` bytes at `in` with KECCAK_384 and writes the encoded form of
 * the first 48 digest bytes into `out`, starting at byte offset *tot_len.
 *
 * If eLargeOut_get(tid) is eBase16 the bytes are written by hex_out_buf() and
 * *tot_len grows by 96. Otherwise large_hash_output() does the writing and its
 * return value is added to *tot_len.
 */
static inline void DoKECCAK_384_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	KECCAK_CTX ctx;

	/* The digest is completed in a local buffer before anything is written to `out`. */
	KECCAK_384_Init(&ctx);
	KECCAK_Update(&ctx, in, ilen);
	KECCAK_Final(digest, &ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 48, tid);
		return;
	}
	/* Base-16 goes straight to the hex writer rather than through large_hash_output(). */
	hex_out_buf(digest, dst, 48);
	*tot_len += 48 * 2;
}
/*
 * Hashes `ilen` bytes at `in` with KECCAK_384 and lets KECCAK_Final() store the
 * result directly in `out`. No encoding or length bookkeeping happens here.
 */
static inline void DoKECCAK_384_crypt_only(void *in, uint32_t ilen, void *out) {
	KECCAK_CTX state;

	KECCAK_384_Init(&state);
	KECCAK_Update(&state, in, ilen);
	KECCAK_Final(out, &state);
}

/*
 * KECCAK_384 "append": for each index from i up to (not including) til, in
 * steps of KECCAK_384_inc, hash total_len_X86[i] bytes of input buffer 1 and
 * append the encoded digest to input buffer 2 at its current length
 * total_len2_X86[i], which DoKECCAK_384_crypt() then advances.
 * (i, til and tid are not declared here; presumably PRELIM_W_TID /
 * DYNA_OMP_PARAMS provide them - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "append": for each index from i up to (not including) til, in
 * steps of KECCAK_384_inc, hash total_len2_X86[i] bytes of input buffer 2 and
 * append the encoded digest to input buffer 1 at its current length
 * total_len_X86[i], which DoKECCAK_384_crypt() then advances.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "at offset": for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest into input buffer 2 starting at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so total_len2_X86[i]
 * is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt(src, total_len_X86[i], dst, &off, tid);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "at offset": for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest into input buffer 1 starting at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so total_len_X86[i]
 * is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "at offset", in place: for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoKECCAK_384_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "at offset", in place: for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len2_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoKECCAK_384_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "overwrite", in place: for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len_X86[i] bytes of input buffer 1, write the
 * encoded digest at the start of that same buffer and set total_len_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_384_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "overwrite": for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len_X86[i] bytes of input buffer 1, write the
 * encoded digest at the start of input buffer 2 and set total_len2_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "overwrite": for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len2_X86[i] bytes of input buffer 2, write the
 * encoded digest at the start of input buffer 1 and set total_len_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 "overwrite", in place: for each index in [i, til), stepping by
 * KECCAK_384_inc, hash total_len2_X86[i] bytes of input buffer 2, write the
 * encoded digest at the start of that same buffer and set total_len2_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_384_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_384_inc;
	}
}

/*
 * Shared body of the DynamicFunc__KECCAK_384_crypt_inputX_to_outputY wrappers.
 *
 * X selects the source: 1 means input buffer 1, any other value input buffer 2.
 * Y is the 1-based number of the dynamic_BHO entry that receives the results.
 * For each index in [i, til), stepping by KECCAK_384_inc, the unencoded
 * KECCAK_384 result is stored in that entry's dat[index].b.
 */
static inline void _Dyna__KECCAK_384_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y;	/* Y arrives 1-based; turn it into an array index. */
	dynamic_BHO[Y].width = 48;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* These fields are written on every iteration, so an empty range leaves them alone. */
		dynamic_BHO[Y].BE = 0;
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoKECCAK_384_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += KECCAK_384_inc;
	}
}
/*
 * Entry points for KECCAK_384 "inputX to outputY". Each one runs PRELIM_NO_TID
 * and forwards i and til to _Dyna__KECCAK_384_crypt_inputX_to_outputY() together
 * with the input number (1 or 2) and the 1-based output number (1..4) that
 * appear in its name.
 */
void DynamicFunc__KECCAK_384_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__KECCAK_384_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_384_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * KECCAK_384 final step from input buffer 1: for each index in [i, til),
 * stepping by KECCAK_384_inc, hash total_len_X86[i] bytes of input buffer 1
 * and let DoKECCAK_384_crypt_f() store the leading 16 digest bytes in the
 * matching crypt_key_X86 slot.
 * (i and til: presumably set up by PRELIM_NO_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt_f(src, total_len_X86[i], key);
		i += KECCAK_384_inc;
	}
}

/*
 * KECCAK_384 final step from input buffer 2: for each index in [i, til),
 * stepping by KECCAK_384_inc, hash total_len2_X86[i] bytes of input buffer 2
 * and let DoKECCAK_384_crypt_f() store the leading 16 digest bytes in the
 * matching crypt_key_X86 slot.
 * (i and til: presumably set up by PRELIM_NO_TID - confirm.)
 */
void DynamicFunc__KECCAK_384_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_384_crypt_f(src, total_len2_X86[i], key);
		i += KECCAK_384_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=KECCAK_512 PARAHASH=KECCAK_512 BIN_SZ=64 BIN_REAL_SZ=64 BE_HASH=1 JSWAPH=JOHNSWAP64( JSWAPT=); HASH_CTX=KECCAK_CTX HASH_Init=KECCAK_512_Init HASH_Update=KECCAK_Update HASH_Final=KECCAK_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_KECCAK_512
 ***********************************************************************/


/*****************************************************************************
 ****  KECCAK_512 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): KECCAK_512_LOOPS is not referenced in the code visible here; presumably consumed elsewhere - confirm. */
#define KECCAK_512_LOOPS 1
/* Step by which the KECCAK_512 loops in this section advance their index. */
static const uint32_t KECCAK_512_inc = 1;

/*
 * KECCAK_512 helper used by the *_FINAL functions: hashes `len` bytes starting
 * at `in` and copies only the leading 16 bytes of the digest to `out`.
 */
static inline void DoKECCAK_512_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint64_t member is never read. NOTE(review): presumably present for alignment - confirm. */
	union { uint64_t words[64/sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	KECCAK_CTX ctx;

	KECCAK_512_Init(&ctx);
	KECCAK_Update(&ctx, in, len);
	KECCAK_Final(digest.bytes, &ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hashes `ilen` bytes at `in` with KECCAK_512 and writes the encoded form of
 * the 64 digest bytes into `out`, starting at byte offset *tot_len.
 *
 * If eLargeOut_get(tid) is eBase16 the bytes are written by hex_out_buf() and
 * *tot_len grows by 128. Otherwise large_hash_output() does the writing and
 * its return value is added to *tot_len.
 */
static inline void DoKECCAK_512_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	KECCAK_CTX ctx;

	/* The digest is completed in a local buffer before anything is written to `out`. */
	KECCAK_512_Init(&ctx);
	KECCAK_Update(&ctx, in, ilen);
	KECCAK_Final(digest, &ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 64, tid);
		return;
	}
	/* Base-16 goes straight to the hex writer rather than through large_hash_output(). */
	hex_out_buf(digest, dst, 64);
	*tot_len += 64 * 2;
}
/*
 * Hashes `ilen` bytes at `in` with KECCAK_512 and lets KECCAK_Final() store the
 * result directly in `out`. No encoding or length bookkeeping happens here.
 */
static inline void DoKECCAK_512_crypt_only(void *in, uint32_t ilen, void *out) {
	KECCAK_CTX state;

	KECCAK_512_Init(&state);
	KECCAK_Update(&state, in, ilen);
	KECCAK_Final(out, &state);
}

/*
 * KECCAK_512 "append": for each index from i up to (not including) til, in
 * steps of KECCAK_512_inc, hash total_len_X86[i] bytes of input buffer 1 and
 * append the encoded digest to input buffer 2 at its current length
 * total_len2_X86[i], which DoKECCAK_512_crypt() then advances.
 * (i, til and tid are not declared here; presumably PRELIM_W_TID /
 * DYNA_OMP_PARAMS provide them - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "append": for each index from i up to (not including) til, in
 * steps of KECCAK_512_inc, hash total_len2_X86[i] bytes of input buffer 2 and
 * append the encoded digest to input buffer 1 at its current length
 * total_len_X86[i], which DoKECCAK_512_crypt() then advances.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "at offset": for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest into input buffer 2 starting at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so total_len2_X86[i]
 * is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt(src, total_len_X86[i], dst, &off, tid);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "at offset": for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest into input buffer 1 starting at the offset returned by
 * nLargeOff_get(tid). The advanced offset is discarded, so total_len_X86[i]
 * is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "at offset", in place: for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoKECCAK_512_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "at offset", in place: for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len2_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoKECCAK_512_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "overwrite", in place: for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len_X86[i] bytes of input buffer 1, write the
 * encoded digest at the start of that same buffer and set total_len_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_512_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "overwrite": for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len_X86[i] bytes of input buffer 1, write the
 * encoded digest at the start of input buffer 2 and set total_len2_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "overwrite": for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len2_X86[i] bytes of input buffer 2, write the
 * encoded digest at the start of input buffer 1 and set total_len_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 "overwrite", in place: for each index in [i, til), stepping by
 * KECCAK_512_inc, hash total_len2_X86[i] bytes of input buffer 2, write the
 * encoded digest at the start of that same buffer and set total_len2_X86[i]
 * to the number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoKECCAK_512_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += KECCAK_512_inc;
	}
}

/*
 * Shared body of the DynamicFunc__KECCAK_512_crypt_inputX_to_outputY wrappers.
 *
 * X selects the source: 1 means input buffer 1, any other value input buffer 2.
 * Y is the 1-based number of the dynamic_BHO entry that receives the results.
 * For each index in [i, til), stepping by KECCAK_512_inc, the unencoded
 * KECCAK_512 result is stored in that entry's dat[index].b.
 */
static inline void _Dyna__KECCAK_512_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y;	/* Y arrives 1-based; turn it into an array index. */
	dynamic_BHO[Y].width = 64;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* These fields are written on every iteration, so an empty range leaves them alone. */
		dynamic_BHO[Y].BE = 0;
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoKECCAK_512_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += KECCAK_512_inc;
	}
}
/*
 * Entry points for KECCAK_512 "inputX to outputY". Each one runs PRELIM_NO_TID
 * and forwards i and til to _Dyna__KECCAK_512_crypt_inputX_to_outputY() together
 * with the input number (1 or 2) and the 1-based output number (1..4) that
 * appear in its name.
 */
void DynamicFunc__KECCAK_512_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__KECCAK_512_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__KECCAK_512_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * KECCAK_512 final step from input buffer 1: for each index in [i, til),
 * stepping by KECCAK_512_inc, hash total_len_X86[i] bytes of input buffer 1
 * and let DoKECCAK_512_crypt_f() store the leading 16 digest bytes in the
 * matching crypt_key_X86 slot.
 * (i and til: presumably set up by PRELIM_NO_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt_f(src, total_len_X86[i], key);
		i += KECCAK_512_inc;
	}
}

/*
 * KECCAK_512 final step from input buffer 2: for each index in [i, til),
 * stepping by KECCAK_512_inc, hash total_len2_X86[i] bytes of input buffer 2
 * and let DoKECCAK_512_crypt_f() store the leading 16 digest bytes in the
 * matching crypt_key_X86 slot.
 * (i and til: presumably set up by PRELIM_NO_TID - confirm.)
 */
void DynamicFunc__KECCAK_512_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoKECCAK_512_crypt_f(src, total_len2_X86[i], key);
		i += KECCAK_512_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=GOST PARAHASH=GOST BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=gost_ctx HASH_Init=john_gost_init HASH_Update=john_gost_update HASH_Final=john_gost_final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_GOST
 ***********************************************************************/


/*****************************************************************************
 ****  GOST functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): GOST_LOOPS is not referenced in the code visible here; presumably consumed elsewhere - confirm. */
#define GOST_LOOPS 1
/* Step by which the GOST loops in this section advance their index. */
static const uint32_t GOST_inc = 1;

/*
 * GOST helper: hashes `len` bytes starting at `in` and copies only the
 * leading 16 bytes of the digest to `out`.
 */
static inline void DoGOST_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read. NOTE(review): presumably present for alignment - confirm. */
	union { uint32_t words[32/sizeof(uint32_t)]; unsigned char bytes[32]; } digest;
	gost_ctx ctx;

	john_gost_init(&ctx);
	john_gost_update(&ctx, in, len);
	john_gost_final(digest.bytes, &ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hashes `ilen` bytes at `in` with GOST and writes the encoded form of the
 * 32 digest bytes into `out`, starting at byte offset *tot_len.
 *
 * If eLargeOut_get(tid) is eBase16 the bytes are written by hex_out_buf() and
 * *tot_len grows by 64. Otherwise large_hash_output() does the writing and its
 * return value is added to *tot_len.
 */
static inline void DoGOST_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	gost_ctx ctx;

	/* The digest is completed in a local buffer before anything is written to `out`. */
	john_gost_init(&ctx);
	john_gost_update(&ctx, in, ilen);
	john_gost_final(digest, &ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	/* Base-16 goes straight to the hex writer rather than through large_hash_output(). */
	hex_out_buf(digest, dst, 32);
	*tot_len += 32 * 2;
}
/*
 * Hashes `ilen` bytes at `in` with GOST and lets john_gost_final() store the
 * result directly in `out`. No encoding or length bookkeeping happens here.
 */
static inline void DoGOST_crypt_only(void *in, uint32_t ilen, void *out) {
	gost_ctx state;

	john_gost_init(&state);
	john_gost_update(&state, in, ilen);
	john_gost_final(out, &state);
}

/*
 * GOST "append": for each index from i up to (not including) til, in steps of
 * GOST_inc, hash total_len_X86[i] bytes of input buffer 1 and append the
 * encoded digest to input buffer 2 at its current length total_len2_X86[i],
 * which DoGOST_crypt() then advances.
 * (i, til and tid are not declared here; presumably PRELIM_W_TID /
 * DYNA_OMP_PARAMS provide them - confirm.)
 */
void DynamicFunc__GOST_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoGOST_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += GOST_inc;
	}
}

/*
 * GOST "append": for each index from i up to (not including) til, in steps of
 * GOST_inc, hash total_len2_X86[i] bytes of input buffer 2 and append the
 * encoded digest to input buffer 1 at its current length total_len_X86[i],
 * which DoGOST_crypt() then advances.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoGOST_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += GOST_inc;
	}
}

/*
 * GOST "at offset": for each index in [i, til), stepping by GOST_inc, hash
 * total_len_X86[i] bytes of input buffer 1 and write the encoded digest into
 * input buffer 2 starting at the offset returned by nLargeOff_get(tid). The
 * advanced offset is discarded, so total_len2_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoGOST_crypt(src, total_len_X86[i], dst, &off, tid);
		i += GOST_inc;
	}
}

/*
 * GOST "at offset": for each index in [i, til), stepping by GOST_inc, hash
 * total_len2_X86[i] bytes of input buffer 2 and write the encoded digest into
 * input buffer 1 starting at the offset returned by nLargeOff_get(tid). The
 * advanced offset is discarded, so total_len_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoGOST_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += GOST_inc;
	}
}

/*
 * GOST "at offset", in place: for each index in [i, til), stepping by
 * GOST_inc, hash total_len_X86[i] bytes of input buffer 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoGOST_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += GOST_inc;
	}
}

/*
 * GOST "at offset", in place: for each index in [i, til), stepping by
 * GOST_inc, hash total_len2_X86[i] bytes of input buffer 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid). total_len2_X86[i] is not modified.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		/* Source and destination are deliberately the same buffer. */
		DoGOST_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += GOST_inc;
	}
}

/*
 * GOST "overwrite", in place: for each index in [i, til), stepping by
 * GOST_inc, hash total_len_X86[i] bytes of input buffer 1, write the encoded
 * digest at the start of that same buffer and set total_len_X86[i] to the
 * number of bytes written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoGOST_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += GOST_inc;
	}
}

/*
 * GOST "overwrite": for each index in [i, til), stepping by GOST_inc, hash
 * total_len_X86[i] bytes of input buffer 1, write the encoded digest at the
 * start of input buffer 2 and set total_len2_X86[i] to the number of bytes
 * written.
 * (i, til and tid: presumably set up by PRELIM_W_TID - confirm.)
 */
void DynamicFunc__GOST_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		/* Odd indexes use the x2 member of element i>>MD5_X2. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoGOST_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += GOST_inc;
	}
}

/*
 * GOST "overwrite" step, input 2 -> input 1.  DoGOST_crypt is called with
 * input_buf2_X86 as source, input_buf_X86 as destination and a length
 * counter starting at 0; the counter's final value becomes total_len_X86[i].
 */
void DynamicFunc__GOST_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoGOST_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoGOST_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += GOST_inc;
	}
}

/*
 * GOST "overwrite" step, input 2 -> input 2.  DoGOST_crypt is called with
 * input_buf2_X86 as source and destination and a length counter starting at
 * 0; the counter's final value becomes total_len2_X86[i].
 */
void DynamicFunc__GOST_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoGOST_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoGOST_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += GOST_inc;
	}
}

/*
 * Shared worker for the GOST "inputX to outputY" entry points.
 *   X   - 1 selects input_buf_X86/total_len_X86, any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   - 1-based output slot; dynamic_BHO[Y-1] is the slot written.
 *   i   - first index to process; til - one past the last index.
 * Sets the slot's width to 32, and for every processed index sets BE=0,
 * bits=32, mixed_SIMD=0 and calls DoGOST_crypt_only with
 * dynamic_BHO[Y-1].dat[i].b as destination.
 */
inline static void _Dyna__GOST_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1; /* callers pass Y 1-based; the array index is 0-based */
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		dynamic_BHO[Y].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoGOST_crypt_only(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoGOST_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoGOST_crypt_only(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoGOST_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		}
		i += GOST_inc;
	}
}
/*
 * Public entry points for the GOST "inputX to outputY" step.  Each forwards
 * the i/til values in scope after PRELIM_NO_TID to
 * _Dyna__GOST_crypt_inputX_to_outputY together with the input selector
 * (1 or 2) and the 1-based output slot (1..4) named in the function.
 */
void DynamicFunc__GOST_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__GOST_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__GOST_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__GOST_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__GOST_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__GOST_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__GOST_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__GOST_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__GOST_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final GOST step from input 1: for every index, DoGOST_crypt_f is given
 * input_buf_X86 (length total_len_X86[i]) as source and the matching
 * crypt_key_X86 entry as destination.
 */
void DynamicFunc__GOST_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoGOST_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoGOST_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += GOST_inc;
	}
}

/*
 * Final GOST step from input 2: for every index, DoGOST_crypt_f is given
 * input_buf2_X86 (length total_len2_X86[i]) as source and the matching
 * crypt_key_X86 entry as destination.
 */
void DynamicFunc__GOST_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoGOST_crypt_f(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoGOST_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += GOST_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=64 HASH=WHIRLPOOL PARAHASH=WHIRLPOOL BIN_SZ=64 BIN_REAL_SZ=64 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=WHIRLPOOL_CTX HASH_Init=WHIRLPOOL_Init HASH_Update=WHIRLPOOL_Update HASH_Final=WHIRLPOOL_Final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_WHIRLPOOL
 ***********************************************************************/


/*****************************************************************************
 ****  WHIRLPOOL functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): WHIRLPOOL_LOOPS is not referenced in the visible WHIRLPOOL code; its consumer (if any) should be confirmed. */
#define WHIRLPOOL_LOOPS 1
/* Index step used by the DynamicFunc__WHIRLPOOL_* loops and by _Dyna__WHIRLPOOL_crypt_inputX_to_outputY. */
static const uint32_t WHIRLPOOL_inc = 1;

/*
 * One-shot WHIRLPOOL of in[0..len), used by the *_FINAL functions.  The
 * digest is produced in a 64-byte scratch buffer and only its first 16
 * bytes are copied to out.
 */
inline static void DoWHIRLPOOL_crypt_f(void *in, uint32_t len, void *out) {
	/* the uint64_t member is never read; presumably it only forces alignment */
	union { uint64_t align[64/sizeof(uint64_t)]; unsigned char bytes[64]; } digest;
	WHIRLPOOL_CTX hasher;

	WHIRLPOOL_Init(&hasher);
	WHIRLPOOL_Update(&hasher, in, len);
	WHIRLPOOL_Final(digest.bytes, &hasher);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash in[0..ilen) with WHIRLPOOL and emit the encoded digest into out,
 * starting at byte offset *tot_len.
 *   - eLargeOut_get(tid) == eBase16: hex_out_buf is called directly and
 *     *tot_len grows by 128.
 *   - otherwise: large_hash_output does the encoding and its return value
 *     is added to *tot_len.
 */
inline static void DoWHIRLPOOL_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	WHIRLPOOL_CTX hasher;

	WHIRLPOOL_Init(&hasher);
	WHIRLPOOL_Update(&hasher, in, ilen);
	WHIRLPOOL_Final(digest, &hasher);

	dst = (unsigned char*)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 64, tid);
	} else {
		/* the usual case: avoid large_hash_output's overhead and hex-encode directly */
		hex_out_buf(digest, dst, 64);
		*tot_len += 64*2;
	}
}
/*
 * Hash in[0..ilen) with WHIRLPOOL and let WHIRLPOOL_Final store the digest
 * straight into out (no encoding or truncation is done here, and the size
 * of out is not checked).
 */
inline static void DoWHIRLPOOL_crypt_only(void *in, uint32_t ilen, void *out) {
	WHIRLPOOL_CTX hasher;

	WHIRLPOOL_Init(&hasher);
	WHIRLPOOL_Update(&hasher, in, ilen);
	WHIRLPOOL_Final(out, &hasher);
}

/*
 * WHIRLPOOL "append" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written into input_buf2_X86 at offset total_len2_X86[i],
 * and DoWHIRLPOOL_crypt advances total_len2_X86[i] accordingly.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "append" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written into input_buf_X86 at offset total_len_X86[i],
 * and DoWHIRLPOOL_crypt advances total_len_X86[i] accordingly.
 */
void DynamicFunc__WHIRLPOOL_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "at offset" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written into input_buf2_X86 starting at byte offset
 * nLargeOff_get(tid).  total_len2_X86[] is left untouched; the advanced
 * offset is discarded.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "at offset" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written into input_buf_X86 starting at byte offset
 * nLargeOff_get(tid).  total_len_X86[] is left untouched; the advanced
 * offset is discarded.
 */
void DynamicFunc__WHIRLPOOL_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "at offset" step, input 1 -> input 1: the encoded digest of
 * input_buf_X86 is written back into the same buffer starting at byte
 * offset nLargeOff_get(tid).  total_len_X86[] is left untouched; the
 * advanced offset is discarded.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "at offset" step, input 2 -> input 2: the encoded digest of
 * input_buf2_X86 is written back into the same buffer starting at byte
 * offset nLargeOff_get(tid).  total_len2_X86[] is left untouched; the
 * advanced offset is discarded.
 */
void DynamicFunc__WHIRLPOOL_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "overwrite" step, input 1 -> input 1: the encoded digest of
 * input_buf_X86 replaces that buffer's content from offset 0, and
 * total_len_X86[i] is set to the length DoWHIRLPOOL_crypt reports.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "overwrite" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written to input_buf2_X86 from offset 0, and
 * total_len2_X86[i] is set to the length DoWHIRLPOOL_crypt reports.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "overwrite" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written to input_buf_X86 from offset 0, and
 * total_len_X86[i] is set to the length DoWHIRLPOOL_crypt reports.
 */
void DynamicFunc__WHIRLPOOL_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += WHIRLPOOL_inc;
	}
}

/*
 * WHIRLPOOL "overwrite" step, input 2 -> input 2: the encoded digest of
 * input_buf2_X86 replaces that buffer's content from offset 0, and
 * total_len2_X86[i] is set to the length DoWHIRLPOOL_crypt reports.
 */
void DynamicFunc__WHIRLPOOL_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoWHIRLPOOL_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += WHIRLPOOL_inc;
	}
}

/*
 * Shared worker for the WHIRLPOOL "inputX to outputY" entry points.
 *   X   - 1 selects input_buf_X86/total_len_X86, any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   - 1-based output slot; dynamic_BHO[Y-1] is the slot written.
 *   i   - first index to process; til - one past the last index.
 * Sets the slot's width to 64, and for every processed index sets BE=0,
 * bits=64, mixed_SIMD=0 and stores the unencoded digest in
 * dynamic_BHO[Y-1].dat[i].b via DoWHIRLPOOL_crypt_only.
 */
inline static void _Dyna__WHIRLPOOL_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1; /* callers pass Y 1-based; the array index is 0-based */
	dynamic_BHO[Y].width = 64;
	while (i < til) {
		dynamic_BHO[Y].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[Y].bits = 64;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoWHIRLPOOL_crypt_only(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoWHIRLPOOL_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoWHIRLPOOL_crypt_only(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoWHIRLPOOL_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		}
		i += WHIRLPOOL_inc;
	}
}
/*
 * Public entry points for the WHIRLPOOL "inputX to outputY" step.  Each
 * forwards the i/til values in scope after PRELIM_NO_TID to
 * _Dyna__WHIRLPOOL_crypt_inputX_to_outputY together with the input selector
 * (1 or 2) and the 1-based output slot (1..4) named in the function.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__WHIRLPOOL_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__WHIRLPOOL_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final WHIRLPOOL step from input 1: for every index the first 16 bytes of
 * the digest of input_buf_X86 (length total_len_X86[i]) are stored in the
 * matching crypt_key_X86 entry by DoWHIRLPOOL_crypt_f.
 */
void DynamicFunc__WHIRLPOOL_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoWHIRLPOOL_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += WHIRLPOOL_inc;
	}
}

/*
 * Final WHIRLPOOL step from input 2: for every index the first 16 bytes of
 * the digest of input_buf2_X86 (length total_len2_X86[i]) are stored in the
 * matching crypt_key_X86 entry by DoWHIRLPOOL_crypt_f.
 */
void DynamicFunc__WHIRLPOOL_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoWHIRLPOOL_crypt_f(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoWHIRLPOOL_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += WHIRLPOOL_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=Tiger PARAHASH=Tiger BIN_SZ=24 BIN_REAL_SZ=24 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_tiger_context HASH_Init=sph_tiger_init HASH_Update=sph_tiger HASH_Final=sph_tiger_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_Tiger
 ***********************************************************************/


/*****************************************************************************
 ****  Tiger functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): Tiger_LOOPS is not referenced in the visible Tiger code; its consumer (if any) should be confirmed. */
#define Tiger_LOOPS 1
/* Index step used by the DynamicFunc__Tiger_* loops and by _Dyna__Tiger_crypt_inputX_to_outputY. */
static const uint32_t Tiger_inc = 1;

/*
 * One-shot Tiger of in[0..len), used by the *_FINAL functions.  The digest
 * is produced in a 24-byte scratch buffer and only its first 16 bytes are
 * copied to out.
 * NOTE(review): stock sphlib declares sph_tiger_close(ctx, dst); confirm
 * that this project's sph_tiger_close accepts (dst, ctx) as passed here.
 */
inline static void DoTiger_crypt_f(void *in, uint32_t len, void *out) {
	/* the uint32_t member is never read; presumably it only forces alignment */
	union { uint32_t align[24/sizeof(uint32_t)]; unsigned char bytes[24]; } digest;
	sph_tiger_context hasher;

	sph_tiger_init(&hasher);
	sph_tiger(&hasher, in, len);
	sph_tiger_close(digest.bytes, &hasher);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash in[0..ilen) with Tiger and emit the encoded digest into out,
 * starting at byte offset *tot_len.
 *   - eLargeOut_get(tid) == eBase16: hex_out_buf is called directly and
 *     *tot_len grows by 48.
 *   - otherwise: large_hash_output does the encoding and its return value
 *     is added to *tot_len.
 */
inline static void DoTiger_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[24];
	unsigned char *dst;
	sph_tiger_context hasher;

	sph_tiger_init(&hasher);
	sph_tiger(&hasher, in, ilen);
	sph_tiger_close(digest, &hasher);

	dst = (unsigned char*)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 24, tid);
	} else {
		/* the usual case: avoid large_hash_output's overhead and hex-encode directly */
		hex_out_buf(digest, dst, 24);
		*tot_len += 24*2;
	}
}
/*
 * Hash in[0..ilen) with Tiger and let sph_tiger_close store the digest
 * straight into out (no encoding or truncation is done here, and the size
 * of out is not checked).
 */
inline static void DoTiger_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_tiger_context hasher;

	sph_tiger_init(&hasher);
	sph_tiger(&hasher, in, ilen);
	sph_tiger_close(out, &hasher);
}

/*
 * Tiger "append" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written into input_buf2_X86 at offset total_len2_X86[i],
 * and DoTiger_crypt advances total_len2_X86[i] accordingly.
 */
void DynamicFunc__Tiger_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += Tiger_inc;
	}
}

/*
 * Tiger "append" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written into input_buf_X86 at offset total_len_X86[i],
 * and DoTiger_crypt advances total_len_X86[i] accordingly.
 */
void DynamicFunc__Tiger_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += Tiger_inc;
	}
}

/*
 * Tiger "at offset" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written into input_buf2_X86 starting at byte offset
 * nLargeOff_get(tid).  total_len2_X86[] is left untouched; the advanced
 * offset is discarded.
 */
void DynamicFunc__Tiger_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += Tiger_inc;
	}
}

/*
 * Tiger "at offset" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written into input_buf_X86 starting at byte offset
 * nLargeOff_get(tid).  total_len_X86[] is left untouched; the advanced
 * offset is discarded.
 */
void DynamicFunc__Tiger_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += Tiger_inc;
	}
}

/*
 * Tiger "at offset" step, input 1 -> input 1: the encoded digest of
 * input_buf_X86 is written back into the same buffer starting at byte
 * offset nLargeOff_get(tid).  total_len_X86[] is left untouched; the
 * advanced offset is discarded.
 */
void DynamicFunc__Tiger_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += Tiger_inc;
	}
}

/*
 * Tiger "at offset" step, input 2 -> input 2: the encoded digest of
 * input_buf2_X86 is written back into the same buffer starting at byte
 * offset nLargeOff_get(tid).  total_len2_X86[] is left untouched; the
 * advanced offset is discarded.
 */
void DynamicFunc__Tiger_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += Tiger_inc;
	}
}

/*
 * Tiger "overwrite" step, input 1 -> input 1: the encoded digest of
 * input_buf_X86 replaces that buffer's content from offset 0, and
 * total_len_X86[i] is set to the length DoTiger_crypt reports.
 */
void DynamicFunc__Tiger_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += Tiger_inc;
	}
}

/*
 * Tiger "overwrite" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written to input_buf2_X86 from offset 0, and
 * total_len2_X86[i] is set to the length DoTiger_crypt reports.
 */
void DynamicFunc__Tiger_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += Tiger_inc;
	}
}

/*
 * Tiger "overwrite" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written to input_buf_X86 from offset 0, and
 * total_len_X86[i] is set to the length DoTiger_crypt reports.
 */
void DynamicFunc__Tiger_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += Tiger_inc;
	}
}

/*
 * Tiger "overwrite" step, input 2 -> input 2: the encoded digest of
 * input_buf2_X86 replaces that buffer's content from offset 0, and
 * total_len2_X86[i] is set to the length DoTiger_crypt reports.
 */
void DynamicFunc__Tiger_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoTiger_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += Tiger_inc;
	}
}

/*
 * Shared worker for the Tiger "inputX to outputY" entry points.
 *   X   - 1 selects input_buf_X86/total_len_X86, any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   - 1-based output slot; dynamic_BHO[Y-1] is the slot written.
 *   i   - first index to process; til - one past the last index.
 * Sets the slot's width to 24, and for every processed index sets BE=0,
 * bits=32, mixed_SIMD=0 and stores the unencoded digest in
 * dynamic_BHO[Y-1].dat[i].b via DoTiger_crypt_only.
 */
inline static void _Dyna__Tiger_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1; /* callers pass Y 1-based; the array index is 0-based */
	dynamic_BHO[Y].width = 24;
	while (i < til) {
		dynamic_BHO[Y].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoTiger_crypt_only(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoTiger_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoTiger_crypt_only(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoTiger_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		}
		i += Tiger_inc;
	}
}
/*
 * Public entry points for the Tiger "inputX to outputY" step.  Each
 * forwards the i/til values in scope after PRELIM_NO_TID to
 * _Dyna__Tiger_crypt_inputX_to_outputY together with the input selector
 * (1 or 2) and the 1-based output slot (1..4) named in the function.
 */
void DynamicFunc__Tiger_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__Tiger_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__Tiger_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__Tiger_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__Tiger_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__Tiger_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__Tiger_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__Tiger_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__Tiger_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final Tiger step from input 1: for every index the first 16 bytes of the
 * digest of input_buf_X86 (length total_len_X86[i]) are stored in the
 * matching crypt_key_X86 entry by DoTiger_crypt_f.
 */
void DynamicFunc__Tiger_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoTiger_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += Tiger_inc;
	}
}

/*
 * Final Tiger step from input 2: for every index the first 16 bytes of the
 * digest of input_buf2_X86 (length total_len2_X86[i]) are stored in the
 * matching crypt_key_X86 entry by DoTiger_crypt_f.
 */
void DynamicFunc__Tiger_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoTiger_crypt_f(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoTiger_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += Tiger_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=RIPEMD128 PARAHASH=RIPEMD128 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_ripemd128_context HASH_Init=sph_ripemd128_init HASH_Update=sph_ripemd128 HASH_Final=sph_ripemd128_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  UNDEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_RIPEMD128
 ***********************************************************************/


/*****************************************************************************
 ****  RIPEMD128 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): RIPEMD128_LOOPS is not referenced in the visible RIPEMD128 code; its consumer (if any) should be confirmed. */
#define RIPEMD128_LOOPS 1
/* Index step used by the DynamicFunc__RIPEMD128_* loops and by _Dyna__RIPEMD128_crypt_inputX_to_outputY. */
static const uint32_t RIPEMD128_inc = 1;

/*
 * One-shot RIPEMD128 of in[0..len), used by the *_FINAL functions.  Unlike
 * the other hashes' _crypt_f helpers in this file there is no scratch
 * buffer or truncating copy: sph_ripemd128_close writes directly into out.
 * NOTE(review): stock sphlib declares sph_ripemd128_close(ctx, dst);
 * confirm that this project's version accepts (dst, ctx) as passed here.
 */
inline static void DoRIPEMD128_crypt_f(void *in, uint32_t len, void *out) {
	sph_ripemd128_context hasher;

	sph_ripemd128_init(&hasher);
	sph_ripemd128(&hasher, in, len);
	sph_ripemd128_close((unsigned char*)out, &hasher);
}

/*
 * Hash in[0..ilen) with RIPEMD128 and emit the encoded digest into out,
 * starting at byte offset *tot_len.
 *   - eLargeOut_get(tid) == eBase16: hex_out_buf is called directly and
 *     *tot_len grows by 32.
 *   - otherwise: large_hash_output does the encoding and its return value
 *     is added to *tot_len.
 */
inline static void DoRIPEMD128_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[16];
	unsigned char *dst;
	sph_ripemd128_context hasher;

	sph_ripemd128_init(&hasher);
	sph_ripemd128(&hasher, in, ilen);
	sph_ripemd128_close(digest, &hasher);

	dst = (unsigned char*)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 16, tid);
	} else {
		/* the usual case: avoid large_hash_output's overhead and hex-encode directly */
		hex_out_buf(digest, dst, 16);
		*tot_len += 16*2;
	}
}
/*
 * Hash in[0..ilen) with RIPEMD128 and let sph_ripemd128_close store the
 * digest straight into out (no encoding or truncation is done here, and
 * the size of out is not checked).
 */
inline static void DoRIPEMD128_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_ripemd128_context hasher;

	sph_ripemd128_init(&hasher);
	sph_ripemd128(&hasher, in, ilen);
	sph_ripemd128_close(out, &hasher);
}

/*
 * RIPEMD128 "append" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written into input_buf2_X86 at offset total_len2_X86[i],
 * and DoRIPEMD128_crypt advances total_len2_X86[i] accordingly.
 */
void DynamicFunc__RIPEMD128_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "append" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written into input_buf_X86 at offset total_len_X86[i],
 * and DoRIPEMD128_crypt advances total_len_X86[i] accordingly.
 */
void DynamicFunc__RIPEMD128_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "at offset" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written into input_buf2_X86 starting at byte offset
 * nLargeOff_get(tid).  total_len2_X86[] is left untouched; the advanced
 * offset is discarded.
 */
void DynamicFunc__RIPEMD128_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "at offset" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written into input_buf_X86 starting at byte offset
 * nLargeOff_get(tid).  total_len_X86[] is left untouched; the advanced
 * offset is discarded.
 */
void DynamicFunc__RIPEMD128_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "at offset" step, input 1 -> input 1: the encoded digest of
 * input_buf_X86 is written back into the same buffer starting at byte
 * offset nLargeOff_get(tid).  total_len_X86[] is left untouched; the
 * advanced offset is discarded.
 */
void DynamicFunc__RIPEMD128_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "at offset" step, input 2 -> input 2: the encoded digest of
 * input_buf2_X86 is written back into the same buffer starting at byte
 * offset nLargeOff_get(tid).  total_len2_X86[] is left untouched; the
 * advanced offset is discarded.
 */
void DynamicFunc__RIPEMD128_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &write_pos, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &write_pos, tid);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "overwrite" step, input 1 -> input 1: the encoded digest of
 * input_buf_X86 replaces that buffer's content from offset 0, and
 * total_len_X86[i] is set to the length DoRIPEMD128_crypt reports.
 */
void DynamicFunc__RIPEMD128_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "overwrite" step, input 1 -> input 2: the encoded digest of
 * input_buf_X86 is written to input_buf2_X86 from offset 0, and
 * total_len2_X86[i] is set to the length DoRIPEMD128_crypt reports.
 */
void DynamicFunc__RIPEMD128_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "overwrite" step, input 2 -> input 1: the encoded digest of
 * input_buf2_X86 is written to input_buf_X86 from offset 0, and
 * total_len_X86[i] is set to the length DoRIPEMD128_crypt reports.
 */
void DynamicFunc__RIPEMD128_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len_X86[i] = new_len;
		i += RIPEMD128_inc;
	}
}

/*
 * RIPEMD128 "overwrite" step, input 2 -> input 2: the encoded digest of
 * input_buf2_X86 replaces that buffer's content from offset 0, and
 * total_len2_X86[i] is set to the length DoRIPEMD128_crypt reports.
 */
void DynamicFunc__RIPEMD128_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x2.b2, &new_len, tid);
		} else
#endif
		{
			DoRIPEMD128_crypt(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], input_buf2_X86[i>>MD5_X2].x1.b, &new_len, tid);
		}
		total_len2_X86[i] = new_len;
		i += RIPEMD128_inc;
	}
}

/*
 * Shared worker for the RIPEMD128 "inputX to outputY" entry points.
 *   X   - 1 selects input_buf_X86/total_len_X86, any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   - 1-based output slot; dynamic_BHO[Y-1] is the slot written.
 *   i   - first index to process; til - one past the last index.
 * Sets the slot's width to 16, and for every processed index sets BE=0,
 * bits=32, mixed_SIMD=0 and stores the unencoded digest in
 * dynamic_BHO[Y-1].dat[i].b via DoRIPEMD128_crypt_only.
 */
inline static void _Dyna__RIPEMD128_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1; /* callers pass Y 1-based; the array index is 0-based */
	dynamic_BHO[Y].width = 16;
	while (i < til) {
		dynamic_BHO[Y].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoRIPEMD128_crypt_only(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoRIPEMD128_crypt_only(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if ((i & 1) != 0) {
				DoRIPEMD128_crypt_only(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			} else
#endif
			{
				DoRIPEMD128_crypt_only(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], dynamic_BHO[Y].dat[i].b);
			}
		}
		i += RIPEMD128_inc;
	}
}
/*
 * Public entry points for the RIPEMD128 "inputX to outputY" step.  Each
 * forwards the i/til values in scope after PRELIM_NO_TID to
 * _Dyna__RIPEMD128_crypt_inputX_to_outputY together with the input selector
 * (1 or 2) and the 1-based output slot (1..4) named in the function.
 */
void DynamicFunc__RIPEMD128_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__RIPEMD128_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__RIPEMD128_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final RIPEMD128 step from input 1: for every index the digest of
 * input_buf_X86 (length total_len_X86[i]) is written into the matching
 * crypt_key_X86 entry by DoRIPEMD128_crypt_f.
 */
void DynamicFunc__RIPEMD128_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if ((i & 1) != 0) {
			/* odd index: x2 member of slot i>>MD5_X2 */
			DoRIPEMD128_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoRIPEMD128_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += RIPEMD128_inc;
	}
}

/*
 * For every index from i up to til (both set up by PRELIM_NO_TID), pass the
 * input2 buffer and its length to DoRIPEMD128_crypt_f, with the matching
 * crypt_key_X86 slot as destination.  When MD5_X2 is nonzero two candidates
 * share one array element: even indexes use .x1, odd indexes use .x2.
 */
void DynamicFunc__RIPEMD128_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD128_crypt_f(src, total_len2_X86[i], dst);
		i += RIPEMD128_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=RIPEMD160 PARAHASH=RIPEMD160 BIN_SZ=20 BIN_REAL_SZ=20 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_ripemd160_context HASH_Init=sph_ripemd160_init HASH_Update=sph_ripemd160 HASH_Final=sph_ripemd160_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_RIPEMD160
 ***********************************************************************/


/*****************************************************************************
 ****  RIPEMD160 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// Fixed at 1 for this hash.  NOTE(review): no use of RIPEMD160_LOOPS is visible in this section.
#define RIPEMD160_LOOPS 1
// Step added to the candidate index on every iteration of the RIPEMD160 loops below.
static const uint32_t RIPEMD160_inc = 1;

/*
 * Hash `len` bytes at `in` with RIPEMD160 and copy only the first 16 bytes
 * of the 20-byte digest buffer to `out`.
 */
inline static void DoRIPEMD160_crypt_f(void *in, uint32_t len, void *out) {
	// Only the byte view of the union is used here; the word view is kept as generated.
	union xx { unsigned char u[20]; uint32_t a[20/sizeof(uint32_t)]; } digest;
	sph_ripemd160_context hash_ctx;

	sph_ripemd160_init(&hash_ctx);
	sph_ripemd160(&hash_ctx, in, len);
	sph_ripemd160_close(digest.u, &hash_ctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hash `ilen` bytes at `in` with RIPEMD160 and write the encoded digest into
 * `out` starting at byte offset *tot_len, then add the number of bytes
 * written to *tot_len.  The digest is built in a local buffer before any
 * output is written, so `in` and `out` may be the same buffer.
 */
inline static void DoRIPEMD160_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[20];
	unsigned char *dst;
	sph_ripemd160_context hash_ctx;

	sph_ripemd160_init(&hash_ctx);
	sph_ripemd160(&hash_ctx, in, ilen);
	sph_ripemd160_close(digest, &hash_ctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		// Any other output format: the return value of large_hash_output is added to the length.
		*tot_len += large_hash_output(digest, dst, 20, tid);
		return;
	}
	// Base16 is the usual case, so skip large_hash_output and hex-encode directly (2 chars per byte).
	hex_out_buf(digest, dst, 20);
	*tot_len += 20*2;
}
/*
 * Hash `ilen` bytes at `in` with RIPEMD160, handing `out` straight to the
 * close call as the digest destination (no encoding, no truncation here).
 */
inline static void DoRIPEMD160_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_ripemd160_context hash_ctx;

	sph_ripemd160_init(&hash_ctx);
	sph_ripemd160(&hash_ctx, in, ilen);
	sph_ripemd160_close(out, &hash_ctx);
}

/*
 * For every index from i up to til (set up by PRELIM_W_TID): RIPEMD160-hash
 * input1 and write the encoded digest into input2 at its current length,
 * growing total_len2_X86[i] by the number of bytes written.
 * With MD5_X2 nonzero, even indexes use .x1 and odd indexes use .x2.
 */
void DynamicFunc__RIPEMD160_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til (set up by PRELIM_W_TID): RIPEMD160-hash
 * input2 and write the encoded digest into input1 at its current length,
 * growing total_len_X86[i] by the number of bytes written.
 * With MD5_X2 nonzero, even indexes use .x1 and odd indexes use .x2.
 */
void DynamicFunc__RIPEMD160_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input1 and write the
 * encoded digest into input2 starting at the byte offset returned by
 * nLargeOff_get(tid).  total_len2_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD160_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt(src, total_len_X86[i], dst, &off, tid);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input2 and write the
 * encoded digest into input1 starting at the byte offset returned by
 * nLargeOff_get(tid).  total_len_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD160_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input1 and write the
 * encoded digest back into the same input1 buffer, starting at the byte
 * offset returned by nLargeOff_get(tid).  total_len_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD160_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// Source and destination are the same buffer.
		DoRIPEMD160_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input2 and write the
 * encoded digest back into the same input2 buffer, starting at the byte
 * offset returned by nLargeOff_get(tid).  total_len2_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD160_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		// Source and destination are the same buffer.
		DoRIPEMD160_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input1, write the encoded
 * digest at the start of the same input1 buffer, and set total_len_X86[i]
 * to the number of bytes written.
 */
void DynamicFunc__RIPEMD160_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoRIPEMD160_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input1, write the encoded
 * digest at the start of input2, and set total_len2_X86[i] to the number of
 * bytes written.
 */
void DynamicFunc__RIPEMD160_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input2, write the encoded
 * digest at the start of input1, and set total_len_X86[i] to the number of
 * bytes written.
 */
void DynamicFunc__RIPEMD160_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD160-hash input2, write the encoded
 * digest at the start of the same input2 buffer, and set total_len2_X86[i]
 * to the number of bytes written.
 */
void DynamicFunc__RIPEMD160_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoRIPEMD160_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += RIPEMD160_inc;
	}
}

/*
 * Hash input X for every index from i up to til and store the unencoded
 * digest in dynamic_BHO[Y-1].dat[index].b.
 *   X   : 1 selects input_buf_X86/total_len_X86; any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   : 1-based output number; converted to a 0-based slot on entry.
 * The slot's width is always set to 20; BE, bits and mixed_SIMD are
 * (re)written on every iteration, so they stay untouched for an empty range.
 */
inline static void _Dyna__RIPEMD160_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;
	dynamic_BHO[Y].width = 20;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len2_X86[i];
		}
		DoRIPEMD160_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += RIPEMD160_inc;
	}
}
/*
 * Non-static entry points for the RIPEMD160 "inputX to outputY" form.  Each
 * one runs PRELIM_NO_TID and forwards i/til to
 * _Dyna__RIPEMD160_crypt_inputX_to_outputY, passing the input number (1 or 2)
 * and the 1-based output number (1..4) that appear in its own name.
 * NOTE(review): PRELIM_NO_TID is defined outside this section; it presumably
 * declares i and til from DYNA_OMP_PARAMS -- confirm.
 */
void DynamicFunc__RIPEMD160_crypt_input1_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(1, 1, i, til); }
void DynamicFunc__RIPEMD160_crypt_input1_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(1, 2, i, til); }
void DynamicFunc__RIPEMD160_crypt_input1_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(1, 3, i, til); }
void DynamicFunc__RIPEMD160_crypt_input1_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(1, 4, i, til); }
void DynamicFunc__RIPEMD160_crypt_input2_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(2, 1, i, til); }
void DynamicFunc__RIPEMD160_crypt_input2_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(2, 2, i, til); }
void DynamicFunc__RIPEMD160_crypt_input2_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(2, 3, i, til); }
void DynamicFunc__RIPEMD160_crypt_input2_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD160_crypt_inputX_to_outputY(2, 4, i, til); }

/*
 * For every index from i up to til (set up by PRELIM_NO_TID): RIPEMD160-hash
 * input1 and store the first 16 bytes of the digest in the matching
 * crypt_key_X86 slot.  With MD5_X2 nonzero, even indexes use .x1 and odd
 * indexes use .x2.
 */
void DynamicFunc__RIPEMD160_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt_f(src, total_len_X86[i], dst);
		i += RIPEMD160_inc;
	}
}

/*
 * For every index from i up to til (set up by PRELIM_NO_TID): RIPEMD160-hash
 * input2 and store the first 16 bytes of the digest in the matching
 * crypt_key_X86 slot.  With MD5_X2 nonzero, even indexes use .x1 and odd
 * indexes use .x2.
 */
void DynamicFunc__RIPEMD160_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD160_crypt_f(src, total_len2_X86[i], dst);
		i += RIPEMD160_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=RIPEMD256 PARAHASH=RIPEMD256 BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_ripemd256_context HASH_Init=sph_ripemd256_init HASH_Update=sph_ripemd256 HASH_Final=sph_ripemd256_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_RIPEMD256
 ***********************************************************************/


/*****************************************************************************
 ****  RIPEMD256 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// Fixed at 1 for this hash.  NOTE(review): no use of RIPEMD256_LOOPS is visible in this section.
#define RIPEMD256_LOOPS 1
// Step added to the candidate index on every iteration of the RIPEMD256 loops below.
static const uint32_t RIPEMD256_inc = 1;

/*
 * Hash `len` bytes at `in` with RIPEMD256 and copy only the first 16 bytes
 * of the 32-byte digest buffer to `out`.
 */
inline static void DoRIPEMD256_crypt_f(void *in, uint32_t len, void *out) {
	// Only the byte view of the union is used here; the word view is kept as generated.
	union xx { unsigned char u[32]; uint32_t a[32/sizeof(uint32_t)]; } digest;
	sph_ripemd256_context hash_ctx;

	sph_ripemd256_init(&hash_ctx);
	sph_ripemd256(&hash_ctx, in, len);
	sph_ripemd256_close(digest.u, &hash_ctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hash `ilen` bytes at `in` with RIPEMD256 and write the encoded digest into
 * `out` starting at byte offset *tot_len, then add the number of bytes
 * written to *tot_len.  The digest is built in a local buffer before any
 * output is written, so `in` and `out` may be the same buffer.
 */
inline static void DoRIPEMD256_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	sph_ripemd256_context hash_ctx;

	sph_ripemd256_init(&hash_ctx);
	sph_ripemd256(&hash_ctx, in, ilen);
	sph_ripemd256_close(digest, &hash_ctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		// Any other output format: the return value of large_hash_output is added to the length.
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	// Base16 is the usual case, so skip large_hash_output and hex-encode directly (2 chars per byte).
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Hash `ilen` bytes at `in` with RIPEMD256, handing `out` straight to the
 * close call as the digest destination (no encoding, no truncation here).
 */
inline static void DoRIPEMD256_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_ripemd256_context hash_ctx;

	sph_ripemd256_init(&hash_ctx);
	sph_ripemd256(&hash_ctx, in, ilen);
	sph_ripemd256_close(out, &hash_ctx);
}

/*
 * For every index from i up to til (set up by PRELIM_W_TID): RIPEMD256-hash
 * input1 and write the encoded digest into input2 at its current length,
 * growing total_len2_X86[i] by the number of bytes written.
 * With MD5_X2 nonzero, even indexes use .x1 and odd indexes use .x2.
 */
void DynamicFunc__RIPEMD256_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til (set up by PRELIM_W_TID): RIPEMD256-hash
 * input2 and write the encoded digest into input1 at its current length,
 * growing total_len_X86[i] by the number of bytes written.
 * With MD5_X2 nonzero, even indexes use .x1 and odd indexes use .x2.
 */
void DynamicFunc__RIPEMD256_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input1 and write the
 * encoded digest into input2 starting at the byte offset returned by
 * nLargeOff_get(tid).  total_len2_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD256_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt(src, total_len_X86[i], dst, &off, tid);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input2 and write the
 * encoded digest into input1 starting at the byte offset returned by
 * nLargeOff_get(tid).  total_len_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD256_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input1 and write the
 * encoded digest back into the same input1 buffer, starting at the byte
 * offset returned by nLargeOff_get(tid).  total_len_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD256_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// Source and destination are the same buffer.
		DoRIPEMD256_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input2 and write the
 * encoded digest back into the same input2 buffer, starting at the byte
 * offset returned by nLargeOff_get(tid).  total_len2_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD256_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		// Source and destination are the same buffer.
		DoRIPEMD256_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input1, write the encoded
 * digest at the start of the same input1 buffer, and set total_len_X86[i]
 * to the number of bytes written.
 */
void DynamicFunc__RIPEMD256_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoRIPEMD256_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input1, write the encoded
 * digest at the start of input2, and set total_len2_X86[i] to the number of
 * bytes written.
 */
void DynamicFunc__RIPEMD256_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input2, write the encoded
 * digest at the start of input1, and set total_len_X86[i] to the number of
 * bytes written.
 */
void DynamicFunc__RIPEMD256_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD256-hash input2, write the encoded
 * digest at the start of the same input2 buffer, and set total_len2_X86[i]
 * to the number of bytes written.
 */
void DynamicFunc__RIPEMD256_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoRIPEMD256_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += RIPEMD256_inc;
	}
}

/*
 * Hash input X for every index from i up to til and store the unencoded
 * digest in dynamic_BHO[Y-1].dat[index].b.
 *   X   : 1 selects input_buf_X86/total_len_X86; any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   : 1-based output number; converted to a 0-based slot on entry.
 * The slot's width is always set to 32; BE, bits and mixed_SIMD are
 * (re)written on every iteration, so they stay untouched for an empty range.
 */
inline static void _Dyna__RIPEMD256_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len2_X86[i];
		}
		DoRIPEMD256_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += RIPEMD256_inc;
	}
}
/*
 * Non-static entry points for the RIPEMD256 "inputX to outputY" form.  Each
 * one runs PRELIM_NO_TID and forwards i/til to
 * _Dyna__RIPEMD256_crypt_inputX_to_outputY, passing the input number (1 or 2)
 * and the 1-based output number (1..4) that appear in its own name.
 * NOTE(review): PRELIM_NO_TID is defined outside this section; it presumably
 * declares i and til from DYNA_OMP_PARAMS -- confirm.
 */
void DynamicFunc__RIPEMD256_crypt_input1_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(1, 1, i, til); }
void DynamicFunc__RIPEMD256_crypt_input1_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(1, 2, i, til); }
void DynamicFunc__RIPEMD256_crypt_input1_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(1, 3, i, til); }
void DynamicFunc__RIPEMD256_crypt_input1_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(1, 4, i, til); }
void DynamicFunc__RIPEMD256_crypt_input2_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(2, 1, i, til); }
void DynamicFunc__RIPEMD256_crypt_input2_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(2, 2, i, til); }
void DynamicFunc__RIPEMD256_crypt_input2_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(2, 3, i, til); }
void DynamicFunc__RIPEMD256_crypt_input2_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD256_crypt_inputX_to_outputY(2, 4, i, til); }

/*
 * For every index from i up to til (set up by PRELIM_NO_TID): RIPEMD256-hash
 * input1 and store the first 16 bytes of the digest in the matching
 * crypt_key_X86 slot.  With MD5_X2 nonzero, even indexes use .x1 and odd
 * indexes use .x2.
 */
void DynamicFunc__RIPEMD256_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt_f(src, total_len_X86[i], dst);
		i += RIPEMD256_inc;
	}
}

/*
 * For every index from i up to til (set up by PRELIM_NO_TID): RIPEMD256-hash
 * input2 and store the first 16 bytes of the digest in the matching
 * crypt_key_X86 slot.  With MD5_X2 nonzero, even indexes use .x1 and odd
 * indexes use .x2.
 */
void DynamicFunc__RIPEMD256_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD256_crypt_f(src, total_len2_X86[i], dst);
		i += RIPEMD256_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=RIPEMD320 PARAHASH=RIPEMD320 BIN_SZ=40 BIN_REAL_SZ=40 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_ripemd320_context HASH_Init=sph_ripemd320_init HASH_Update=sph_ripemd320 HASH_Final=sph_ripemd320_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_RIPEMD320
 ***********************************************************************/


/*****************************************************************************
 ****  RIPEMD320 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// Fixed at 1 for this hash.  NOTE(review): no use of RIPEMD320_LOOPS is visible in this section.
#define RIPEMD320_LOOPS 1
// Step added to the candidate index on every iteration of the RIPEMD320 loops below.
static const uint32_t RIPEMD320_inc = 1;

/*
 * Hash `len` bytes at `in` with RIPEMD320 and copy only the first 16 bytes
 * of the 40-byte digest buffer to `out`.
 */
inline static void DoRIPEMD320_crypt_f(void *in, uint32_t len, void *out) {
	// Only the byte view of the union is used here; the word view is kept as generated.
	union xx { unsigned char u[40]; uint32_t a[40/sizeof(uint32_t)]; } digest;
	sph_ripemd320_context hash_ctx;

	sph_ripemd320_init(&hash_ctx);
	sph_ripemd320(&hash_ctx, in, len);
	sph_ripemd320_close(digest.u, &hash_ctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hash `ilen` bytes at `in` with RIPEMD320 and write the encoded digest into
 * `out` starting at byte offset *tot_len, then add the number of bytes
 * written to *tot_len.  The digest is built in a local buffer before any
 * output is written, so `in` and `out` may be the same buffer.
 */
inline static void DoRIPEMD320_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[40];
	unsigned char *dst;
	sph_ripemd320_context hash_ctx;

	sph_ripemd320_init(&hash_ctx);
	sph_ripemd320(&hash_ctx, in, ilen);
	sph_ripemd320_close(digest, &hash_ctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		// Any other output format: the return value of large_hash_output is added to the length.
		*tot_len += large_hash_output(digest, dst, 40, tid);
		return;
	}
	// Base16 is the usual case, so skip large_hash_output and hex-encode directly (2 chars per byte).
	hex_out_buf(digest, dst, 40);
	*tot_len += 40*2;
}
/*
 * Hash `ilen` bytes at `in` with RIPEMD320, handing `out` straight to the
 * close call as the digest destination (no encoding, no truncation here).
 */
inline static void DoRIPEMD320_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_ripemd320_context hash_ctx;

	sph_ripemd320_init(&hash_ctx);
	sph_ripemd320(&hash_ctx, in, ilen);
	sph_ripemd320_close(out, &hash_ctx);
}

/*
 * For every index from i up to til (set up by PRELIM_W_TID): RIPEMD320-hash
 * input1 and write the encoded digest into input2 at its current length,
 * growing total_len2_X86[i] by the number of bytes written.
 * With MD5_X2 nonzero, even indexes use .x1 and odd indexes use .x2.
 */
void DynamicFunc__RIPEMD320_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til (set up by PRELIM_W_TID): RIPEMD320-hash
 * input2 and write the encoded digest into input1 at its current length,
 * growing total_len_X86[i] by the number of bytes written.
 * With MD5_X2 nonzero, even indexes use .x1 and odd indexes use .x2.
 */
void DynamicFunc__RIPEMD320_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input1 and write the
 * encoded digest into input2 starting at the byte offset returned by
 * nLargeOff_get(tid).  total_len2_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD320_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt(src, total_len_X86[i], dst, &off, tid);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input2 and write the
 * encoded digest into input1 starting at the byte offset returned by
 * nLargeOff_get(tid).  total_len_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD320_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input1 and write the
 * encoded digest back into the same input1 buffer, starting at the byte
 * offset returned by nLargeOff_get(tid).  total_len_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD320_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		// Source and destination are the same buffer.
		DoRIPEMD320_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input2 and write the
 * encoded digest back into the same input2 buffer, starting at the byte
 * offset returned by nLargeOff_get(tid).  total_len2_X86[i] is not modified.
 */
void DynamicFunc__RIPEMD320_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);	// advanced by the helper, then discarded
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		// Source and destination are the same buffer.
		DoRIPEMD320_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input1, write the encoded
 * digest at the start of the same input1 buffer, and set total_len_X86[i]
 * to the number of bytes written.
 */
void DynamicFunc__RIPEMD320_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoRIPEMD320_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input1, write the encoded
 * digest at the start of input2, and set total_len2_X86[i] to the number of
 * bytes written.
 */
void DynamicFunc__RIPEMD320_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input2, write the encoded
 * digest at the start of input1, and set total_len_X86[i] to the number of
 * bytes written.
 */
void DynamicFunc__RIPEMD320_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til: RIPEMD320-hash input2, write the encoded
 * digest at the start of the same input2 buffer, and set total_len2_X86[i]
 * to the number of bytes written.
 */
void DynamicFunc__RIPEMD320_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;	// write position; ends up as the encoded length
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoRIPEMD320_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += RIPEMD320_inc;
	}
}

/*
 * Hash input X for every index from i up to til and store the unencoded
 * digest in dynamic_BHO[Y-1].dat[index].b.
 *   X   : 1 selects input_buf_X86/total_len_X86; any other value selects
 *         input_buf2_X86/total_len2_X86.
 *   Y   : 1-based output number; converted to a 0-based slot on entry.
 * The slot's width is always set to 40; BE, bits and mixed_SIMD are
 * (re)written on every iteration, so they stay untouched for an empty range.
 */
inline static void _Dyna__RIPEMD320_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;
	dynamic_BHO[Y].width = 40;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len2_X86[i];
		}
		DoRIPEMD320_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += RIPEMD320_inc;
	}
}
/*
 * Non-static entry points for the RIPEMD320 "inputX to outputY" form.  Each
 * one runs PRELIM_NO_TID and forwards i/til to
 * _Dyna__RIPEMD320_crypt_inputX_to_outputY, passing the input number (1 or 2)
 * and the 1-based output number (1..4) that appear in its own name.
 * NOTE(review): PRELIM_NO_TID is defined outside this section; it presumably
 * declares i and til from DYNA_OMP_PARAMS -- confirm.
 */
void DynamicFunc__RIPEMD320_crypt_input1_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(1, 1, i, til); }
void DynamicFunc__RIPEMD320_crypt_input1_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(1, 2, i, til); }
void DynamicFunc__RIPEMD320_crypt_input1_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(1, 3, i, til); }
void DynamicFunc__RIPEMD320_crypt_input1_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(1, 4, i, til); }
void DynamicFunc__RIPEMD320_crypt_input2_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(2, 1, i, til); }
void DynamicFunc__RIPEMD320_crypt_input2_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(2, 2, i, til); }
void DynamicFunc__RIPEMD320_crypt_input2_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(2, 3, i, til); }
void DynamicFunc__RIPEMD320_crypt_input2_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__RIPEMD320_crypt_inputX_to_outputY(2, 4, i, til); }

/*
 * For every index from i up to til (set up by PRELIM_NO_TID): RIPEMD320-hash
 * input1 and store the first 16 bytes of the digest in the matching
 * crypt_key_X86 slot.  With MD5_X2 nonzero, even indexes use .x1 and odd
 * indexes use .x2.
 */
void DynamicFunc__RIPEMD320_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt_f(src, total_len_X86[i], dst);
		i += RIPEMD320_inc;
	}
}

/*
 * For every index from i up to til (set up by PRELIM_NO_TID): RIPEMD320-hash
 * input2 and store the first 16 bytes of the digest in the matching
 * crypt_key_X86 slot.  With MD5_X2 nonzero, even indexes use .x1 and odd
 * indexes use .x2.
 */
void DynamicFunc__RIPEMD320_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = crypt_key_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoRIPEMD320_crypt_f(src, total_len2_X86[i], dst);
		i += RIPEMD320_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL128_3 PARAHASH=HAVAL128_3 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval128_3_context HASH_Init=sph_haval128_3_init HASH_Update=sph_haval128_3 HASH_Final=sph_haval128_3_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL128_3
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL128_3 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// Fixed at 1 for this hash.  NOTE(review): no use of HAVAL128_3_LOOPS is visible in this section.
#define HAVAL128_3_LOOPS 1
// Step added to the candidate index on every iteration of the HAVAL128_3 loops below.
static const uint32_t HAVAL128_3_inc = 1;

/*
 * Hash `len` bytes at `in` with HAVAL128_3 and copy the 16-byte digest
 * buffer to `out`.
 */
inline static void DoHAVAL128_3_crypt_f(void *in, uint32_t len, void *out) {
	// Only the byte view of the union is used here; the word view is kept as generated.
	union xx { unsigned char u[16]; uint32_t a[16/sizeof(uint32_t)]; } digest;
	sph_haval128_3_context hash_ctx;

	sph_haval128_3_init(&hash_ctx);
	sph_haval128_3(&hash_ctx, in, len);
	sph_haval128_3_close(digest.u, &hash_ctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hash `ilen` bytes at `in` with HAVAL128_3 and write the encoded digest into
 * `out` starting at byte offset *tot_len, then add the number of bytes
 * written to *tot_len.  The digest is built in a local buffer before any
 * output is written, so `in` and `out` may be the same buffer.
 */
inline static void DoHAVAL128_3_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[16];
	unsigned char *dst;
	sph_haval128_3_context hash_ctx;

	sph_haval128_3_init(&hash_ctx);
	sph_haval128_3(&hash_ctx, in, ilen);
	sph_haval128_3_close(digest, &hash_ctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		// Any other output format: the return value of large_hash_output is added to the length.
		*tot_len += large_hash_output(digest, dst, 16, tid);
		return;
	}
	// Base16 is the usual case, so skip large_hash_output and hex-encode directly (2 chars per byte).
	hex_out_buf(digest, dst, 16);
	*tot_len += 16*2;
}
/*
 * Hash `ilen` bytes at `in` with HAVAL128_3, handing `out` straight to the
 * close call as the digest destination (no encoding, no truncation here).
 */
inline static void DoHAVAL128_3_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval128_3_context hash_ctx;

	sph_haval128_3_init(&hash_ctx);
	sph_haval128_3(&hash_ctx, in, ilen);
	sph_haval128_3_close(out, &hash_ctx);
}

/*
 * For each candidate from i up to til: HAVAL128_3(input1) is encoded and
 * appended to input2, and total_len2_X86[i] grows by the encoded length.
 * i, til and tid are not declared here; they come from the
 * DYNA_OMP_PARAMS / PRELIM_W_TID macros defined outside this section.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate from i up to til: HAVAL128_3(input2) is encoded and
 * appended to input1, and total_len_X86[i] grows by the encoded length.
 * i, til and tid come from the DYNA_OMP_PARAMS / PRELIM_W_TID macros.
 */
void DynamicFunc__HAVAL128_3_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input1) is encoded into input2 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input2) is encoded into input1 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_3_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input1) is encoded back into input1
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input2) is encoded back into input2
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_3_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input1) is encoded at the start of
 * input1, and total_len_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len_X86[i] = written;
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input1) is encoded at the start of
 * input2, and total_len2_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len2_X86[i] = written;
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input2) is encoded at the start of
 * input1, and total_len_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_3_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len_X86[i] = written;
		i += HAVAL128_3_inc;
	}
}

/*
 * For each candidate: HAVAL128_3(input2) is encoded at the start of
 * input2, and total_len2_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_3_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len2_X86[i] = written;
		i += HAVAL128_3_inc;
	}
}

/*
 * Shared worker for the HAVAL128_3 inputN_to_outputM entry points.
 * X picks the source (1 = input_buf_X86, anything else = input_buf2_X86);
 * Y is the 1-based index of the dynamic_BHO slot that receives the raw
 * digest of every candidate from i up to til (in .dat[i].b).
 * The slot's width is set to 16 once; BE, bits and mixed_SIMD are
 * rewritten on every pass, exactly as the generated original does.
 */
static inline void _Dyna__HAVAL128_3_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til)
{
	const uint32_t slot = Y - 1;	/* Y was 1 based for ease of reading. */

	dynamic_BHO[slot].width = 16;
	while (i < til) {
		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if (i & 1) {
				DoHAVAL128_3_crypt_only(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
					dynamic_BHO[slot].dat[i].b);
			} else
#endif
			{
				DoHAVAL128_3_crypt_only(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
					dynamic_BHO[slot].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if (i & 1) {
				DoHAVAL128_3_crypt_only(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
					dynamic_BHO[slot].dat[i].b);
			} else
#endif
			{
				DoHAVAL128_3_crypt_only(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
					dynamic_BHO[slot].dat[i].b);
			}
		}
		i += HAVAL128_3_inc;
	}
}
/*
 * Entry points that bind (X, Y) for _Dyna__HAVAL128_3_crypt_inputX_to_outputY:
 * inputN selects the source buffer, outputM the 1-based dynamic_BHO slot.
 * i and til are provided by the DYNA_OMP_PARAMS / PRELIM_NO_TID macros.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL128_3_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_3_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate from i up to til, the raw
 * HAVAL128_3(input1) digest (16 bytes) is stored in crypt_key_X86.
 */
void DynamicFunc__HAVAL128_3_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt_f(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				crypt_key_X86[i >> MD5_X2].x2.b2);
		} else
#endif
		{
			DoHAVAL128_3_crypt_f(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				crypt_key_X86[i >> MD5_X2].x1.b);
		}
		i += HAVAL128_3_inc;
	}
}

/*
 * Final step: for each candidate from i up to til, the raw
 * HAVAL128_3(input2) digest (16 bytes) is stored in crypt_key_X86.
 */
void DynamicFunc__HAVAL128_3_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_3_crypt_f(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				crypt_key_X86[i >> MD5_X2].x2.b2);
		} else
#endif
		{
			DoHAVAL128_3_crypt_f(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				crypt_key_X86[i >> MD5_X2].x1.b);
		}
		i += HAVAL128_3_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL128_4 PARAHASH=HAVAL128_4 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval128_4_context HASH_Init=sph_haval128_4_init HASH_Update=sph_haval128_4 HASH_Final=sph_haval128_4_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL128_4
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL128_4 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL128_4_LOOPS is not referenced anywhere in this section; confirm its purpose in dynamic_big_crypt_hash.cin. */
#define HAVAL128_4_LOOPS 1
/* Amount added to the candidate index on each pass of the HAVAL128_4 loops below. */
static const uint32_t HAVAL128_4_inc = 1;

/*
 * Hashes len bytes at in with HAVAL128_4 and copies the 16-byte digest to
 * out.  Called by the HAVAL128_4 *_FINAL functions to fill crypt_key_X86.
 */
static inline void DoHAVAL128_4_crypt_f(void *in, uint32_t len, void *out)
{
	/* The words member is never read; presumably it is only here to give
	 * the scratch digest uint32_t alignment. */
	union {
		uint32_t words[16 / sizeof(uint32_t)];
		unsigned char bytes[16];
	} digest;
	sph_haval128_4_context hctx;

	sph_haval128_4_init(&hctx);
	sph_haval128_4(&hctx, in, len);
	sph_haval128_4_close(digest.bytes, &hctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hashes ilen bytes at in with HAVAL128_4 and writes the encoded digest at
 * out + *tot_len.  For base-16 output the hex text is produced directly by
 * hex_out_buf and *tot_len grows by 32; for any other output type the
 * return value of large_hash_output is added to *tot_len instead.
 * The digest is finished in a local buffer before out is written, so in
 * and out may refer to the same buffer.
 */
static inline void DoHAVAL128_4_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[16];
	unsigned char *dst;
	sph_haval128_4_context hctx;

	sph_haval128_4_init(&hctx);
	sph_haval128_4(&hctx, in, ilen);
	sph_haval128_4_close(digest, &hctx);

	dst = &((unsigned char *)out)[*tot_len];
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 16, tid);
		return;
	}
	/* Hex is the usual case, so skip the large_hash_output overhead. */
	hex_out_buf(digest, dst, 16);
	*tot_len += 16 * 2;
}
/*
 * Hashes ilen bytes at in with HAVAL128_4 and lets sph_haval128_4_close
 * store the raw digest straight into out (no encoding, no length update).
 */
static inline void DoHAVAL128_4_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval128_4_context hctx;

	sph_haval128_4_init(&hctx);
	sph_haval128_4(&hctx, in, ilen);
	sph_haval128_4_close(out, &hctx);
}

/*
 * For each candidate from i up to til: HAVAL128_4(input1) is encoded and
 * appended to input2, and total_len2_X86[i] grows by the encoded length.
 * i, til and tid are not declared here; they come from the
 * DYNA_OMP_PARAMS / PRELIM_W_TID macros defined outside this section.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate from i up to til: HAVAL128_4(input2) is encoded and
 * appended to input1, and total_len_X86[i] grows by the encoded length.
 * i, til and tid come from the DYNA_OMP_PARAMS / PRELIM_W_TID macros.
 */
void DynamicFunc__HAVAL128_4_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input1) is encoded into input2 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input2) is encoded into input1 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_4_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input1) is encoded back into input1
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input2) is encoded back into input2
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_4_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input1) is encoded at the start of
 * input1, and total_len_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len_X86[i] = written;
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input1) is encoded at the start of
 * input2, and total_len2_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len2_X86[i] = written;
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input2) is encoded at the start of
 * input1, and total_len_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_4_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len_X86[i] = written;
		i += HAVAL128_4_inc;
	}
}

/*
 * For each candidate: HAVAL128_4(input2) is encoded at the start of
 * input2, and total_len2_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_4_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_4_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len2_X86[i] = written;
		i += HAVAL128_4_inc;
	}
}

/*
 * Shared worker for the HAVAL128_4 inputN_to_outputM entry points.
 * X picks the source (1 = input_buf_X86, anything else = input_buf2_X86);
 * Y is the 1-based index of the dynamic_BHO slot that receives the raw
 * digest of every candidate from i up to til (in .dat[i].b).
 * The slot's width is set to 16 once; BE, bits and mixed_SIMD are
 * rewritten on every pass, exactly as the generated original does.
 */
static inline void _Dyna__HAVAL128_4_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til)
{
	const uint32_t slot = Y - 1;	/* Y was 1 based for ease of reading. */

	dynamic_BHO[slot].width = 16;
	while (i < til) {
		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if (i & 1) {
				DoHAVAL128_4_crypt_only(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
					dynamic_BHO[slot].dat[i].b);
			} else
#endif
			{
				DoHAVAL128_4_crypt_only(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
					dynamic_BHO[slot].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if (i & 1) {
				DoHAVAL128_4_crypt_only(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
					dynamic_BHO[slot].dat[i].b);
			} else
#endif
			{
				DoHAVAL128_4_crypt_only(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
					dynamic_BHO[slot].dat[i].b);
			}
		}
		i += HAVAL128_4_inc;
	}
}
/*
 * Entry points that bind (X, Y) for _Dyna__HAVAL128_4_crypt_inputX_to_outputY:
 * inputN selects the source buffer, outputM the 1-based dynamic_BHO slot.
 * i and til are provided by the DYNA_OMP_PARAMS / PRELIM_NO_TID macros.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL128_4_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_4_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate from i up to til, the raw
 * HAVAL128_4(input1) digest (16 bytes) is stored in crypt_key_X86.
 */
void DynamicFunc__HAVAL128_4_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt_f(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				crypt_key_X86[i >> MD5_X2].x2.b2);
		} else
#endif
		{
			DoHAVAL128_4_crypt_f(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				crypt_key_X86[i >> MD5_X2].x1.b);
		}
		i += HAVAL128_4_inc;
	}
}

/*
 * Final step: for each candidate from i up to til, the raw
 * HAVAL128_4(input2) digest (16 bytes) is stored in crypt_key_X86.
 */
void DynamicFunc__HAVAL128_4_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_4_crypt_f(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				crypt_key_X86[i >> MD5_X2].x2.b2);
		} else
#endif
		{
			DoHAVAL128_4_crypt_f(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				crypt_key_X86[i >> MD5_X2].x1.b);
		}
		i += HAVAL128_4_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL128_5 PARAHASH=HAVAL128_5 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval128_5_context HASH_Init=sph_haval128_5_init HASH_Update=sph_haval128_5 HASH_Final=sph_haval128_5_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL128_5
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL128_5 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL128_5_LOOPS is not referenced anywhere in this section; confirm its purpose in dynamic_big_crypt_hash.cin. */
#define HAVAL128_5_LOOPS 1
/* Amount added to the candidate index on each pass of the HAVAL128_5 loops below. */
static const uint32_t HAVAL128_5_inc = 1;

/*
 * Hashes len bytes at in with HAVAL128_5 and copies the 16-byte digest to
 * out.  Called by the HAVAL128_5 *_FINAL functions to fill crypt_key_X86.
 */
static inline void DoHAVAL128_5_crypt_f(void *in, uint32_t len, void *out)
{
	/* The words member is never read; presumably it is only here to give
	 * the scratch digest uint32_t alignment. */
	union {
		uint32_t words[16 / sizeof(uint32_t)];
		unsigned char bytes[16];
	} digest;
	sph_haval128_5_context hctx;

	sph_haval128_5_init(&hctx);
	sph_haval128_5(&hctx, in, len);
	sph_haval128_5_close(digest.bytes, &hctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hashes ilen bytes at in with HAVAL128_5 and writes the encoded digest at
 * out + *tot_len.  For base-16 output the hex text is produced directly by
 * hex_out_buf and *tot_len grows by 32; for any other output type the
 * return value of large_hash_output is added to *tot_len instead.
 * The digest is finished in a local buffer before out is written, so in
 * and out may refer to the same buffer.
 */
static inline void DoHAVAL128_5_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[16];
	unsigned char *dst;
	sph_haval128_5_context hctx;

	sph_haval128_5_init(&hctx);
	sph_haval128_5(&hctx, in, ilen);
	sph_haval128_5_close(digest, &hctx);

	dst = &((unsigned char *)out)[*tot_len];
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 16, tid);
		return;
	}
	/* Hex is the usual case, so skip the large_hash_output overhead. */
	hex_out_buf(digest, dst, 16);
	*tot_len += 16 * 2;
}
/*
 * Hashes ilen bytes at in with HAVAL128_5 and lets sph_haval128_5_close
 * store the raw digest straight into out (no encoding, no length update).
 */
static inline void DoHAVAL128_5_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval128_5_context hctx;

	sph_haval128_5_init(&hctx);
	sph_haval128_5(&hctx, in, ilen);
	sph_haval128_5_close(out, &hctx);
}

/*
 * For each candidate from i up to til: HAVAL128_5(input1) is encoded and
 * appended to input2, and total_len2_X86[i] grows by the encoded length.
 * i, til and tid are not declared here; they come from the
 * DYNA_OMP_PARAMS / PRELIM_W_TID macros defined outside this section.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate from i up to til: HAVAL128_5(input2) is encoded and
 * appended to input1, and total_len_X86[i] grows by the encoded length.
 * i, til and tid come from the DYNA_OMP_PARAMS / PRELIM_W_TID macros.
 */
void DynamicFunc__HAVAL128_5_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input1) is encoded into input2 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input2) is encoded into input1 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_5_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input1) is encoded back into input1
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input2) is encoded back into input2
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL128_5_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input1) is encoded at the start of
 * input1, and total_len_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len_X86[i] = written;
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input1) is encoded at the start of
 * input2, and total_len2_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len2_X86[i] = written;
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input2) is encoded at the start of
 * input1, and total_len_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_5_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len_X86[i] = written;
		i += HAVAL128_5_inc;
	}
}

/*
 * For each candidate: HAVAL128_5(input2) is encoded at the start of
 * input2, and total_len2_X86[i] becomes the encoded length.
 */
void DynamicFunc__HAVAL128_5_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &written, tid);
		} else
#endif
		{
			DoHAVAL128_5_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &written, tid);
		}
		total_len2_X86[i] = written;
		i += HAVAL128_5_inc;
	}
}

/*
 * Shared worker for the HAVAL128_5 inputN_to_outputM entry points.
 * X picks the source (1 = input_buf_X86, anything else = input_buf2_X86);
 * Y is the 1-based index of the dynamic_BHO slot that receives the raw
 * digest of every candidate from i up to til (in .dat[i].b).
 * The slot's width is set to 16 once; BE, bits and mixed_SIMD are
 * rewritten on every pass, exactly as the generated original does.
 */
static inline void _Dyna__HAVAL128_5_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til)
{
	const uint32_t slot = Y - 1;	/* Y was 1 based for ease of reading. */

	dynamic_BHO[slot].width = 16;
	while (i < til) {
		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;
		if (X != 1) {
#if (MD5_X2)
			if (i & 1) {
				DoHAVAL128_5_crypt_only(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
					dynamic_BHO[slot].dat[i].b);
			} else
#endif
			{
				DoHAVAL128_5_crypt_only(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
					dynamic_BHO[slot].dat[i].b);
			}
		} else {
#if (MD5_X2)
			if (i & 1) {
				DoHAVAL128_5_crypt_only(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
					dynamic_BHO[slot].dat[i].b);
			} else
#endif
			{
				DoHAVAL128_5_crypt_only(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
					dynamic_BHO[slot].dat[i].b);
			}
		}
		i += HAVAL128_5_inc;
	}
}
/*
 * Entry points that bind (X, Y) for _Dyna__HAVAL128_5_crypt_inputX_to_outputY:
 * inputN selects the source buffer, outputM the 1-based dynamic_BHO slot.
 * i and til are provided by the DYNA_OMP_PARAMS / PRELIM_NO_TID macros.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL128_5_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL128_5_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate from i up to til, the raw
 * HAVAL128_5(input1) digest (16 bytes) is stored in crypt_key_X86.
 */
void DynamicFunc__HAVAL128_5_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt_f(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				crypt_key_X86[i >> MD5_X2].x2.b2);
		} else
#endif
		{
			DoHAVAL128_5_crypt_f(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				crypt_key_X86[i >> MD5_X2].x1.b);
		}
		i += HAVAL128_5_inc;
	}
}

/*
 * Final step: for each candidate from i up to til, the raw
 * HAVAL128_5(input2) digest (16 bytes) is stored in crypt_key_X86.
 */
void DynamicFunc__HAVAL128_5_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL128_5_crypt_f(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				crypt_key_X86[i >> MD5_X2].x2.b2);
		} else
#endif
		{
			DoHAVAL128_5_crypt_f(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				crypt_key_X86[i >> MD5_X2].x1.b);
		}
		i += HAVAL128_5_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL160_3 PARAHASH=HAVAL160_3 BIN_SZ=20 BIN_REAL_SZ=20 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval160_3_context HASH_Init=sph_haval160_3_init HASH_Update=sph_haval160_3 HASH_Final=sph_haval160_3_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL160_3
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL160_3 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL160_3_LOOPS is not referenced anywhere in this section; confirm its purpose in dynamic_big_crypt_hash.cin. */
#define HAVAL160_3_LOOPS 1
/* Amount added to the candidate index on each pass of the HAVAL160_3 loops below. */
static const uint32_t HAVAL160_3_inc = 1;

/*
 * Hashes len bytes at in with HAVAL160_3 into a 20-byte scratch digest and
 * copies only its first 16 bytes to out.
 * NOTE(review): the truncation matches the DEFINED=TRUNC_TO16 option on
 * the generator command line for this section; presumably the destination
 * key slot holds just 16 bytes - confirm before changing the length.
 */
static inline void DoHAVAL160_3_crypt_f(void *in, uint32_t len, void *out)
{
	/* The words member is never read; presumably it is only here to give
	 * the scratch digest uint32_t alignment. */
	union {
		uint32_t words[20 / sizeof(uint32_t)];
		unsigned char bytes[20];
	} digest;
	sph_haval160_3_context hctx;

	sph_haval160_3_init(&hctx);
	sph_haval160_3(&hctx, in, len);
	sph_haval160_3_close(digest.bytes, &hctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hashes ilen bytes at in with HAVAL160_3 and writes the encoded digest at
 * out + *tot_len.  For base-16 output the hex text is produced directly by
 * hex_out_buf and *tot_len grows by 40; for any other output type the
 * return value of large_hash_output is added to *tot_len instead.
 * The digest is finished in a local buffer before out is written, so in
 * and out may refer to the same buffer.
 */
static inline void DoHAVAL160_3_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid)
{
	unsigned char digest[20];
	unsigned char *dst;
	sph_haval160_3_context hctx;

	sph_haval160_3_init(&hctx);
	sph_haval160_3(&hctx, in, ilen);
	sph_haval160_3_close(digest, &hctx);

	dst = &((unsigned char *)out)[*tot_len];
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 20, tid);
		return;
	}
	/* Hex is the usual case, so skip the large_hash_output overhead. */
	hex_out_buf(digest, dst, 20);
	*tot_len += 20 * 2;
}
/*
 * Hashes ilen bytes at in with HAVAL160_3 and lets sph_haval160_3_close
 * store the raw digest straight into out (no encoding, no length update).
 */
static inline void DoHAVAL160_3_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval160_3_context hctx;

	sph_haval160_3_init(&hctx);
	sph_haval160_3(&hctx, in, ilen);
	sph_haval160_3_close(out, &hctx);
}

/*
 * For each candidate from i up to til: HAVAL160_3(input1) is encoded and
 * appended to input2, and total_len2_X86[i] grows by the encoded length.
 * i, til and tid are not declared here; they come from the
 * DYNA_OMP_PARAMS / PRELIM_W_TID macros defined outside this section.
 */
void DynamicFunc__HAVAL160_3_crypt_input1_append_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL160_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &total_len2_X86[i], tid);
		} else
#endif
		{
			DoHAVAL160_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &total_len2_X86[i], tid);
		}
		i += HAVAL160_3_inc;
	}
}

/*
 * For each candidate from i up to til: HAVAL160_3(input2) is encoded and
 * appended to input1, and total_len_X86[i] grows by the encoded length.
 * i, til and tid come from the DYNA_OMP_PARAMS / PRELIM_W_TID macros.
 */
void DynamicFunc__HAVAL160_3_crypt_input2_append_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
#if (MD5_X2)
		if (i & 1) {
			/* odd candidates use the .x2.b2 member of the shared slot */
			DoHAVAL160_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &total_len_X86[i], tid);
		} else
#endif
		{
			DoHAVAL160_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &total_len_X86[i], tid);
		}
		i += HAVAL160_3_inc;
	}
}

/*
 * For each candidate: HAVAL160_3(input1) is encoded into input2 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL160_3_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL160_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL160_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL160_3_inc;
	}
}

/*
 * For each candidate: HAVAL160_3(input2) is encoded into input1 starting
 * at offset nLargeOff_get(tid).  The advanced offset is thrown away, so
 * total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL160_3_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL160_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL160_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL160_3_inc;
	}
}

/*
 * For each candidate: HAVAL160_3(input1) is encoded back into input1
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL160_3_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL160_3_crypt(input_buf_X86[i >> MD5_X2].x2.b2, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL160_3_crypt(input_buf_X86[i >> MD5_X2].x1.b, total_len_X86[i],
				input_buf_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL160_3_inc;
	}
}

/*
 * For each candidate: HAVAL160_3(input2) is encoded back into input2
 * starting at offset nLargeOff_get(tid).  The advanced offset is thrown
 * away, so total_len2_X86[i] is left untouched.
 */
void DynamicFunc__HAVAL160_3_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS)
{
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
#if (MD5_X2)
		if (i & 1) {
			DoHAVAL160_3_crypt(input_buf2_X86[i >> MD5_X2].x2.b2, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x2.b2, &off, tid);
		} else
#endif
		{
			DoHAVAL160_3_crypt(input_buf2_X86[i >> MD5_X2].x1.b, total_len2_X86[i],
				input_buf2_X86[i >> MD5_X2].x1.b, &off, tid);
		}
		i += HAVAL160_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, run DoHAVAL160_3_crypt on the input1
 * buffer with its output written from offset 0 of that same buffer, then
 * store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL160_3_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_3_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL160_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, run DoHAVAL160_3_crypt on the input1
 * buffer with its output written from offset 0 of input2, then store the
 * resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL160_3_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_3_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL160_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, run DoHAVAL160_3_crypt on the input2
 * buffer with its output written from offset 0 of input1, then store the
 * resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL160_3_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_3_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL160_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, run DoHAVAL160_3_crypt on the input2
 * buffer with its output written from offset 0 of that same buffer, then
 * store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL160_3_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_3_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL160_3_inc;
	}
}

/*
 * Shared worker for the input<X>_to_output<Y> entry points.
 *
 * X: 1 selects input_buf_X86/total_len_X86; any other value selects
 *    input_buf2_X86/total_len2_X86.
 * Y: 1-based index of the dynamic_BHO slot that receives the results.
 * i, til: half-open index range to process.
 *
 * For each index, DoHAVAL160_3_crypt_only writes its result into
 * dynamic_BHO[Y-1].dat[i].b.  Storage class is spelled `static inline`:
 * placing `static` after `inline` is an obsolescent form in C11.
 */
static inline void _Dyna__HAVAL160_3_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	dynamic_BHO[--Y].width = 20; // Y was 1 based for ease of reading.
	for (; i < til; i += HAVAL160_3_inc) {
		void *src;
		uint32_t src_len;

		/* Slot metadata is rewritten on every iteration (and only if the range is non-empty). */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
			#if (MD5_X2)
			/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
		}
		DoHAVAL160_3_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
	}
}
/*
 * Entry points for "input X -> output Y" (X in 1..2, Y in 1..4).  Each one
 * runs PRELIM_NO_TID and forwards (X, Y, i, til) to
 * _Dyna__HAVAL160_3_crypt_inputX_to_outputY.
 */
void DynamicFunc__HAVAL160_3_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL160_3_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_3_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For each index set up by PRELIM_NO_TID, pass the input1 buffer and its
 * length to DoHAVAL160_3_crypt_f, with the matching crypt_key_X86 entry as
 * the destination.
 */
void DynamicFunc__HAVAL160_3_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL160_3_crypt_f(src, total_len_X86[i], key);
		i += HAVAL160_3_inc;
	}
}

/*
 * For each index set up by PRELIM_NO_TID, pass the input2 buffer and its
 * length to DoHAVAL160_3_crypt_f, with the matching crypt_key_X86 entry as
 * the destination.
 */
void DynamicFunc__HAVAL160_3_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL160_3_crypt_f(src, total_len2_X86[i], key);
		i += HAVAL160_3_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL160_4 PARAHASH=HAVAL160_4 BIN_SZ=20 BIN_REAL_SZ=20 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval160_4_context HASH_Init=sph_haval160_4_init HASH_Update=sph_haval160_4 HASH_Final=sph_haval160_4_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL160_4
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL160_4 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL160_4_LOOPS is not referenced anywhere in this HAVAL160_4 section; presumably used elsewhere or a generator artifact - confirm. */
#define HAVAL160_4_LOOPS 1
/* Index stride used by every DynamicFunc__HAVAL160_4_* loop in this section. */
static const uint32_t HAVAL160_4_inc = 1;

/*
 * One-shot HAVAL160_4 (init / update / close) over `len` bytes at `in`.
 * The digest is produced in a 20-byte local buffer and only its first
 * 16 bytes are copied to `out`.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL160_4_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read; presumably it exists to word-align the digest bytes. */
	union { unsigned char bytes[20]; uint32_t words[20/sizeof(uint32_t)]; } digest;
	sph_haval160_4_context ctx;

	sph_haval160_4_init(&ctx);
	sph_haval160_4(&ctx, in, len);
	sph_haval160_4_close(digest.bytes, &ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash `ilen` bytes at `in` with HAVAL160_4 and emit the 20-byte digest into
 * `out` starting at byte offset *tot_len, in the format selected by
 * eLargeOut_get(tid).
 *
 * *tot_len is advanced by 40 in the eBase16 case, otherwise by whatever
 * large_hash_output returns.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL160_4_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[20];
	unsigned char *dst;
	sph_haval160_4_context ctx;

	sph_haval160_4_init(&ctx);
	sph_haval160_4(&ctx, in, ilen);
	sph_haval160_4_close(digest, &ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 20, tid);
		return;
	}
	// since this is the usual, we avoid the extra overhead of large_hash_output, and go directly to the hex_out.
	hex_out_buf(digest, dst, 20);
	*tot_len += 20*2;
}
/*
 * One-shot HAVAL160_4 (init / update / close) over `ilen` bytes at `in`;
 * the close routine writes the digest directly to `out`.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL160_4_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval160_4_context hash_state;

	sph_haval160_4_init(&hash_state);
	sph_haval160_4(&hash_state, in, ilen);
	sph_haval160_4_close(out, &hash_state);
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_4_crypt and append the encoded digest to input2:
 * total_len2_X86[i] is both the write offset and the length that grows.
 */
void DynamicFunc__HAVAL160_4_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_4_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_4_crypt and append the encoded digest to input1:
 * total_len_X86[i] is both the write offset and the length that grows.
 */
void DynamicFunc__HAVAL160_4_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_4_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_4_crypt and write the encoded digest into input2 starting at
 * nLargeOff_get(tid).  The advanced offset is a local and is discarded; no
 * length array is written.
 */
void DynamicFunc__HAVAL160_4_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_4_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_4_crypt and write the encoded digest into input1 starting at
 * nLargeOff_get(tid).  The advanced offset is a local and is discarded; no
 * length array is written.
 */
void DynamicFunc__HAVAL160_4_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_4_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_4_crypt and write the encoded digest back into the same buffer
 * starting at nLargeOff_get(tid).  total_len_X86 is read but not updated.
 */
void DynamicFunc__HAVAL160_4_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_4_crypt(buf, total_len_X86[i], buf, &write_pos, tid);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_4_crypt and write the encoded digest back into the same buffer
 * starting at nLargeOff_get(tid).  total_len2_X86 is read but not updated.
 */
void DynamicFunc__HAVAL160_4_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_4_crypt(buf, total_len2_X86[i], buf, &write_pos, tid);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_4_crypt, write the encoded digest from offset 0 of that same
 * buffer, and store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL160_4_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_4_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_4_crypt, write the encoded digest from offset 0 of input2, and
 * store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL160_4_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_4_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_4_crypt, write the encoded digest from offset 0 of input1, and
 * store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL160_4_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_4_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_4_crypt, write the encoded digest from offset 0 of that same
 * buffer, and store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL160_4_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_4_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL160_4_inc;
	}
}

/*
 * Shared worker for the input<X>_to_output<Y> entry points.
 *
 * X: 1 selects input_buf_X86/total_len_X86; any other value selects
 *    input_buf2_X86/total_len2_X86.
 * Y: 1-based index of the dynamic_BHO slot that receives the digests.
 * i, til: half-open index range to process.
 *
 * For each index, DoHAVAL160_4_crypt_only writes the digest into
 * dynamic_BHO[Y-1].dat[i].b.  Storage class is spelled `static inline`:
 * placing `static` after `inline` is an obsolescent form in C11.
 */
static inline void _Dyna__HAVAL160_4_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	dynamic_BHO[--Y].width = 20; // Y was 1 based for ease of reading.
	for (; i < til; i += HAVAL160_4_inc) {
		void *src;
		uint32_t src_len;

		/* Slot metadata is rewritten on every iteration (and only if the range is non-empty). */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
			#if (MD5_X2)
			/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
		}
		DoHAVAL160_4_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
	}
}
/*
 * Entry points for "input X -> output Y" (X in 1..2, Y in 1..4).  Each one
 * runs PRELIM_NO_TID and forwards (X, Y, i, til) to
 * _Dyna__HAVAL160_4_crypt_inputX_to_outputY.
 */
void DynamicFunc__HAVAL160_4_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL160_4_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_4_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For each index set up by PRELIM_NO_TID, hash the input1 buffer with
 * DoHAVAL160_4_crypt_f, which stores the first 16 digest bytes in the
 * matching crypt_key_X86 entry.
 */
void DynamicFunc__HAVAL160_4_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL160_4_crypt_f(src, total_len_X86[i], key);
		i += HAVAL160_4_inc;
	}
}

/*
 * For each index set up by PRELIM_NO_TID, hash the input2 buffer with
 * DoHAVAL160_4_crypt_f, which stores the first 16 digest bytes in the
 * matching crypt_key_X86 entry.
 */
void DynamicFunc__HAVAL160_4_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL160_4_crypt_f(src, total_len2_X86[i], key);
		i += HAVAL160_4_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL160_5 PARAHASH=HAVAL160_5 BIN_SZ=20 BIN_REAL_SZ=20 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval160_5_context HASH_Init=sph_haval160_5_init HASH_Update=sph_haval160_5 HASH_Final=sph_haval160_5_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL160_5
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL160_5 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL160_5_LOOPS is not referenced anywhere in this HAVAL160_5 section; presumably used elsewhere or a generator artifact - confirm. */
#define HAVAL160_5_LOOPS 1
/* Index stride used by every DynamicFunc__HAVAL160_5_* loop in this section. */
static const uint32_t HAVAL160_5_inc = 1;

/*
 * One-shot HAVAL160_5 (init / update / close) over `len` bytes at `in`.
 * The digest is produced in a 20-byte local buffer and only its first
 * 16 bytes are copied to `out`.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL160_5_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read; presumably it exists to word-align the digest bytes. */
	union { unsigned char bytes[20]; uint32_t words[20/sizeof(uint32_t)]; } digest;
	sph_haval160_5_context ctx;

	sph_haval160_5_init(&ctx);
	sph_haval160_5(&ctx, in, len);
	sph_haval160_5_close(digest.bytes, &ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash `ilen` bytes at `in` with HAVAL160_5 and emit the 20-byte digest into
 * `out` starting at byte offset *tot_len, in the format selected by
 * eLargeOut_get(tid).
 *
 * *tot_len is advanced by 40 in the eBase16 case, otherwise by whatever
 * large_hash_output returns.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL160_5_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[20];
	unsigned char *dst;
	sph_haval160_5_context ctx;

	sph_haval160_5_init(&ctx);
	sph_haval160_5(&ctx, in, ilen);
	sph_haval160_5_close(digest, &ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 20, tid);
		return;
	}
	// since this is the usual, we avoid the extra overhead of large_hash_output, and go directly to the hex_out.
	hex_out_buf(digest, dst, 20);
	*tot_len += 20*2;
}
/*
 * One-shot HAVAL160_5 (init / update / close) over `ilen` bytes at `in`;
 * the close routine writes the digest directly to `out`.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL160_5_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval160_5_context hash_state;

	sph_haval160_5_init(&hash_state);
	sph_haval160_5(&hash_state, in, ilen);
	sph_haval160_5_close(out, &hash_state);
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_5_crypt and append the encoded digest to input2:
 * total_len2_X86[i] is both the write offset and the length that grows.
 */
void DynamicFunc__HAVAL160_5_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_5_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_5_crypt and append the encoded digest to input1:
 * total_len_X86[i] is both the write offset and the length that grows.
 */
void DynamicFunc__HAVAL160_5_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_5_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_5_crypt and write the encoded digest into input2 starting at
 * nLargeOff_get(tid).  The advanced offset is a local and is discarded; no
 * length array is written.
 */
void DynamicFunc__HAVAL160_5_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_5_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_5_crypt and write the encoded digest into input1 starting at
 * nLargeOff_get(tid).  The advanced offset is a local and is discarded; no
 * length array is written.
 */
void DynamicFunc__HAVAL160_5_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_5_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_5_crypt and write the encoded digest back into the same buffer
 * starting at nLargeOff_get(tid).  total_len_X86 is read but not updated.
 */
void DynamicFunc__HAVAL160_5_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_5_crypt(buf, total_len_X86[i], buf, &write_pos, tid);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_5_crypt and write the encoded digest back into the same buffer
 * starting at nLargeOff_get(tid).  total_len2_X86 is read but not updated.
 */
void DynamicFunc__HAVAL160_5_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_5_crypt(buf, total_len2_X86[i], buf, &write_pos, tid);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_5_crypt, write the encoded digest from offset 0 of that same
 * buffer, and store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL160_5_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_5_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL160_5_crypt, write the encoded digest from offset 0 of input2, and
 * store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL160_5_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_5_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_5_crypt, write the encoded digest from offset 0 of input1, and
 * store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL160_5_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL160_5_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL160_5_crypt, write the encoded digest from offset 0 of that same
 * buffer, and store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL160_5_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL160_5_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL160_5_inc;
	}
}

/*
 * Shared worker for the input<X>_to_output<Y> entry points.
 *
 * X: 1 selects input_buf_X86/total_len_X86; any other value selects
 *    input_buf2_X86/total_len2_X86.
 * Y: 1-based index of the dynamic_BHO slot that receives the digests.
 * i, til: half-open index range to process.
 *
 * For each index, DoHAVAL160_5_crypt_only writes the digest into
 * dynamic_BHO[Y-1].dat[i].b.  Storage class is spelled `static inline`:
 * placing `static` after `inline` is an obsolescent form in C11.
 */
static inline void _Dyna__HAVAL160_5_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	dynamic_BHO[--Y].width = 20; // Y was 1 based for ease of reading.
	for (; i < til; i += HAVAL160_5_inc) {
		void *src;
		uint32_t src_len;

		/* Slot metadata is rewritten on every iteration (and only if the range is non-empty). */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
			#if (MD5_X2)
			/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
		}
		DoHAVAL160_5_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
	}
}
/*
 * Entry points for "input X -> output Y" (X in 1..2, Y in 1..4).  Each one
 * runs PRELIM_NO_TID and forwards (X, Y, i, til) to
 * _Dyna__HAVAL160_5_crypt_inputX_to_outputY.
 */
void DynamicFunc__HAVAL160_5_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL160_5_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL160_5_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For each index set up by PRELIM_NO_TID, hash the input1 buffer with
 * DoHAVAL160_5_crypt_f, which stores the first 16 digest bytes in the
 * matching crypt_key_X86 entry.
 */
void DynamicFunc__HAVAL160_5_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL160_5_crypt_f(src, total_len_X86[i], key);
		i += HAVAL160_5_inc;
	}
}

/*
 * For each index set up by PRELIM_NO_TID, hash the input2 buffer with
 * DoHAVAL160_5_crypt_f, which stores the first 16 digest bytes in the
 * matching crypt_key_X86 entry.
 */
void DynamicFunc__HAVAL160_5_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL160_5_crypt_f(src, total_len2_X86[i], key);
		i += HAVAL160_5_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL192_3 PARAHASH=HAVAL192_3 BIN_SZ=24 BIN_REAL_SZ=24 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval192_3_context HASH_Init=sph_haval192_3_init HASH_Update=sph_haval192_3 HASH_Final=sph_haval192_3_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL192_3
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL192_3 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL192_3_LOOPS is not referenced anywhere in this HAVAL192_3 section; presumably used elsewhere or a generator artifact - confirm. */
#define HAVAL192_3_LOOPS 1
/* Index stride used by every DynamicFunc__HAVAL192_3_* loop in this section. */
static const uint32_t HAVAL192_3_inc = 1;

/*
 * One-shot HAVAL192_3 (init / update / close) over `len` bytes at `in`.
 * The digest is produced in a 24-byte local buffer and only its first
 * 16 bytes are copied to `out`.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL192_3_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read; presumably it exists to word-align the digest bytes. */
	union { unsigned char bytes[24]; uint32_t words[24/sizeof(uint32_t)]; } digest;
	sph_haval192_3_context ctx;

	sph_haval192_3_init(&ctx);
	sph_haval192_3(&ctx, in, len);
	sph_haval192_3_close(digest.bytes, &ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash `ilen` bytes at `in` with HAVAL192_3 and emit the 24-byte digest into
 * `out` starting at byte offset *tot_len, in the format selected by
 * eLargeOut_get(tid).
 *
 * *tot_len is advanced by 48 in the eBase16 case, otherwise by whatever
 * large_hash_output returns.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL192_3_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[24];
	unsigned char *dst;
	sph_haval192_3_context ctx;

	sph_haval192_3_init(&ctx);
	sph_haval192_3(&ctx, in, ilen);
	sph_haval192_3_close(digest, &ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 24, tid);
		return;
	}
	// since this is the usual, we avoid the extra overhead of large_hash_output, and go directly to the hex_out.
	hex_out_buf(digest, dst, 24);
	*tot_len += 24*2;
}
/*
 * One-shot HAVAL192_3 (init / update / close) over `ilen` bytes at `in`;
 * the close routine writes the digest directly to `out`.
 *
 * Storage class is spelled `static inline`: placing `static` after `inline`
 * is an obsolescent form in C11.
 */
static inline void DoHAVAL192_3_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval192_3_context hash_state;

	sph_haval192_3_init(&hash_state);
	sph_haval192_3(&hash_state, in, ilen);
	sph_haval192_3_close(out, &hash_state);
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL192_3_crypt and append the encoded digest to input2:
 * total_len2_X86[i] is both the write offset and the length that grows.
 */
void DynamicFunc__HAVAL192_3_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL192_3_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL192_3_crypt and append the encoded digest to input1:
 * total_len_X86[i] is both the write offset and the length that grows.
 */
void DynamicFunc__HAVAL192_3_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL192_3_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL192_3_crypt and write the encoded digest into input2 starting at
 * nLargeOff_get(tid).  The advanced offset is a local and is discarded; no
 * length array is written.
 */
void DynamicFunc__HAVAL192_3_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL192_3_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL192_3_crypt and write the encoded digest into input1 starting at
 * nLargeOff_get(tid).  The advanced offset is a local and is discarded; no
 * length array is written.
 */
void DynamicFunc__HAVAL192_3_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL192_3_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL192_3_crypt and write the encoded digest back into the same buffer
 * starting at nLargeOff_get(tid).  total_len_X86 is read but not updated.
 */
void DynamicFunc__HAVAL192_3_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL192_3_crypt(buf, total_len_X86[i], buf, &write_pos, tid);
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL192_3_crypt and write the encoded digest back into the same buffer
 * starting at nLargeOff_get(tid).  total_len2_X86 is read but not updated.
 */
void DynamicFunc__HAVAL192_3_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL192_3_crypt(buf, total_len2_X86[i], buf, &write_pos, tid);
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL192_3_crypt, write the encoded digest from offset 0 of that same
 * buffer, and store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL192_3_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL192_3_crypt(buf, total_len_X86[i], buf, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input1 buffer with
 * DoHAVAL192_3_crypt, write the encoded digest from offset 0 of input2, and
 * store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL192_3_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL192_3_crypt(src, total_len_X86[i], dst, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL192_3_crypt, write the encoded digest from offset 0 of input1, and
 * store the resulting length in total_len_X86[i].
 */
void DynamicFunc__HAVAL192_3_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL192_3_crypt(src, total_len2_X86[i], dst, &out_len, tid);
		total_len_X86[i] = out_len;
		i += HAVAL192_3_inc;
	}
}

/*
 * For each index set up by PRELIM_W_TID, hash the input2 buffer with
 * DoHAVAL192_3_crypt, write the encoded digest from offset 0 of that same
 * buffer, and store the resulting length in total_len2_X86[i].
 */
void DynamicFunc__HAVAL192_3_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		#endif
		DoHAVAL192_3_crypt(buf, total_len2_X86[i], buf, &out_len, tid);
		total_len2_X86[i] = out_len;
		i += HAVAL192_3_inc;
	}
}

/*
 * Shared worker for the input<X>_to_output<Y> entry points.
 *
 * X: 1 selects input_buf_X86/total_len_X86; any other value selects
 *    input_buf2_X86/total_len2_X86.
 * Y: 1-based index of the dynamic_BHO slot that receives the digests.
 * i, til: half-open index range to process.
 *
 * For each index, DoHAVAL192_3_crypt_only writes the digest into
 * dynamic_BHO[Y-1].dat[i].b.  Storage class is spelled `static inline`:
 * placing `static` after `inline` is an obsolescent form in C11.
 */
static inline void _Dyna__HAVAL192_3_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	dynamic_BHO[--Y].width = 24; // Y was 1 based for ease of reading.
	for (; i < til; i += HAVAL192_3_inc) {
		void *src;
		uint32_t src_len;

		/* Slot metadata is rewritten on every iteration (and only if the range is non-empty). */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
			#if (MD5_X2)
			/* Odd indices use the x2.b2 member of slot [i>>MD5_X2]. */
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
		}
		DoHAVAL192_3_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
	}
}
/*
 * Entry points for "input X -> output Y" (X in 1..2, Y in 1..4).  Each one
 * runs PRELIM_NO_TID and forwards (X, Y, i, til) to
 * _Dyna__HAVAL192_3_crypt_inputX_to_outputY.
 */
void DynamicFunc__HAVAL192_3_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL192_3_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL192_3_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL192_3_crypt_f with
 * input buffer 1 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL192_3_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_3_crypt_f(hash_in, total_len_X86[i], key_out);
		i += HAVAL192_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL192_3_crypt_f with
 * input buffer 2 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL192_3_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_3_crypt_f(hash_in, total_len2_X86[i], key_out);
		i += HAVAL192_3_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL192_4 PARAHASH=HAVAL192_4 BIN_SZ=24 BIN_REAL_SZ=24 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval192_4_context HASH_Init=sph_haval192_4_init HASH_Update=sph_haval192_4 HASH_Final=sph_haval192_4_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL192_4
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL192_4 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL192_4_LOOPS is not referenced in the HAVAL192_4 code that follows; presumably kept for parity with SIMD-capable hashes -- confirm in the .cin template. */
#define HAVAL192_4_LOOPS 1
/* Step by which the HAVAL192_4 crypt loops below advance their index. */
static const uint32_t HAVAL192_4_inc = 1;

/*
 * Runs the sph_haval192_4 init / update / close sequence over len bytes at
 * in, using a 24-byte local buffer for the result, and copies only the
 * first 16 bytes of that buffer to out (this section is generated with
 * DEFINED=TRUNC_TO16).
 */
static inline void DoHAVAL192_4_crypt_f(void *in, uint32_t len, void *out) {
	// the uint32_t member is never accessed by name
	union xx { unsigned char u[24]; uint32_t a[24/sizeof(uint32_t)]; } digest;
	sph_haval192_4_context hctx;

	sph_haval192_4_init(&hctx);
	sph_haval192_4(&hctx, in, len);
	sph_haval192_4_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Runs the sph_haval192_4 init / update / close sequence over ilen bytes at
 * in, then writes an encoded form of the 24-byte result into out starting
 * at byte offset *tot_len and advances *tot_len past what was written.
 *
 * When eLargeOut_get(tid) is eBase16 the result goes straight through
 * hex_out_buf and *tot_len grows by 24*2; otherwise large_hash_output does
 * the encoding and its return value is added to *tot_len.
 * The result is fully computed into a local buffer before out is touched.
 */
static inline void DoHAVAL192_4_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[24];
	unsigned char *dst;
	sph_haval192_4_context hctx;

	sph_haval192_4_init(&hctx);
	sph_haval192_4(&hctx, in, ilen);
	sph_haval192_4_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 24, tid);
		return;
	}
	// since this is the usual, we avoid the extra overhead of large_hash_output, and go directly to the hex_out.
	hex_out_buf(digest, dst, 24);
	*tot_len += 24*2;
}
/*
 * Runs the sph_haval192_4 init / update / close sequence over ilen bytes at
 * in, handing out directly to the close call (no intermediate buffer and
 * no encoding step).
 */
static inline void DoHAVAL192_4_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval192_4_context hctx;

	sph_haval192_4_init(&hctx);
	sph_haval192_4(&hctx, in, ilen);
	sph_haval192_4_close(out, &hctx);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination.  The
 * current length of input 2 is passed as the write position, so the helper
 * advances it in place.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt(hash_in, total_len_X86[i], hash_out, &(total_len2_X86[i]), tid);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination.  The
 * current length of input 1 is passed as the write position, so the helper
 * advances it in place.
 */
void DynamicFunc__HAVAL192_4_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt(hash_in, total_len2_X86[i], hash_out, &(total_len_X86[i]), tid);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination, writing
 * at the position returned by nLargeOff_get(tid).  The updated position is
 * discarded, so the recorded length of input 2 is left untouched.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt(hash_in, total_len_X86[i], hash_out, &out_pos, tid);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination, writing
 * at the position returned by nLargeOff_get(tid).  The updated position is
 * discarded, so the recorded length of input 1 is left untouched.
 */
void DynamicFunc__HAVAL192_4_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt(hash_in, total_len2_X86[i], hash_out, &out_pos, tid);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 1 as both data and destination, writing at the position
 * returned by nLargeOff_get(tid).  The updated position is discarded, so
 * the recorded length of input 1 is left untouched.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_4_crypt(buf, total_len_X86[i], buf, &out_pos, tid);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 2 as both data and destination, writing at the position
 * returned by nLargeOff_get(tid).  The updated position is discarded, so
 * the recorded length of input 2 is left untouched.
 */
void DynamicFunc__HAVAL192_4_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_4_crypt(buf, total_len2_X86[i], buf, &out_pos, tid);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 1 as both data and destination.  The write position starts
 * at 0 and the position the helper leaves behind becomes the new length of
 * input 1.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_4_crypt(buf, total_len_X86[i], buf, &out_pos, tid);
		total_len_X86[i] = out_pos;
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination.  The
 * write position starts at 0 and the position the helper leaves behind
 * becomes the length of input 2.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt(hash_in, total_len_X86[i], hash_out, &out_pos, tid);
		total_len2_X86[i] = out_pos;
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination.  The
 * write position starts at 0 and the position the helper leaves behind
 * becomes the length of input 1.
 */
void DynamicFunc__HAVAL192_4_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt(hash_in, total_len2_X86[i], hash_out, &out_pos, tid);
		total_len_X86[i] = out_pos;
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt with
 * input buffer 2 as both data and destination.  The write position starts
 * at 0 and the position the helper leaves behind becomes the new length of
 * input 2.
 */
void DynamicFunc__HAVAL192_4_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_4_crypt(buf, total_len2_X86[i], buf, &out_pos, tid);
		total_len2_X86[i] = out_pos;
		i += HAVAL192_4_inc;
	}
}

/*
 * Shared worker behind the DynamicFunc__HAVAL192_4_crypt_inputN_to_outputM
 * entry points.  X == 1 selects input buffer 1, any other value selects
 * input buffer 2.  Y is the 1-based output number.  For each index from i
 * up to til the dynamic_BHO[Y-1] descriptor fields are set and
 * DoHAVAL192_4_crypt_only is called with dynamic_BHO[Y-1].dat[i].b as its
 * destination.
 */
static inline void _Dyna__HAVAL192_4_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	// Y arrives 1-based for ease of reading; convert to the array index
	dynamic_BHO[Y].width = 24;
	while (i < til) {
		void *hash_in;
		uint32_t in_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
			in_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
			#endif
				hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
		} else {
			in_len = total_len_X86[i];
			#if (MD5_X2)
			if (i & 1)
				hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			else
			#endif
				hash_in = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt_only(hash_in, in_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL192_4_inc;
	}
}
/*
 * Entry points for every (input 1|2) -> (output 1..4) combination.  Each one
 * runs PRELIM_NO_TID and forwards i / til to
 * _Dyna__HAVAL192_4_crypt_inputX_to_outputY with the matching (X, Y) pair.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input1_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input1_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input1_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input2_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input2_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input2_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL192_4_crypt_input2_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_4_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt_f with
 * input buffer 1 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL192_4_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt_f(hash_in, total_len_X86[i], key_out);
		i += HAVAL192_4_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL192_4_crypt_f with
 * input buffer 2 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL192_4_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_4_crypt_f(hash_in, total_len2_X86[i], key_out);
		i += HAVAL192_4_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL192_5 PARAHASH=HAVAL192_5 BIN_SZ=24 BIN_REAL_SZ=24 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval192_5_context HASH_Init=sph_haval192_5_init HASH_Update=sph_haval192_5 HASH_Final=sph_haval192_5_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL192_5
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL192_5 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL192_5_LOOPS is not referenced in the HAVAL192_5 code that follows; presumably kept for parity with SIMD-capable hashes -- confirm in the .cin template. */
#define HAVAL192_5_LOOPS 1
/* Step by which the HAVAL192_5 crypt loops below advance their index. */
static const uint32_t HAVAL192_5_inc = 1;

/*
 * Runs the sph_haval192_5 init / update / close sequence over len bytes at
 * in, using a 24-byte local buffer for the result, and copies only the
 * first 16 bytes of that buffer to out (this section is generated with
 * DEFINED=TRUNC_TO16).
 */
static inline void DoHAVAL192_5_crypt_f(void *in, uint32_t len, void *out) {
	// the uint32_t member is never accessed by name
	union xx { unsigned char u[24]; uint32_t a[24/sizeof(uint32_t)]; } digest;
	sph_haval192_5_context hctx;

	sph_haval192_5_init(&hctx);
	sph_haval192_5(&hctx, in, len);
	sph_haval192_5_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Runs the sph_haval192_5 init / update / close sequence over ilen bytes at
 * in, then writes an encoded form of the 24-byte result into out starting
 * at byte offset *tot_len and advances *tot_len past what was written.
 *
 * When eLargeOut_get(tid) is eBase16 the result goes straight through
 * hex_out_buf and *tot_len grows by 24*2; otherwise large_hash_output does
 * the encoding and its return value is added to *tot_len.
 * The result is fully computed into a local buffer before out is touched.
 */
static inline void DoHAVAL192_5_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[24];
	unsigned char *dst;
	sph_haval192_5_context hctx;

	sph_haval192_5_init(&hctx);
	sph_haval192_5(&hctx, in, ilen);
	sph_haval192_5_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 24, tid);
		return;
	}
	// since this is the usual, we avoid the extra overhead of large_hash_output, and go directly to the hex_out.
	hex_out_buf(digest, dst, 24);
	*tot_len += 24*2;
}
/*
 * Runs the sph_haval192_5 init / update / close sequence over ilen bytes at
 * in, handing out directly to the close call (no intermediate buffer and
 * no encoding step).
 */
static inline void DoHAVAL192_5_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval192_5_context hctx;

	sph_haval192_5_init(&hctx);
	sph_haval192_5(&hctx, in, ilen);
	sph_haval192_5_close(out, &hctx);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination.  The
 * current length of input 2 is passed as the write position, so the helper
 * advances it in place.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt(hash_in, total_len_X86[i], hash_out, &(total_len2_X86[i]), tid);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination.  The
 * current length of input 1 is passed as the write position, so the helper
 * advances it in place.
 */
void DynamicFunc__HAVAL192_5_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt(hash_in, total_len2_X86[i], hash_out, &(total_len_X86[i]), tid);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination, writing
 * at the position returned by nLargeOff_get(tid).  The updated position is
 * discarded, so the recorded length of input 2 is left untouched.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt(hash_in, total_len_X86[i], hash_out, &out_pos, tid);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination, writing
 * at the position returned by nLargeOff_get(tid).  The updated position is
 * discarded, so the recorded length of input 1 is left untouched.
 */
void DynamicFunc__HAVAL192_5_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt(hash_in, total_len2_X86[i], hash_out, &out_pos, tid);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 1 as both data and destination, writing at the position
 * returned by nLargeOff_get(tid).  The updated position is discarded, so
 * the recorded length of input 1 is left untouched.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_5_crypt(buf, total_len_X86[i], buf, &out_pos, tid);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 2 as both data and destination, writing at the position
 * returned by nLargeOff_get(tid).  The updated position is discarded, so
 * the recorded length of input 2 is left untouched.
 */
void DynamicFunc__HAVAL192_5_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_5_crypt(buf, total_len2_X86[i], buf, &out_pos, tid);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 1 as both data and destination.  The write position starts
 * at 0 and the position the helper leaves behind becomes the new length of
 * input 1.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_5_crypt(buf, total_len_X86[i], buf, &out_pos, tid);
		total_len_X86[i] = out_pos;
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination.  The
 * write position starts at 0 and the position the helper leaves behind
 * becomes the length of input 2.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt(hash_in, total_len_X86[i], hash_out, &out_pos, tid);
		total_len2_X86[i] = out_pos;
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination.  The
 * write position starts at 0 and the position the helper leaves behind
 * becomes the length of input 1.
 */
void DynamicFunc__HAVAL192_5_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt(hash_in, total_len2_X86[i], hash_out, &out_pos, tid);
		total_len_X86[i] = out_pos;
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt with
 * input buffer 2 as both data and destination.  The write position starts
 * at 0 and the position the helper leaves behind becomes the new length of
 * input 2.
 */
void DynamicFunc__HAVAL192_5_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoHAVAL192_5_crypt(buf, total_len2_X86[i], buf, &out_pos, tid);
		total_len2_X86[i] = out_pos;
		i += HAVAL192_5_inc;
	}
}

/*
 * Shared worker behind the DynamicFunc__HAVAL192_5_crypt_inputN_to_outputM
 * entry points.  X == 1 selects input buffer 1, any other value selects
 * input buffer 2.  Y is the 1-based output number.  For each index from i
 * up to til the dynamic_BHO[Y-1] descriptor fields are set and
 * DoHAVAL192_5_crypt_only is called with dynamic_BHO[Y-1].dat[i].b as its
 * destination.
 */
static inline void _Dyna__HAVAL192_5_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	// Y arrives 1-based for ease of reading; convert to the array index
	dynamic_BHO[Y].width = 24;
	while (i < til) {
		void *hash_in;
		uint32_t in_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
			in_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
			#endif
				hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
		} else {
			in_len = total_len_X86[i];
			#if (MD5_X2)
			if (i & 1)
				hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			else
			#endif
				hash_in = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt_only(hash_in, in_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL192_5_inc;
	}
}
/*
 * Entry points for every (input 1|2) -> (output 1..4) combination.  Each one
 * runs PRELIM_NO_TID and forwards i / til to
 * _Dyna__HAVAL192_5_crypt_inputX_to_outputY with the matching (X, Y) pair.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input1_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input1_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input1_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input2_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input2_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input2_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL192_5_crypt_input2_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL192_5_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt_f with
 * input buffer 1 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL192_5_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt_f(hash_in, total_len_X86[i], key_out);
		i += HAVAL192_5_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL192_5_crypt_f with
 * input buffer 2 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL192_5_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL192_5_crypt_f(hash_in, total_len2_X86[i], key_out);
		i += HAVAL192_5_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL224_3 PARAHASH=HAVAL224_3 BIN_SZ=28 BIN_REAL_SZ=28 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval224_3_context HASH_Init=sph_haval224_3_init HASH_Update=sph_haval224_3 HASH_Final=sph_haval224_3_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL224_3
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL224_3 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL224_3_LOOPS is not referenced in the HAVAL224_3 code that follows; presumably kept for parity with SIMD-capable hashes -- confirm in the .cin template. */
#define HAVAL224_3_LOOPS 1
/* Step by which the HAVAL224_3 crypt loops below advance their index. */
static const uint32_t HAVAL224_3_inc = 1;

/*
 * Runs the sph_haval224_3 init / update / close sequence over len bytes at
 * in, using a 28-byte local buffer for the result, and copies only the
 * first 16 bytes of that buffer to out (this section is generated with
 * DEFINED=TRUNC_TO16).
 */
static inline void DoHAVAL224_3_crypt_f(void *in, uint32_t len, void *out) {
	// the uint32_t member is never accessed by name
	union xx { unsigned char u[28]; uint32_t a[28/sizeof(uint32_t)]; } digest;
	sph_haval224_3_context hctx;

	sph_haval224_3_init(&hctx);
	sph_haval224_3(&hctx, in, len);
	sph_haval224_3_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Runs the sph_haval224_3 init / update / close sequence over ilen bytes at
 * in, then writes an encoded form of the 28-byte result into out starting
 * at byte offset *tot_len and advances *tot_len past what was written.
 *
 * When eLargeOut_get(tid) is eBase16 the result goes straight through
 * hex_out_buf and *tot_len grows by 28*2; otherwise large_hash_output does
 * the encoding and its return value is added to *tot_len.
 * The result is fully computed into a local buffer before out is touched.
 */
static inline void DoHAVAL224_3_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[28];
	unsigned char *dst;
	sph_haval224_3_context hctx;

	sph_haval224_3_init(&hctx);
	sph_haval224_3(&hctx, in, ilen);
	sph_haval224_3_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 28, tid);
		return;
	}
	// since this is the usual, we avoid the extra overhead of large_hash_output, and go directly to the hex_out.
	hex_out_buf(digest, dst, 28);
	*tot_len += 28*2;
}
/*
 * Runs the sph_haval224_3 init / update / close sequence over ilen bytes at
 * in, handing out directly to the close call (no intermediate buffer and
 * no encoding step).
 */
static inline void DoHAVAL224_3_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval224_3_context hctx;

	sph_haval224_3_init(&hctx);
	sph_haval224_3(&hctx, in, ilen);
	sph_haval224_3_close(out, &hctx);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination.  The
 * current length of input 2 is passed as the write position, so the helper
 * advances it in place.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt(hash_in, total_len_X86[i], hash_out, &(total_len2_X86[i]), tid);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination.  The
 * current length of input 1 is passed as the write position, so the helper
 * advances it in place.
 */
void DynamicFunc__HAVAL224_3_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt(hash_in, total_len2_X86[i], hash_out, &(total_len_X86[i]), tid);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination, writing
 * at the position returned by nLargeOff_get(tid).  The updated position is
 * discarded, so the recorded length of input 2 is left untouched.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt(hash_in, total_len_X86[i], hash_out, &out_pos, tid);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination, writing
 * at the position returned by nLargeOff_get(tid).  The updated position is
 * discarded, so the recorded length of input 1 is left untouched.
 */
void DynamicFunc__HAVAL224_3_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt(hash_in, total_len2_X86[i], hash_out, &out_pos, tid);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 1 as both data and destination, writing at the position
 * returned by nLargeOff_get(tid).  The updated position is discarded, so
 * the recorded length of input 1 is left untouched.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoHAVAL224_3_crypt(buf, total_len_X86[i], buf, &out_pos, tid);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 2 as both data and destination, writing at the position
 * returned by nLargeOff_get(tid).  The updated position is discarded, so
 * the recorded length of input 2 is left untouched.
 */
void DynamicFunc__HAVAL224_3_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = nLargeOff_get(tid);
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoHAVAL224_3_crypt(buf, total_len2_X86[i], buf, &out_pos, tid);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 1 as both data and destination.  The write position starts
 * at 0 and the position the helper leaves behind becomes the new length of
 * input 1.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoHAVAL224_3_crypt(buf, total_len_X86[i], buf, &out_pos, tid);
		total_len_X86[i] = out_pos;
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 1 as the data and input buffer 2 as the destination.  The
 * write position starts at 0 and the position the helper leaves behind
 * becomes the length of input 2.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt(hash_in, total_len_X86[i], hash_out, &out_pos, tid);
		total_len2_X86[i] = out_pos;
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 2 as the data and input buffer 1 as the destination.  The
 * write position starts at 0 and the position the helper leaves behind
 * becomes the length of input 1.
 */
void DynamicFunc__HAVAL224_3_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *hash_in, *hash_out;
		#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			hash_out = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
		#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			hash_out = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt(hash_in, total_len2_X86[i], hash_out, &out_pos, tid);
		total_len_X86[i] = out_pos;
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_W_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt with
 * input buffer 2 as both data and destination.  The write position starts
 * at 0 and the position the helper leaves behind becomes the new length of
 * input 2.
 */
void DynamicFunc__HAVAL224_3_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t out_pos = 0;
		void *buf;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
		#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoHAVAL224_3_crypt(buf, total_len2_X86[i], buf, &out_pos, tid);
		total_len2_X86[i] = out_pos;
		i += HAVAL224_3_inc;
	}
}

/*
 * Shared worker behind the DynamicFunc__HAVAL224_3_crypt_inputN_to_outputM
 * entry points.  X == 1 selects input buffer 1, any other value selects
 * input buffer 2.  Y is the 1-based output number.  For each index from i
 * up to til the dynamic_BHO[Y-1] descriptor fields are set and
 * DoHAVAL224_3_crypt_only is called with dynamic_BHO[Y-1].dat[i].b as its
 * destination.
 */
static inline void _Dyna__HAVAL224_3_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	// Y arrives 1-based for ease of reading; convert to the array index
	dynamic_BHO[Y].width = 28;
	while (i < til) {
		void *hash_in;
		uint32_t in_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;
		if (X != 1) {
			in_len = total_len2_X86[i];
			#if (MD5_X2)
			if (i & 1)
				hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
			#endif
				hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
		} else {
			in_len = total_len_X86[i];
			#if (MD5_X2)
			if (i & 1)
				hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			else
			#endif
				hash_in = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt_only(hash_in, in_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL224_3_inc;
	}
}
/*
 * Entry points for every (input 1|2) -> (output 1..4) combination.  Each one
 * runs PRELIM_NO_TID and forwards i / til to
 * _Dyna__HAVAL224_3_crypt_inputX_to_outputY with the matching (X, Y) pair.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input1_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(1, 2, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input1_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(1, 3, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input1_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(1, 4, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input2_to_output1(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(2, 1, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input2_to_output2(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(2, 2, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input2_to_output3(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(2, 3, i, til);
}
void DynamicFunc__HAVAL224_3_crypt_input2_to_output4(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	_Dyna__HAVAL224_3_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt_f with
 * input buffer 1 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL224_3_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt_f(hash_in, total_len_X86[i], key_out);
		i += HAVAL224_3_inc;
	}
}

/*
 * For every index from i up to til (supplied through the DYNA_OMP_PARAMS /
 * PRELIM_NO_TID macros, defined elsewhere), calls DoHAVAL224_3_crypt_f with
 * input buffer 2 as the data and the matching crypt_key_X86 slot as the
 * destination.
 */
void DynamicFunc__HAVAL224_3_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *hash_in, *key_out;
	#if (MD5_X2)
		if (i & 1) {
			hash_in = input_buf2_X86[i>>MD5_X2].x2.b2;
			key_out = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
	#endif
		{
			hash_in = input_buf2_X86[i>>MD5_X2].x1.b;
			key_out = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoHAVAL224_3_crypt_f(hash_in, total_len2_X86[i], key_out);
		i += HAVAL224_3_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL224_4 PARAHASH=HAVAL224_4 BIN_SZ=28 BIN_REAL_SZ=28 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval224_4_context HASH_Init=sph_haval224_4_init HASH_Update=sph_haval224_4 HASH_Final=sph_haval224_4_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL224_4
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL224_4 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// HAVAL224_4_LOOPS is not referenced by the HAVAL224_4 code in this section;
// presumably a generator-side batch count -- TODO confirm against the .cin template.
#define HAVAL224_4_LOOPS 1
// Step by which every HAVAL224_4 loop in this section advances its candidate index.
static const uint32_t HAVAL224_4_inc = 1;

/*
 * Computes the HAVAL224_4 digest of the len bytes at in and copies only the
 * first 16 bytes of the 28-byte result to out.
 */
inline static void DoHAVAL224_4_crypt_f(void *in, uint32_t len, void *out) {
	// The uint32_t member is never read here; presumably it only forces word alignment.
	union xx {
		unsigned char u[28];
		uint32_t a[28/sizeof(uint32_t)];
	} digest;
	sph_haval224_4_context hctx;

	sph_haval224_4_init(&hctx);
	sph_haval224_4(&hctx, in, len);
	sph_haval224_4_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hashes the ilen bytes at in with HAVAL224_4, writes the encoded digest into
 * out starting at byte offset *tot_len, and advances *tot_len by the number of
 * bytes written. The encoding is chosen by eLargeOut_get(tid).
 */
inline static void DoHAVAL224_4_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[28];
	unsigned char *dst;
	sph_haval224_4_context hctx;

	sph_haval224_4_init(&hctx);
	sph_haval224_4(&hctx, in, ilen);
	sph_haval224_4_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 28, tid);
		return;
	}
	// Base-16 is the usual case, so skip large_hash_output's overhead and hex-encode directly.
	hex_out_buf(digest, dst, 28);
	*tot_len += 28*2;
}
/*
 * Hashes the ilen bytes at in with HAVAL224_4; the close routine writes the
 * digest directly to out (no hex encoding or truncation is done here).
 */
inline static void DoHAVAL224_4_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_haval224_4_context hctx;

	sph_haval224_4_init(&hctx);
	sph_haval224_4(&hctx, in, ilen);
	sph_haval224_4_close(out, &hctx);
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #1 with HAVAL224_4 and appends the encoded digest to input
 * buffer #2, growing total_len2_X86[i] accordingly.
 */
void DynamicFunc__HAVAL224_4_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_4_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #2 with HAVAL224_4 and appends the encoded digest to input
 * buffer #1, growing total_len_X86[i] accordingly.
 */
void DynamicFunc__HAVAL224_4_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_4_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_4 and writes the encoded digest into input buffer #2 at the byte
 * offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL224_4_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_4_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_4 and writes the encoded digest into input buffer #1 at the byte
 * offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL224_4_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_4_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_4 and writes the encoded digest back into the same buffer at the
 * byte offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL224_4_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_4_crypt(buf, total_len_X86[i], buf, &write_pos, tid);
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_4 and writes the encoded digest back into the same buffer at the
 * byte offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL224_4_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_4_crypt(buf, total_len2_X86[i], buf, &write_pos, tid);
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_4, writes the encoded digest at the start of the same buffer, and
 * sets total_len_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_4_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_4_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_4, writes the encoded digest at the start of input buffer #2, and
 * sets total_len2_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_4_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_4_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_4, writes the encoded digest at the start of input buffer #1, and
 * sets total_len_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_4_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_4_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
		i += HAVAL224_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_4, writes the encoded digest at the start of the same buffer, and
 * sets total_len2_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_4_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_4_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += HAVAL224_4_inc;
	}
}

/*
 * Hashes one input buffer of every candidate in [i, til) with HAVAL224_4 and
 * stores the digest in dynamic_BHO[Y-1].dat[i].b.
 *   X   - 1 selects input_buf_X86; any other value selects input_buf2_X86.
 *   Y   - 1-based output number; converted to a 0-based dynamic_BHO index.
 * Also records width=28 (always) and BE=0, bits=32, mixed_SIMD=0 (once per
 * processed candidate) in dynamic_BHO[Y-1].
 */
inline static void _Dyna__HAVAL224_4_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y; // Y was 1 based for ease of reading.
	dynamic_BHO[Y].width = 28;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len2_X86[i];
		}
		DoHAVAL224_4_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL224_4_inc;
	}
}
// HAVAL224_4 of input buffer #1 -> digest stored in output #1 (dynamic_BHO[0]).
void DynamicFunc__HAVAL224_4_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(1, 1, i, til);
}
// HAVAL224_4 of input buffer #1 -> digest stored in output #2 (dynamic_BHO[1]).
void DynamicFunc__HAVAL224_4_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(1, 2, i, til);
}
// HAVAL224_4 of input buffer #1 -> digest stored in output #3 (dynamic_BHO[2]).
void DynamicFunc__HAVAL224_4_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(1, 3, i, til);
}
// HAVAL224_4 of input buffer #1 -> digest stored in output #4 (dynamic_BHO[3]).
void DynamicFunc__HAVAL224_4_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(1, 4, i, til);
}
// HAVAL224_4 of input buffer #2 -> digest stored in output #1 (dynamic_BHO[0]).
void DynamicFunc__HAVAL224_4_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(2, 1, i, til);
}
// HAVAL224_4 of input buffer #2 -> digest stored in output #2 (dynamic_BHO[1]).
void DynamicFunc__HAVAL224_4_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(2, 2, i, til);
}
// HAVAL224_4 of input buffer #2 -> digest stored in output #3 (dynamic_BHO[2]).
void DynamicFunc__HAVAL224_4_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(2, 3, i, til);
}
// HAVAL224_4 of input buffer #2 -> digest stored in output #4 (dynamic_BHO[3]).
void DynamicFunc__HAVAL224_4_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_4_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate in the i..til range (set up by PRELIM_NO_TID),
 * hashes input buffer #1 with HAVAL224_4 and stores the first 16 digest bytes
 * in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL224_4_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL224_4_crypt_f(src, total_len_X86[i], key);
		i += HAVAL224_4_inc;
	}
}

/*
 * Final step: for each candidate in the i..til range (set up by PRELIM_NO_TID),
 * hashes input buffer #2 with HAVAL224_4 and stores the first 16 digest bytes
 * in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL224_4_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL224_4_crypt_f(src, total_len2_X86[i], key);
		i += HAVAL224_4_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL224_5 PARAHASH=HAVAL224_5 BIN_SZ=28 BIN_REAL_SZ=28 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval224_5_context HASH_Init=sph_haval224_5_init HASH_Update=sph_haval224_5 HASH_Final=sph_haval224_5_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL224_5
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL224_5 functions.  This code was generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// HAVAL224_5_LOOPS is not referenced by the HAVAL224_5 code in this section;
// presumably a generator-side batch count -- TODO confirm against the .cin template.
#define HAVAL224_5_LOOPS 1
// Step by which every HAVAL224_5 loop in this section advances its candidate index.
static const uint32_t HAVAL224_5_inc = 1;

/*
 * Computes the HAVAL224_5 digest of the len bytes at in and copies only the
 * first 16 bytes of the 28-byte result to out.
 */
inline static void DoHAVAL224_5_crypt_f(void *in, uint32_t len, void *out) {
	// The uint32_t member is never read here; presumably it only forces word alignment.
	union xx {
		unsigned char u[28];
		uint32_t a[28/sizeof(uint32_t)];
	} digest;
	sph_haval224_5_context hctx;

	sph_haval224_5_init(&hctx);
	sph_haval224_5(&hctx, in, len);
	sph_haval224_5_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hashes the ilen bytes at in with HAVAL224_5, writes the encoded digest into
 * out starting at byte offset *tot_len, and advances *tot_len by the number of
 * bytes written. The encoding is chosen by eLargeOut_get(tid).
 */
inline static void DoHAVAL224_5_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[28];
	unsigned char *dst;
	sph_haval224_5_context hctx;

	sph_haval224_5_init(&hctx);
	sph_haval224_5(&hctx, in, ilen);
	sph_haval224_5_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 28, tid);
		return;
	}
	// Base-16 is the usual case, so skip large_hash_output's overhead and hex-encode directly.
	hex_out_buf(digest, dst, 28);
	*tot_len += 28*2;
}
/*
 * Hashes the ilen bytes at in with HAVAL224_5; the close routine writes the
 * digest directly to out (no hex encoding or truncation is done here).
 */
inline static void DoHAVAL224_5_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_haval224_5_context hctx;

	sph_haval224_5_init(&hctx);
	sph_haval224_5(&hctx, in, ilen);
	sph_haval224_5_close(out, &hctx);
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #1 with HAVAL224_5 and appends the encoded digest to input
 * buffer #2, growing total_len2_X86[i] accordingly.
 */
void DynamicFunc__HAVAL224_5_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_5_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #2 with HAVAL224_5 and appends the encoded digest to input
 * buffer #1, growing total_len_X86[i] accordingly.
 */
void DynamicFunc__HAVAL224_5_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_5_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_5 and writes the encoded digest into input buffer #2 at the byte
 * offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL224_5_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_5_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_5 and writes the encoded digest into input buffer #1 at the byte
 * offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL224_5_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_5_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_5 and writes the encoded digest back into the same buffer at the
 * byte offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL224_5_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_5_crypt(buf, total_len_X86[i], buf, &write_pos, tid);
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_5 and writes the encoded digest back into the same buffer at the
 * byte offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL224_5_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_5_crypt(buf, total_len2_X86[i], buf, &write_pos, tid);
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_5, writes the encoded digest at the start of the same buffer, and
 * sets total_len_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_5_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_5_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL224_5, writes the encoded digest at the start of input buffer #2, and
 * sets total_len2_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_5_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_5_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_5, writes the encoded digest at the start of input buffer #1, and
 * sets total_len_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_5_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL224_5_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
		i += HAVAL224_5_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL224_5, writes the encoded digest at the start of the same buffer, and
 * sets total_len2_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL224_5_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL224_5_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += HAVAL224_5_inc;
	}
}

/*
 * Hashes one input buffer of every candidate in [i, til) with HAVAL224_5 and
 * stores the digest in dynamic_BHO[Y-1].dat[i].b.
 *   X   - 1 selects input_buf_X86; any other value selects input_buf2_X86.
 *   Y   - 1-based output number; converted to a 0-based dynamic_BHO index.
 * Also records width=28 (always) and BE=0, bits=32, mixed_SIMD=0 (once per
 * processed candidate) in dynamic_BHO[Y-1].
 */
inline static void _Dyna__HAVAL224_5_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y; // Y was 1 based for ease of reading.
	dynamic_BHO[Y].width = 28;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len2_X86[i];
		}
		DoHAVAL224_5_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL224_5_inc;
	}
}
// HAVAL224_5 of input buffer #1 -> digest stored in output #1 (dynamic_BHO[0]).
void DynamicFunc__HAVAL224_5_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(1, 1, i, til);
}
// HAVAL224_5 of input buffer #1 -> digest stored in output #2 (dynamic_BHO[1]).
void DynamicFunc__HAVAL224_5_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(1, 2, i, til);
}
// HAVAL224_5 of input buffer #1 -> digest stored in output #3 (dynamic_BHO[2]).
void DynamicFunc__HAVAL224_5_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(1, 3, i, til);
}
// HAVAL224_5 of input buffer #1 -> digest stored in output #4 (dynamic_BHO[3]).
void DynamicFunc__HAVAL224_5_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(1, 4, i, til);
}
// HAVAL224_5 of input buffer #2 -> digest stored in output #1 (dynamic_BHO[0]).
void DynamicFunc__HAVAL224_5_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(2, 1, i, til);
}
// HAVAL224_5 of input buffer #2 -> digest stored in output #2 (dynamic_BHO[1]).
void DynamicFunc__HAVAL224_5_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(2, 2, i, til);
}
// HAVAL224_5 of input buffer #2 -> digest stored in output #3 (dynamic_BHO[2]).
void DynamicFunc__HAVAL224_5_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(2, 3, i, til);
}
// HAVAL224_5 of input buffer #2 -> digest stored in output #4 (dynamic_BHO[3]).
void DynamicFunc__HAVAL224_5_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL224_5_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate in the i..til range (set up by PRELIM_NO_TID),
 * hashes input buffer #1 with HAVAL224_5 and stores the first 16 digest bytes
 * in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL224_5_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL224_5_crypt_f(src, total_len_X86[i], key);
		i += HAVAL224_5_inc;
	}
}

/*
 * Final step: for each candidate in the i..til range (set up by PRELIM_NO_TID),
 * hashes input buffer #2 with HAVAL224_5 and stores the first 16 digest bytes
 * in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL224_5_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL224_5_crypt_f(src, total_len2_X86[i], key);
		i += HAVAL224_5_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL256_3 PARAHASH=HAVAL256_3 BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval256_3_context HASH_Init=sph_haval256_3_init HASH_Update=sph_haval256_3 HASH_Final=sph_haval256_3_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL256_3
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL256_3 functions.  This code was generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// HAVAL256_3_LOOPS is not referenced by the HAVAL256_3 code in this section;
// presumably a generator-side batch count -- TODO confirm against the .cin template.
#define HAVAL256_3_LOOPS 1
// Step by which every HAVAL256_3 loop in this section advances its candidate index.
static const uint32_t HAVAL256_3_inc = 1;

/*
 * Computes the HAVAL256_3 digest of the len bytes at in and copies only the
 * first 16 bytes of the 32-byte result to out.
 */
inline static void DoHAVAL256_3_crypt_f(void *in, uint32_t len, void *out) {
	// The uint32_t member is never read here; presumably it only forces word alignment.
	union xx {
		unsigned char u[32];
		uint32_t a[32/sizeof(uint32_t)];
	} digest;
	sph_haval256_3_context hctx;

	sph_haval256_3_init(&hctx);
	sph_haval256_3(&hctx, in, len);
	sph_haval256_3_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hashes the ilen bytes at in with HAVAL256_3, writes the encoded digest into
 * out starting at byte offset *tot_len, and advances *tot_len by the number of
 * bytes written. The encoding is chosen by eLargeOut_get(tid).
 */
inline static void DoHAVAL256_3_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	sph_haval256_3_context hctx;

	sph_haval256_3_init(&hctx);
	sph_haval256_3(&hctx, in, ilen);
	sph_haval256_3_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	// Base-16 is the usual case, so skip large_hash_output's overhead and hex-encode directly.
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Hashes the ilen bytes at in with HAVAL256_3; the close routine writes the
 * digest directly to out (no hex encoding or truncation is done here).
 */
inline static void DoHAVAL256_3_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_haval256_3_context hctx;

	sph_haval256_3_init(&hctx);
	sph_haval256_3(&hctx, in, ilen);
	sph_haval256_3_close(out, &hctx);
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #1 with HAVAL256_3 and appends the encoded digest to input
 * buffer #2, growing total_len2_X86[i] accordingly.
 */
void DynamicFunc__HAVAL256_3_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_3_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #2 with HAVAL256_3 and appends the encoded digest to input
 * buffer #1, growing total_len_X86[i] accordingly.
 */
void DynamicFunc__HAVAL256_3_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_3_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL256_3 and writes the encoded digest into input buffer #2 at the byte
 * offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL256_3_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_3_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL256_3 and writes the encoded digest into input buffer #1 at the byte
 * offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL256_3_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_3_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL256_3 and writes the encoded digest back into the same buffer at the
 * byte offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL256_3_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL256_3_crypt(buf, total_len_X86[i], buf, &write_pos, tid);
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL256_3 and writes the encoded digest back into the same buffer at the
 * byte offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL256_3_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL256_3_crypt(buf, total_len2_X86[i], buf, &write_pos, tid);
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL256_3, writes the encoded digest at the start of the same buffer, and
 * sets total_len_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL256_3_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL256_3_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL256_3, writes the encoded digest at the start of input buffer #2, and
 * sets total_len2_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL256_3_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_3_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL256_3, writes the encoded digest at the start of input buffer #1, and
 * sets total_len_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL256_3_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_3_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
		i += HAVAL256_3_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL256_3, writes the encoded digest at the start of the same buffer, and
 * sets total_len2_X86[i] to the encoded length.
 */
void DynamicFunc__HAVAL256_3_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2; // odd indexes use the x2.b2 member
		#endif
		DoHAVAL256_3_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += HAVAL256_3_inc;
	}
}

/*
 * Hashes one input buffer of every candidate in [i, til) with HAVAL256_3 and
 * stores the digest in dynamic_BHO[Y-1].dat[i].b.
 *   X   - 1 selects input_buf_X86; any other value selects input_buf2_X86.
 *   Y   - 1-based output number; converted to a 0-based dynamic_BHO index.
 * Also records width=32 (always) and BE=0, bits=32, mixed_SIMD=0 (once per
 * processed candidate) in dynamic_BHO[Y-1].
 */
inline static void _Dyna__HAVAL256_3_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	--Y; // Y was 1 based for ease of reading.
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			#endif
			src_len = total_len2_X86[i];
		}
		DoHAVAL256_3_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL256_3_inc;
	}
}
// HAVAL256_3 of input buffer #1 -> digest stored in output #1 (dynamic_BHO[0]).
void DynamicFunc__HAVAL256_3_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(1, 1, i, til);
}
// HAVAL256_3 of input buffer #1 -> digest stored in output #2 (dynamic_BHO[1]).
void DynamicFunc__HAVAL256_3_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(1, 2, i, til);
}
// HAVAL256_3 of input buffer #1 -> digest stored in output #3 (dynamic_BHO[2]).
void DynamicFunc__HAVAL256_3_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(1, 3, i, til);
}
// HAVAL256_3 of input buffer #1 -> digest stored in output #4 (dynamic_BHO[3]).
void DynamicFunc__HAVAL256_3_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(1, 4, i, til);
}
// HAVAL256_3 of input buffer #2 -> digest stored in output #1 (dynamic_BHO[0]).
void DynamicFunc__HAVAL256_3_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(2, 1, i, til);
}
// HAVAL256_3 of input buffer #2 -> digest stored in output #2 (dynamic_BHO[1]).
void DynamicFunc__HAVAL256_3_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(2, 2, i, til);
}
// HAVAL256_3 of input buffer #2 -> digest stored in output #3 (dynamic_BHO[2]).
void DynamicFunc__HAVAL256_3_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(2, 3, i, til);
}
// HAVAL256_3 of input buffer #2 -> digest stored in output #4 (dynamic_BHO[3]).
void DynamicFunc__HAVAL256_3_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_3_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate in the i..til range (set up by PRELIM_NO_TID),
 * hashes input buffer #1 with HAVAL256_3 and stores the first 16 digest bytes
 * in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL256_3_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL256_3_crypt_f(src, total_len_X86[i], key);
		i += HAVAL256_3_inc;
	}
}

/*
 * Final step: for each candidate in the i..til range (set up by PRELIM_NO_TID),
 * hashes input buffer #2 with HAVAL256_3 and stores the first 16 digest bytes
 * in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL256_3_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
	#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
	#endif
		DoHAVAL256_3_crypt_f(src, total_len2_X86[i], key);
		i += HAVAL256_3_inc;
	}
}

/***********************************************************************
 * This section of the file was auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL256_4 PARAHASH=HAVAL256_4 BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval256_4_context HASH_Init=sph_haval256_4_init HASH_Update=sph_haval256_4 HASH_Final=sph_haval256_4_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL256_4
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL256_4 functions.  This code was generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
// HAVAL256_4_LOOPS is not referenced by the HAVAL256_4 code visible in this section;
// presumably a generator-side batch count -- TODO confirm against the .cin template.
#define HAVAL256_4_LOOPS 1
// Step by which every HAVAL256_4 loop in this section advances its candidate index.
static const uint32_t HAVAL256_4_inc = 1;

/*
 * Computes the HAVAL256_4 digest of the len bytes at in and copies only the
 * first 16 bytes of the 32-byte result to out.
 */
inline static void DoHAVAL256_4_crypt_f(void *in, uint32_t len, void *out) {
	// The uint32_t member is never read here; presumably it only forces word alignment.
	union xx {
		unsigned char u[32];
		uint32_t a[32/sizeof(uint32_t)];
	} digest;
	sph_haval256_4_context hctx;

	sph_haval256_4_init(&hctx);
	sph_haval256_4(&hctx, in, len);
	sph_haval256_4_close(digest.u, &hctx);
	memcpy(out, digest.u, 16);
}

/*
 * Hashes the ilen bytes at in with HAVAL256_4, writes the encoded digest into
 * out starting at byte offset *tot_len, and advances *tot_len by the number of
 * bytes written. The encoding is chosen by eLargeOut_get(tid).
 */
inline static void DoHAVAL256_4_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	sph_haval256_4_context hctx;

	sph_haval256_4_init(&hctx);
	sph_haval256_4(&hctx, in, ilen);
	sph_haval256_4_close(digest, &hctx);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	// Base-16 is the usual case, so skip large_hash_output's overhead and hex-encode directly.
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Hashes the ilen bytes at in with HAVAL256_4; the close routine writes the
 * digest directly to out (no hex encoding or truncation is done here).
 */
inline static void DoHAVAL256_4_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_haval256_4_context hctx;

	sph_haval256_4_init(&hctx);
	sph_haval256_4(&hctx, in, ilen);
	sph_haval256_4_close(out, &hctx);
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #1 with HAVAL256_4 and appends the encoded digest to input
 * buffer #2, growing total_len2_X86[i] accordingly.
 */
void DynamicFunc__HAVAL256_4_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_4_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += HAVAL256_4_inc;
	}
}

/*
 * For each candidate in the i..til range (set up by PRELIM_W_TID): hashes
 * input buffer #2 with HAVAL256_4 and appends the encoded digest to input
 * buffer #1, growing total_len_X86[i] accordingly.
 */
void DynamicFunc__HAVAL256_4_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_4_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += HAVAL256_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #1 with
 * HAVAL256_4 and writes the encoded digest into input buffer #2 at the byte
 * offset returned by nLargeOff_get(tid). total_len2_X86 is not modified.
 */
void DynamicFunc__HAVAL256_4_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_4_crypt(src, total_len_X86[i], dst, &write_pos, tid);
		i += HAVAL256_4_inc;
	}
}

/*
 * For each candidate in the i..til range: hashes input buffer #2 with
 * HAVAL256_4 and writes the encoded digest into input buffer #1 at the byte
 * offset returned by nLargeOff_get(tid). total_len_X86 is not modified.
 */
void DynamicFunc__HAVAL256_4_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t write_pos = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
		#if (MD5_X2)
		if (i & 1) {
			// Odd indexes use the x2.b2 member of the shared slot.
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
		#endif
		DoHAVAL256_4_crypt(src, total_len2_X86[i], dst, &write_pos, tid);
		i += HAVAL256_4_inc;
	}
}

/*
 * For each candidate index, hash input1 back into input1 using a local copy
 * of nLargeOff_get(tid) as the position; the local copy is discarded
 * afterwards and total_len_X86 is not modified here.
 */
void DynamicFunc__HAVAL256_4_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_4_crypt(buf, total_len_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 back into input2 using a local copy
 * of nLargeOff_get(tid) as the position; the local copy is discarded
 * afterwards and total_len2_X86 is not modified here.
 */
void DynamicFunc__HAVAL256_4_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_4_crypt(buf, total_len2_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 back into input1 with the length
 * counter starting at 0, then store that counter in total_len_X86[i].
 */
void DynamicFunc__HAVAL256_4_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_4_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input1 into input2 with the length counter
 * starting at 0, then store that counter in total_len2_X86[i].
 */
void DynamicFunc__HAVAL256_4_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_4_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 into input1 with the length counter
 * starting at 0, then store that counter in total_len_X86[i].
 */
void DynamicFunc__HAVAL256_4_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_4_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 back into input2 with the length
 * counter starting at 0, then store that counter in total_len2_X86[i].
 */
void DynamicFunc__HAVAL256_4_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_4_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * Hash input X (1 selects input1, anything else input2) for every index in
 * [i, til) and store the raw digest in dynamic_BHO[Y-1].dat[index].b.
 * Y is 1-based on entry.
 */
inline static void _Dyna__HAVAL256_4_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* convert the 1-based output number to an array index */
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Written on every pass, exactly as generated. */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoHAVAL256_4_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL256_4_inc;
	}
}
/* HAVAL256_4 of input1 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__HAVAL256_4_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(1, 1, i, til);
}
/* HAVAL256_4 of input1 into large-hash output 2 (dynamic_BHO[1]). */
void DynamicFunc__HAVAL256_4_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(1, 2, i, til);
}
/* HAVAL256_4 of input1 into large-hash output 3 (dynamic_BHO[2]). */
void DynamicFunc__HAVAL256_4_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(1, 3, i, til);
}
/* HAVAL256_4 of input1 into large-hash output 4 (dynamic_BHO[3]). */
void DynamicFunc__HAVAL256_4_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(1, 4, i, til);
}
/* HAVAL256_4 of input2 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__HAVAL256_4_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(2, 1, i, til);
}
/* HAVAL256_4 of input2 into large-hash output 2 (dynamic_BHO[1]). */
void DynamicFunc__HAVAL256_4_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(2, 2, i, til);
}
/* HAVAL256_4 of input2 into large-hash output 3 (dynamic_BHO[2]). */
void DynamicFunc__HAVAL256_4_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(2, 3, i, til);
}
/* HAVAL256_4 of input2 into large-hash output 4 (dynamic_BHO[3]). */
void DynamicFunc__HAVAL256_4_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_4_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate index, hand input1 to DoHAVAL256_4_crypt_f
 * with the matching crypt_key_X86 slot as its output.
 */
void DynamicFunc__HAVAL256_4_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_4_crypt_f(src, total_len_X86[i], key);
	}
}

/*
 * Final step: for each candidate index, hand input2 to DoHAVAL256_4_crypt_f
 * with the matching crypt_key_X86 slot as its output.
 */
void DynamicFunc__HAVAL256_4_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += HAVAL256_4_inc) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_4_crypt_f(src, total_len2_X86[i], key);
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=HAVAL256_5 PARAHASH=HAVAL256_5 BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_haval256_5_context HASH_Init=sph_haval256_5_init HASH_Update=sph_haval256_5 HASH_Final=sph_haval256_5_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_HAVAL256_5
 ***********************************************************************/


/*****************************************************************************
 ****  HAVAL256_5 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): HAVAL256_5_LOOPS is not referenced by the HAVAL256_5 code visible in this section -- confirm where it is used. */
#define HAVAL256_5_LOOPS 1
/* Step added to the candidate index on each pass of the HAVAL256_5 loops below. */
static const uint32_t HAVAL256_5_inc = 1;

/*
 * Final-stage helper: hash len bytes at 'in' with HAVAL256_5 into a 32-byte
 * scratch buffer and copy only its first 16 bytes to 'out'.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- presumably remapped by
 * a project header, confirm.
 */
inline static void DoHAVAL256_5_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read; presumably it only forces word alignment. */
	union { unsigned char bytes[32]; uint32_t words[32/sizeof(uint32_t)]; } digest;
	sph_haval256_5_context hash_ctx;

	sph_haval256_5_init(&hash_ctx);
	sph_haval256_5(&hash_ctx, in, len);
	sph_haval256_5_close(digest.bytes, &hash_ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash ilen bytes at 'in' with HAVAL256_5 and write the encoded digest into
 * 'out' starting at byte offset *tot_len. *tot_len is then advanced by 32*2
 * in the base-16 case, otherwise by the value large_hash_output() returns.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- confirm it is remapped.
 */
inline static void DoHAVAL256_5_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	sph_haval256_5_context hash_ctx;

	sph_haval256_5_init(&hash_ctx);
	sph_haval256_5(&hash_ctx, in, ilen);
	sph_haval256_5_close(digest, &hash_ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	// Base-16 is the usual case, so bypass large_hash_output() and hex-encode directly.
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Run one complete HAVAL256_5 hash (init, update, close) over the ilen
 * bytes at 'in'; 'out' is what the close call is given for the raw digest.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- presumably remapped by
 * a project header, confirm.
 */
inline static void DoHAVAL256_5_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_haval256_5_context hash_ctx;

	sph_haval256_5_init(&hash_ctx);
	sph_haval256_5(&hash_ctx, in, ilen);
	sph_haval256_5_close(out, &hash_ctx);
}

/*
 * For each candidate index, hash input1 with HAVAL256_5 and append the
 * encoded digest to input2 at total_len2_X86[i], which is advanced.
 */
void DynamicFunc__HAVAL256_5_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
	}
}

/*
 * For each candidate index, hash input2 with HAVAL256_5 and append the
 * encoded digest to input1 at total_len_X86[i], which is advanced.
 */
void DynamicFunc__HAVAL256_5_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
	}
}

/*
 * For each candidate index, hash input1 with HAVAL256_5 and write the encoded
 * digest into input2 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len2_X86 is not modified here.
 */
void DynamicFunc__HAVAL256_5_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt(src, total_len_X86[i], dst, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 with HAVAL256_5 and write the encoded
 * digest into input1 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len_X86 is not modified here.
 */
void DynamicFunc__HAVAL256_5_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt(src, total_len2_X86[i], dst, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 with HAVAL256_5 and write the encoded
 * digest back into input1 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len_X86 is not modified here.
 */
void DynamicFunc__HAVAL256_5_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_5_crypt(buf, total_len_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 with HAVAL256_5 and write the encoded
 * digest back into input2 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len2_X86 is not modified here.
 */
void DynamicFunc__HAVAL256_5_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_5_crypt(buf, total_len2_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 with HAVAL256_5 and write the encoded
 * digest at offset 0 of input1; total_len_X86[i] becomes the resulting length.
 */
void DynamicFunc__HAVAL256_5_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_5_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input1 with HAVAL256_5 and write the encoded
 * digest at offset 0 of input2; total_len2_X86[i] becomes the resulting length.
 */
void DynamicFunc__HAVAL256_5_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 with HAVAL256_5 and write the encoded
 * digest at offset 0 of input1; total_len_X86[i] becomes the resulting length.
 */
void DynamicFunc__HAVAL256_5_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 with HAVAL256_5 and write the encoded
 * digest at offset 0 of input2; total_len2_X86[i] becomes the resulting length.
 */
void DynamicFunc__HAVAL256_5_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoHAVAL256_5_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * Hash input X (1 selects input1, anything else input2) for every index in
 * [i, til) and store the raw digest in dynamic_BHO[Y-1].dat[index].b.
 * Y is 1-based on entry.
 */
inline static void _Dyna__HAVAL256_5_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* convert the 1-based output number to an array index */
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Written on every pass, exactly as generated. */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoHAVAL256_5_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += HAVAL256_5_inc;
	}
}
/* HAVAL256_5 of input1 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__HAVAL256_5_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(1, 1, i, til);
}
/* HAVAL256_5 of input1 into large-hash output 2 (dynamic_BHO[1]). */
void DynamicFunc__HAVAL256_5_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(1, 2, i, til);
}
/* HAVAL256_5 of input1 into large-hash output 3 (dynamic_BHO[2]). */
void DynamicFunc__HAVAL256_5_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(1, 3, i, til);
}
/* HAVAL256_5 of input1 into large-hash output 4 (dynamic_BHO[3]). */
void DynamicFunc__HAVAL256_5_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(1, 4, i, til);
}
/* HAVAL256_5 of input2 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__HAVAL256_5_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(2, 1, i, til);
}
/* HAVAL256_5 of input2 into large-hash output 2 (dynamic_BHO[1]). */
void DynamicFunc__HAVAL256_5_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(2, 2, i, til);
}
/* HAVAL256_5 of input2 into large-hash output 3 (dynamic_BHO[2]). */
void DynamicFunc__HAVAL256_5_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(2, 3, i, til);
}
/* HAVAL256_5 of input2 into large-hash output 4 (dynamic_BHO[3]). */
void DynamicFunc__HAVAL256_5_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__HAVAL256_5_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate index, hash input1 with HAVAL256_5 and store
 * the first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL256_5_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt_f(src, total_len_X86[i], key);
	}
}

/*
 * Final step: for each candidate index, hash input2 with HAVAL256_5 and store
 * the first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__HAVAL256_5_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += HAVAL256_5_inc) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoHAVAL256_5_crypt_f(src, total_len2_X86[i], key);
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=PANAMA PARAHASH=PANAMA BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_panama_context HASH_Init=sph_panama_init HASH_Update=sph_panama HASH_Final=sph_panama_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_PANAMA
 ***********************************************************************/


/*****************************************************************************
 ****  PANAMA functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): PANAMA_LOOPS is not referenced by the PANAMA code visible in this section -- confirm where it is used. */
#define PANAMA_LOOPS 1
/* Step added to the candidate index on each pass of the PANAMA loops below. */
static const uint32_t PANAMA_inc = 1;

/*
 * Final-stage helper: hash len bytes at 'in' with PANAMA into a 32-byte
 * scratch buffer and copy only its first 16 bytes to 'out'.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- presumably remapped by
 * a project header, confirm.
 */
inline static void DoPANAMA_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read; presumably it only forces word alignment. */
	union { unsigned char bytes[32]; uint32_t words[32/sizeof(uint32_t)]; } digest;
	sph_panama_context hash_ctx;

	sph_panama_init(&hash_ctx);
	sph_panama(&hash_ctx, in, len);
	sph_panama_close(digest.bytes, &hash_ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash ilen bytes at 'in' with PANAMA and write the encoded digest into
 * 'out' starting at byte offset *tot_len. *tot_len is then advanced by 32*2
 * in the base-16 case, otherwise by the value large_hash_output() returns.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- confirm it is remapped.
 */
inline static void DoPANAMA_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	sph_panama_context hash_ctx;

	sph_panama_init(&hash_ctx);
	sph_panama(&hash_ctx, in, ilen);
	sph_panama_close(digest, &hash_ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	// Base-16 is the usual case, so bypass large_hash_output() and hex-encode directly.
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Run one complete PANAMA hash (init, update, close) over the ilen bytes at
 * 'in'; 'out' is what the close call is given for the raw digest.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- presumably remapped by
 * a project header, confirm.
 */
inline static void DoPANAMA_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_panama_context hash_ctx;

	sph_panama_init(&hash_ctx);
	sph_panama(&hash_ctx, in, ilen);
	sph_panama_close(out, &hash_ctx);
}

/*
 * For each candidate index, hash input1 with PANAMA and append the encoded
 * digest to input2 at total_len2_X86[i], which is advanced.
 */
void DynamicFunc__PANAMA_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
	}
}

/*
 * For each candidate index, hash input2 with PANAMA and append the encoded
 * digest to input1 at total_len_X86[i], which is advanced.
 */
void DynamicFunc__PANAMA_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
	}
}

/*
 * For each candidate index, hash input1 with PANAMA and write the encoded
 * digest into input2 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len2_X86 is not modified here.
 */
void DynamicFunc__PANAMA_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt(src, total_len_X86[i], dst, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 with PANAMA and write the encoded
 * digest into input1 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len_X86 is not modified here.
 */
void DynamicFunc__PANAMA_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt(src, total_len2_X86[i], dst, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 with PANAMA and write the encoded
 * digest back into input1 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len_X86 is not modified here.
 */
void DynamicFunc__PANAMA_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoPANAMA_crypt(buf, total_len_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 with PANAMA and write the encoded
 * digest back into input2 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len2_X86 is not modified here.
 */
void DynamicFunc__PANAMA_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoPANAMA_crypt(buf, total_len2_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 with PANAMA and write the encoded
 * digest at offset 0 of input1; total_len_X86[i] becomes the resulting length.
 */
void DynamicFunc__PANAMA_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoPANAMA_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input1 with PANAMA and write the encoded
 * digest at offset 0 of input2; total_len2_X86[i] becomes the resulting length.
 */
void DynamicFunc__PANAMA_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 with PANAMA and write the encoded
 * digest at offset 0 of input1; total_len_X86[i] becomes the resulting length.
 */
void DynamicFunc__PANAMA_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 with PANAMA and write the encoded
 * digest at offset 0 of input2; total_len2_X86[i] becomes the resulting length.
 */
void DynamicFunc__PANAMA_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += PANAMA_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoPANAMA_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * Hash input X (1 selects input1, anything else input2) for every index in
 * [i, til) and store the raw digest in dynamic_BHO[Y-1].dat[index].b.
 * Y is 1-based on entry.
 */
inline static void _Dyna__PANAMA_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* convert the 1-based output number to an array index */
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Written on every pass, exactly as generated. */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoPANAMA_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += PANAMA_inc;
	}
}
/* PANAMA of input1 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__PANAMA_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(1, 1, i, til);
}
/* PANAMA of input1 into large-hash output 2 (dynamic_BHO[1]). */
void DynamicFunc__PANAMA_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(1, 2, i, til);
}
/* PANAMA of input1 into large-hash output 3 (dynamic_BHO[2]). */
void DynamicFunc__PANAMA_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(1, 3, i, til);
}
/* PANAMA of input1 into large-hash output 4 (dynamic_BHO[3]). */
void DynamicFunc__PANAMA_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(1, 4, i, til);
}
/* PANAMA of input2 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__PANAMA_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(2, 1, i, til);
}
/* PANAMA of input2 into large-hash output 2 (dynamic_BHO[1]). */
void DynamicFunc__PANAMA_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(2, 2, i, til);
}
/* PANAMA of input2 into large-hash output 3 (dynamic_BHO[2]). */
void DynamicFunc__PANAMA_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(2, 3, i, til);
}
/* PANAMA of input2 into large-hash output 4 (dynamic_BHO[3]). */
void DynamicFunc__PANAMA_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__PANAMA_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step: for each candidate index, hash input1 with PANAMA and store the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__PANAMA_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += PANAMA_inc) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt_f(src, total_len_X86[i], key);
	}
}

/*
 * Final step: for each candidate index, hash input2 with PANAMA and store the
 * first 16 digest bytes in the matching crypt_key_X86 slot.
 */
void DynamicFunc__PANAMA_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	for (; i < til; i += PANAMA_inc) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoPANAMA_crypt_f(src, total_len2_X86[i], key);
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=MD2 PARAHASH=MD2 BIN_SZ=16 BIN_REAL_SZ=16 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_md2_context HASH_Init=sph_md2_init HASH_Update=sph_md2 HASH_Final=sph_md2_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_MD2
 ***********************************************************************/


/*****************************************************************************
 ****  MD2 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): MD2_LOOPS is not referenced by the MD2 code visible in this section -- confirm where it is used. */
#define MD2_LOOPS 1
/* Step added to the candidate index on each pass of the MD2 loops below. */
static const uint32_t MD2_inc = 1;

/*
 * Final-stage helper: hash len bytes at 'in' with MD2 into a 16-byte scratch
 * buffer and copy those 16 bytes to 'out'.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- presumably remapped by
 * a project header, confirm.
 */
inline static void DoMD2_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member is never read; presumably it only forces word alignment. */
	union { unsigned char bytes[16]; uint32_t words[16/sizeof(uint32_t)]; } digest;
	sph_md2_context hash_ctx;

	sph_md2_init(&hash_ctx);
	sph_md2(&hash_ctx, in, len);
	sph_md2_close(digest.bytes, &hash_ctx);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash ilen bytes at 'in' with MD2 and write the encoded digest into 'out'
 * starting at byte offset *tot_len. *tot_len is then advanced by 16*2 in the
 * base-16 case, otherwise by the value large_hash_output() returns.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- confirm it is remapped.
 */
inline static void DoMD2_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[16];
	unsigned char *dst;
	sph_md2_context hash_ctx;

	sph_md2_init(&hash_ctx);
	sph_md2(&hash_ctx, in, ilen);
	sph_md2_close(digest, &hash_ctx);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 16, tid);
		return;
	}
	// Base-16 is the usual case, so bypass large_hash_output() and hex-encode directly.
	hex_out_buf(digest, dst, 16);
	*tot_len += 16*2;
}
/*
 * Run one complete MD2 hash (init, update, close) over the ilen bytes at
 * 'in'; 'out' is what the close call is given for the raw digest.
 * NOTE(review): stock sphlib close functions are declared (ctx, dst); the
 * (dst, ctx) order is kept exactly as generated -- presumably remapped by
 * a project header, confirm.
 */
inline static void DoMD2_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_md2_context hash_ctx;

	sph_md2_init(&hash_ctx);
	sph_md2(&hash_ctx, in, ilen);
	sph_md2_close(out, &hash_ctx);
}

/*
 * For each candidate index, hash input1 with MD2 and append the encoded
 * digest to input2 at total_len2_X86[i], which is advanced.
 */
void DynamicFunc__MD2_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoMD2_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
	}
}

/*
 * For each candidate index, hash input2 with MD2 and append the encoded
 * digest to input1 at total_len_X86[i], which is advanced.
 */
void DynamicFunc__MD2_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoMD2_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
	}
}

/*
 * For each candidate index, hash input1 with MD2 and write the encoded
 * digest into input2 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len2_X86 is not modified here.
 */
void DynamicFunc__MD2_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoMD2_crypt(src, total_len_X86[i], dst, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 with MD2 and write the encoded
 * digest into input1 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len_X86 is not modified here.
 */
void DynamicFunc__MD2_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoMD2_crypt(src, total_len2_X86[i], dst, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 with MD2 and write the encoded
 * digest back into input1 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len_X86 is not modified here.
 */
void DynamicFunc__MD2_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoMD2_crypt(buf, total_len_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input2 with MD2 and write the encoded
 * digest back into input2 at the offset from nLargeOff_get(tid). The advanced
 * offset is discarded and total_len2_X86 is not modified here.
 */
void DynamicFunc__MD2_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t offset = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoMD2_crypt(buf, total_len2_X86[i], buf, &offset, tid);
	}
}

/*
 * For each candidate index, hash input1 with MD2 and write the encoded
 * digest at offset 0 of input1; total_len_X86[i] becomes the resulting length.
 */
void DynamicFunc__MD2_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		DoMD2_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input1 with MD2 and write the encoded
 * digest at offset 0 of input2; total_len2_X86[i] becomes the resulting length.
 */
void DynamicFunc__MD2_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t new_len = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoMD2_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 with MD2 and write the encoded
 * digest at offset 0 of input1; total_len_X86[i] becomes the resulting length.
 */
void DynamicFunc__MD2_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t new_len = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {	/* odd index: use the x2 half of the slot */
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoMD2_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
	}
}

/*
 * For each candidate index, hash input2 with MD2 and write the encoded
 * digest at offset 0 of input2; total_len2_X86[i] becomes the resulting length.
 */
void DynamicFunc__MD2_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	for (; i < til; i += MD2_inc) {
		uint32_t new_len = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1)	/* odd index: use the x2 half of the slot */
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		DoMD2_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
	}
}

/*
 * Hash input X (1 selects input1, anything else input2) for every index in
 * [i, til) and store the raw digest in dynamic_BHO[Y-1].dat[index].b.
 * Y is 1-based on entry.
 */
inline static void _Dyna__MD2_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* convert the 1-based output number to an array index */
	dynamic_BHO[Y].width = 16;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Written on every pass, exactly as generated. */
		dynamic_BHO[Y].BE = 0;	// CTX requires no swapping.
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
#endif
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
#endif
		}
		DoMD2_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += MD2_inc;
	}
}
/* MD2 of input1 into large-hash output 1 (dynamic_BHO[0]). */
void DynamicFunc__MD2_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__MD2_crypt_inputX_to_outputY(1, 1, i, til);
}
void DynamicFunc__MD2_crypt_input1_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(1, 2, i, til); }
void DynamicFunc__MD2_crypt_input1_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(1, 3, i, til); }
void DynamicFunc__MD2_crypt_input1_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(1, 4, i, til); }
void DynamicFunc__MD2_crypt_input2_to_output1(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(2, 1, i, til); }
void DynamicFunc__MD2_crypt_input2_to_output2(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(2, 2, i, til); }
void DynamicFunc__MD2_crypt_input2_to_output3(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(2, 3, i, til); }
void DynamicFunc__MD2_crypt_input2_to_output4(DYNA_OMP_PARAMS) { PRELIM_NO_TID; _Dyna__MD2_crypt_inputX_to_outputY(2, 4, i, til); }

/*
 * Final step for MD2 over input 1: every candidate in [i, til) is passed to
 * DoMD2_crypt_f together with its length, and the matching crypt_key_X86
 * slot is given as the destination.
 */
void DynamicFunc__MD2_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			DoMD2_crypt_f(input_buf_X86[i>>MD5_X2].x2.b2, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoMD2_crypt_f(input_buf_X86[i>>MD5_X2].x1.b, total_len_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += MD2_inc;
	}
}

/*
 * Final step for MD2 over input 2: every candidate in [i, til) is passed to
 * DoMD2_crypt_f together with its length, and the matching crypt_key_X86
 * slot is given as the destination.
 */
void DynamicFunc__MD2_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			DoMD2_crypt_f(input_buf2_X86[i>>MD5_X2].x2.b2, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x2.b2);
		} else
#endif
		{
			DoMD2_crypt_f(input_buf2_X86[i>>MD5_X2].x1.b, total_len2_X86[i], crypt_key_X86[i>>MD5_X2].x1.b);
		}
		i += MD2_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SKEIN224 PARAHASH=SKEIN224 BIN_SZ=64 BIN_REAL_SZ=28 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_skein224_context HASH_Init=sph_skein224_init HASH_Update=sph_skein224 HASH_Final=sph_skein224_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SKEIN224
 ***********************************************************************/


/*****************************************************************************
 ****  SKEIN224 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/*
 * SKEIN224_inc is the index step used by the DynamicFunc__SKEIN224_* loops
 * below; with a value of 1 they visit every candidate index.
 * NOTE(review): SKEIN224_LOOPS is not referenced by the SKEIN224 functions
 * in this section - presumably a leftover of the shared template.
 */
#define SKEIN224_LOOPS 1
static const uint32_t SKEIN224_inc = 1;

/*
 * Hash len bytes at in with SKEIN224 and copy only the first 16 bytes of
 * the digest to out (the "final" crypt key is truncated to 16 bytes).
 */
static inline void DoSKEIN224_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member keeps the scratch buffer word aligned. */
	union { unsigned char bytes[64]; uint32_t words[64/sizeof(uint32_t)]; } digest;
	sph_skein224_context state;

	sph_skein224_init(&state);
	sph_skein224(&state, in, len);
	sph_skein224_close(digest.bytes, &state);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash ilen bytes at in with SKEIN224 and write the encoded 28-byte digest
 * into out starting at byte offset *tot_len, then advance *tot_len.
 *
 * When the thread's large-output mode is eBase16 the digest goes through
 * hex_out_buf and *tot_len grows by 28*2; for every other mode
 * large_hash_output does the encoding and its return value is added.
 */
static inline void DoSKEIN224_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	sph_skein224_context state;

	sph_skein224_init(&state);
	sph_skein224(&state, in, ilen);
	sph_skein224_close(digest, &state);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 28, tid);
		return;
	}
	/* Hex is the usual case, so it bypasses large_hash_output. */
	hex_out_buf(digest, dst, 28);
	*tot_len += 28*2;
}
/*
 * Hash ilen bytes at in with SKEIN224 and let sph_skein224_close store the
 * raw digest directly in out (no encoding, no truncation).
 */
static inline void DoSKEIN224_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_skein224_context state;

	sph_skein224_init(&state);
	sph_skein224(&state, in, ilen);
	sph_skein224_close(out, &state);
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 1 and append the
 * encoded digest to input 2.  total_len2_X86[i] is both the write position
 * and the length that DoSKEIN224_crypt advances.
 */
void DynamicFunc__SKEIN224_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 2 and append the
 * encoded digest to input 1.  total_len_X86[i] is both the write position
 * and the length that DoSKEIN224_crypt advances.
 */
void DynamicFunc__SKEIN224_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 1 and write the
 * encoded digest into input 2 at the offset returned by nLargeOff_get(tid).
 * The advanced position is thrown away, so total_len2_X86 is not touched.
 */
void DynamicFunc__SKEIN224_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt(src, total_len_X86[i], dst, &pos, tid);
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 2 and write the
 * encoded digest into input 1 at the offset returned by nLargeOff_get(tid).
 * The advanced position is thrown away, so total_len_X86 is not touched.
 */
void DynamicFunc__SKEIN224_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt(src, total_len2_X86[i], dst, &pos, tid);
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid).  The advanced position is thrown away, so
 * total_len_X86 is not touched.
 */
void DynamicFunc__SKEIN224_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSKEIN224_crypt(buf, total_len_X86[i], buf, &pos, tid);
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid).  The advanced position is thrown away, so
 * total_len2_X86 is not touched.
 */
void DynamicFunc__SKEIN224_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSKEIN224_crypt(buf, total_len2_X86[i], buf, &pos, tid);
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 1 and replace its
 * contents with the encoded digest, written from offset 0.  The length
 * reported by DoSKEIN224_crypt becomes the new total_len_X86[i].
 */
void DynamicFunc__SKEIN224_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSKEIN224_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 1 and write the
 * encoded digest to input 2 from offset 0.  The length reported by
 * DoSKEIN224_crypt becomes the new total_len2_X86[i].
 */
void DynamicFunc__SKEIN224_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 2 and write the
 * encoded digest to input 1 from offset 0.  The length reported by
 * DoSKEIN224_crypt becomes the new total_len_X86[i].
 */
void DynamicFunc__SKEIN224_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
		i += SKEIN224_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN224-hash input 2 and replace its
 * contents with the encoded digest, written from offset 0.  The length
 * reported by DoSKEIN224_crypt becomes the new total_len2_X86[i].
 */
void DynamicFunc__SKEIN224_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSKEIN224_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += SKEIN224_inc;
	}
}

/*
 * Shared worker for the SKEIN224 inputN_to_outputM primitives.
 *
 * X selects the source (1 = input_buf_X86/total_len_X86, any other value =
 * input_buf2_X86/total_len2_X86).  Y is the 1-based number of the
 * dynamic_BHO output slot.  For candidates i .. til-1 the raw SKEIN224
 * digest is stored in dynamic_BHO[Y-1].dat[i].b; the slot width is set to 28.
 */
static inline void _Dyna__SKEIN224_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* callers number the outputs from 1; the array is 0-based */
	dynamic_BHO[Y].width = 28;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Output metadata is rewritten on every pass, as in the template. */
		dynamic_BHO[Y].BE = 0;	/* CTX output needs no byte swapping */
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf_X86[i>>MD5_X2].x1.b;
		} else {
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += SKEIN224_inc;
	}
}
/*
 * Public SKEIN224 inputN_to_outputM primitives.  Each one sets up i/til
 * through PRELIM_NO_TID and forwards to
 * _Dyna__SKEIN224_crypt_inputX_to_outputY with the (input number, 1-based
 * output number) pair spelled out in its name.
 */
void DynamicFunc__SKEIN224_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SKEIN224_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SKEIN224_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SKEIN224_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SKEIN224_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SKEIN224_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SKEIN224_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SKEIN224_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN224_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step for SKEIN224 over input 1: for every candidate in [i, til) the
 * first 16 bytes of the digest (see DoSKEIN224_crypt_f) are stored in the
 * matching crypt_key_X86 slot.
 */
void DynamicFunc__SKEIN224_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src, *key;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			key = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt_f(src, total_len_X86[i], key);
		i += SKEIN224_inc;
	}
}

/*
 * Final step for SKEIN224 over input 2: for every candidate in [i, til) the
 * first 16 bytes of the digest (see DoSKEIN224_crypt_f) are stored in the
 * matching crypt_key_X86 slot.
 */
void DynamicFunc__SKEIN224_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src, *key;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			key = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN224_crypt_f(src, total_len2_X86[i], key);
		i += SKEIN224_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SKEIN256 PARAHASH=SKEIN256 BIN_SZ=64 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_skein256_context HASH_Init=sph_skein256_init HASH_Update=sph_skein256 HASH_Final=sph_skein256_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SKEIN256
 ***********************************************************************/


/*****************************************************************************
 ****  SKEIN256 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/*
 * SKEIN256_inc is the index step used by the DynamicFunc__SKEIN256_* loops
 * below; with a value of 1 they visit every candidate index.
 * NOTE(review): SKEIN256_LOOPS is not referenced by the SKEIN256 functions
 * in this section - presumably a leftover of the shared template.
 */
#define SKEIN256_LOOPS 1
static const uint32_t SKEIN256_inc = 1;

/*
 * Hash len bytes at in with SKEIN256 and copy only the first 16 bytes of
 * the digest to out (the "final" crypt key is truncated to 16 bytes).
 */
static inline void DoSKEIN256_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member keeps the scratch buffer word aligned. */
	union { unsigned char bytes[64]; uint32_t words[64/sizeof(uint32_t)]; } digest;
	sph_skein256_context state;

	sph_skein256_init(&state);
	sph_skein256(&state, in, len);
	sph_skein256_close(digest.bytes, &state);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash ilen bytes at in with SKEIN256 and write the encoded 32-byte digest
 * into out starting at byte offset *tot_len, then advance *tot_len.
 *
 * When the thread's large-output mode is eBase16 the digest goes through
 * hex_out_buf and *tot_len grows by 32*2; for every other mode
 * large_hash_output does the encoding and its return value is added.
 */
static inline void DoSKEIN256_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	sph_skein256_context state;

	sph_skein256_init(&state);
	sph_skein256(&state, in, ilen);
	sph_skein256_close(digest, &state);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	/* Hex is the usual case, so it bypasses large_hash_output. */
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Hash ilen bytes at in with SKEIN256 and let sph_skein256_close store the
 * raw digest directly in out (no encoding, no truncation).
 */
static inline void DoSKEIN256_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_skein256_context state;

	sph_skein256_init(&state);
	sph_skein256(&state, in, ilen);
	sph_skein256_close(out, &state);
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 1 and append the
 * encoded digest to input 2.  total_len2_X86[i] is both the write position
 * and the length that DoSKEIN256_crypt advances.
 */
void DynamicFunc__SKEIN256_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 2 and append the
 * encoded digest to input 1.  total_len_X86[i] is both the write position
 * and the length that DoSKEIN256_crypt advances.
 */
void DynamicFunc__SKEIN256_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 1 and write the
 * encoded digest into input 2 at the offset returned by nLargeOff_get(tid).
 * The advanced position is thrown away, so total_len2_X86 is not touched.
 */
void DynamicFunc__SKEIN256_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt(src, total_len_X86[i], dst, &pos, tid);
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 2 and write the
 * encoded digest into input 1 at the offset returned by nLargeOff_get(tid).
 * The advanced position is thrown away, so total_len_X86 is not touched.
 */
void DynamicFunc__SKEIN256_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt(src, total_len2_X86[i], dst, &pos, tid);
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid).  The advanced position is thrown away, so
 * total_len_X86 is not touched.
 */
void DynamicFunc__SKEIN256_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSKEIN256_crypt(buf, total_len_X86[i], buf, &pos, tid);
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid).  The advanced position is thrown away, so
 * total_len2_X86 is not touched.
 */
void DynamicFunc__SKEIN256_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSKEIN256_crypt(buf, total_len2_X86[i], buf, &pos, tid);
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 1 and replace its
 * contents with the encoded digest, written from offset 0.  The length
 * reported by DoSKEIN256_crypt becomes the new total_len_X86[i].
 */
void DynamicFunc__SKEIN256_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSKEIN256_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 1 and write the
 * encoded digest to input 2 from offset 0.  The length reported by
 * DoSKEIN256_crypt becomes the new total_len2_X86[i].
 */
void DynamicFunc__SKEIN256_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 2 and write the
 * encoded digest to input 1 from offset 0.  The length reported by
 * DoSKEIN256_crypt becomes the new total_len_X86[i].
 */
void DynamicFunc__SKEIN256_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
		i += SKEIN256_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN256-hash input 2 and replace its
 * contents with the encoded digest, written from offset 0.  The length
 * reported by DoSKEIN256_crypt becomes the new total_len2_X86[i].
 */
void DynamicFunc__SKEIN256_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSKEIN256_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += SKEIN256_inc;
	}
}

/*
 * Shared worker for the SKEIN256 inputN_to_outputM primitives.
 *
 * X selects the source (1 = input_buf_X86/total_len_X86, any other value =
 * input_buf2_X86/total_len2_X86).  Y is the 1-based number of the
 * dynamic_BHO output slot.  For candidates i .. til-1 the raw SKEIN256
 * digest is stored in dynamic_BHO[Y-1].dat[i].b; the slot width is set to 32.
 */
static inline void _Dyna__SKEIN256_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* callers number the outputs from 1; the array is 0-based */
	dynamic_BHO[Y].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Output metadata is rewritten on every pass, as in the template. */
		dynamic_BHO[Y].BE = 0;	/* CTX output needs no byte swapping */
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf_X86[i>>MD5_X2].x1.b;
		} else {
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += SKEIN256_inc;
	}
}
/*
 * Public SKEIN256 inputN_to_outputM primitives.  Each one sets up i/til
 * through PRELIM_NO_TID and forwards to
 * _Dyna__SKEIN256_crypt_inputX_to_outputY with the (input number, 1-based
 * output number) pair spelled out in its name.
 */
void DynamicFunc__SKEIN256_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SKEIN256_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SKEIN256_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SKEIN256_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SKEIN256_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SKEIN256_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SKEIN256_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SKEIN256_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN256_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step for SKEIN256 over input 1: for every candidate in [i, til) the
 * first 16 bytes of the digest (see DoSKEIN256_crypt_f) are stored in the
 * matching crypt_key_X86 slot.
 */
void DynamicFunc__SKEIN256_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src, *key;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			key = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt_f(src, total_len_X86[i], key);
		i += SKEIN256_inc;
	}
}

/*
 * Final step for SKEIN256 over input 2: for every candidate in [i, til) the
 * first 16 bytes of the digest (see DoSKEIN256_crypt_f) are stored in the
 * matching crypt_key_X86 slot.
 */
void DynamicFunc__SKEIN256_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src, *key;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			key = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN256_crypt_f(src, total_len2_X86[i], key);
		i += SKEIN256_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SKEIN384 PARAHASH=SKEIN384 BIN_SZ=64 BIN_REAL_SZ=48 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_skein384_context HASH_Init=sph_skein384_init HASH_Update=sph_skein384 HASH_Final=sph_skein384_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SKEIN384
 ***********************************************************************/


/*****************************************************************************
 ****  SKEIN384 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/*
 * SKEIN384_inc is the index step used by the DynamicFunc__SKEIN384_* loops
 * below; with a value of 1 they visit every candidate index.
 * NOTE(review): SKEIN384_LOOPS is not referenced by the SKEIN384 functions
 * in this section - presumably a leftover of the shared template.
 */
#define SKEIN384_LOOPS 1
static const uint32_t SKEIN384_inc = 1;

/*
 * Hash len bytes at in with SKEIN384 and copy only the first 16 bytes of
 * the digest to out (the "final" crypt key is truncated to 16 bytes).
 */
static inline void DoSKEIN384_crypt_f(void *in, uint32_t len, void *out) {
	/* The uint32_t member keeps the scratch buffer word aligned. */
	union { unsigned char bytes[64]; uint32_t words[64/sizeof(uint32_t)]; } digest;
	sph_skein384_context state;

	sph_skein384_init(&state);
	sph_skein384(&state, in, len);
	sph_skein384_close(digest.bytes, &state);
	memcpy(out, digest.bytes, 16);
}

/*
 * Hash ilen bytes at in with SKEIN384 and write the encoded 48-byte digest
 * into out starting at byte offset *tot_len, then advance *tot_len.
 *
 * When the thread's large-output mode is eBase16 the digest goes through
 * hex_out_buf and *tot_len grows by 48*2; for every other mode
 * large_hash_output does the encoding and its return value is added.
 */
static inline void DoSKEIN384_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	sph_skein384_context state;

	sph_skein384_init(&state);
	sph_skein384(&state, in, ilen);
	sph_skein384_close(digest, &state);

	dst = (unsigned char *)out + *tot_len;
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 48, tid);
		return;
	}
	/* Hex is the usual case, so it bypasses large_hash_output. */
	hex_out_buf(digest, dst, 48);
	*tot_len += 48*2;
}
/*
 * Hash ilen bytes at in with SKEIN384 and let sph_skein384_close store the
 * raw digest directly in out (no encoding, no truncation).
 */
static inline void DoSKEIN384_crypt_only(void *in, uint32_t ilen, void *out) {
	sph_skein384_context state;

	sph_skein384_init(&state);
	sph_skein384(&state, in, ilen);
	sph_skein384_close(out, &state);
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 1 and append the
 * encoded digest to input 2.  total_len2_X86[i] is both the write position
 * and the length that DoSKEIN384_crypt advances.
 */
void DynamicFunc__SKEIN384_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt(src, total_len_X86[i], dst, &total_len2_X86[i], tid);
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 2 and append the
 * encoded digest to input 1.  total_len_X86[i] is both the write position
 * and the length that DoSKEIN384_crypt advances.
 */
void DynamicFunc__SKEIN384_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt(src, total_len2_X86[i], dst, &total_len_X86[i], tid);
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 1 and write the
 * encoded digest into input 2 at the offset returned by nLargeOff_get(tid).
 * The advanced position is thrown away, so total_len2_X86 is not touched.
 */
void DynamicFunc__SKEIN384_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt(src, total_len_X86[i], dst, &pos, tid);
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 2 and write the
 * encoded digest into input 1 at the offset returned by nLargeOff_get(tid).
 * The advanced position is thrown away, so total_len_X86 is not touched.
 */
void DynamicFunc__SKEIN384_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt(src, total_len2_X86[i], dst, &pos, tid);
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 1 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid).  The advanced position is thrown away, so
 * total_len_X86 is not touched.
 */
void DynamicFunc__SKEIN384_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSKEIN384_crypt(buf, total_len_X86[i], buf, &pos, tid);
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 2 and write the
 * encoded digest back into the same buffer at the offset returned by
 * nLargeOff_get(tid).  The advanced position is thrown away, so
 * total_len2_X86 is not touched.
 */
void DynamicFunc__SKEIN384_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t pos = nLargeOff_get(tid);
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSKEIN384_crypt(buf, total_len2_X86[i], buf, &pos, tid);
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 1 and replace its
 * contents with the encoded digest, written from offset 0.  The length
 * reported by DoSKEIN384_crypt becomes the new total_len_X86[i].
 */
void DynamicFunc__SKEIN384_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf_X86[i>>MD5_X2].x1.b;
		DoSKEIN384_crypt(buf, total_len_X86[i], buf, &new_len, tid);
		total_len_X86[i] = new_len;
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 1 and write the
 * encoded digest to input 2 from offset 0.  The length reported by
 * DoSKEIN384_crypt becomes the new total_len2_X86[i].
 */
void DynamicFunc__SKEIN384_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			dst = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt(src, total_len_X86[i], dst, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 2 and write the
 * encoded digest to input 1 from offset 0.  The length reported by
 * DoSKEIN384_crypt becomes the new total_len_X86[i].
 */
void DynamicFunc__SKEIN384_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *src, *dst;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			dst = input_buf_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt(src, total_len2_X86[i], dst, &new_len, tid);
		total_len_X86[i] = new_len;
		i += SKEIN384_inc;
	}
}

/*
 * For every candidate in [i, til): SKEIN384-hash input 2 and replace its
 * contents with the encoded digest, written from offset 0.  The length
 * reported by DoSKEIN384_crypt becomes the new total_len2_X86[i].
 */
void DynamicFunc__SKEIN384_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t new_len = 0;
		void *buf;	/* source and destination are the same buffer */
#if (MD5_X2)
		if (i & 1)
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		else
#endif
			buf = input_buf2_X86[i>>MD5_X2].x1.b;
		DoSKEIN384_crypt(buf, total_len2_X86[i], buf, &new_len, tid);
		total_len2_X86[i] = new_len;
		i += SKEIN384_inc;
	}
}

/*
 * Shared worker for the SKEIN384 inputN_to_outputM primitives.
 *
 * X selects the source (1 = input_buf_X86/total_len_X86, any other value =
 * input_buf2_X86/total_len2_X86).  Y is the 1-based number of the
 * dynamic_BHO output slot.  For candidates i .. til-1 the raw SKEIN384
 * digest is stored in dynamic_BHO[Y-1].dat[i].b; the slot width is set to 48.
 */
static inline void _Dyna__SKEIN384_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	Y -= 1;	/* callers number the outputs from 1; the array is 0-based */
	dynamic_BHO[Y].width = 48;
	while (i < til) {
		void *src;
		uint32_t src_len;

		/* Output metadata is rewritten on every pass, as in the template. */
		dynamic_BHO[Y].BE = 0;	/* CTX output needs no byte swapping */
		dynamic_BHO[Y].bits = 32;
		dynamic_BHO[Y].mixed_SIMD = 0;

		if (X == 1) {
			src_len = total_len_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf_X86[i>>MD5_X2].x1.b;
		} else {
			src_len = total_len2_X86[i];
#if (MD5_X2)
			if (i & 1)
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			else
#endif
				src = input_buf2_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt_only(src, src_len, dynamic_BHO[Y].dat[i].b);
		i += SKEIN384_inc;
	}
}
/*
 * Public SKEIN384 inputN_to_outputM primitives.  Each one sets up i/til
 * through PRELIM_NO_TID and forwards to
 * _Dyna__SKEIN384_crypt_inputX_to_outputY with the (input number, 1-based
 * output number) pair spelled out in its name.
 */
void DynamicFunc__SKEIN384_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(1, 1, i, til);
}

void DynamicFunc__SKEIN384_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(1, 2, i, til);
}

void DynamicFunc__SKEIN384_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(1, 3, i, til);
}

void DynamicFunc__SKEIN384_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(1, 4, i, til);
}

void DynamicFunc__SKEIN384_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(2, 1, i, til);
}

void DynamicFunc__SKEIN384_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(2, 2, i, til);
}

void DynamicFunc__SKEIN384_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(2, 3, i, til);
}

void DynamicFunc__SKEIN384_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN384_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * Final step for SKEIN384 over input 1: for every candidate in [i, til) the
 * first 16 bytes of the digest (see DoSKEIN384_crypt_f) are stored in the
 * matching crypt_key_X86 slot.
 */
void DynamicFunc__SKEIN384_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src, *key;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf_X86[i>>MD5_X2].x1.b;
			key = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt_f(src, total_len_X86[i], key);
		i += SKEIN384_inc;
	}
}

/*
 * Final step for SKEIN384 over input 2: for every candidate in [i, til) the
 * first 16 bytes of the digest (see DoSKEIN384_crypt_f) are stored in the
 * matching crypt_key_X86 slot.
 */
void DynamicFunc__SKEIN384_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src, *key;
#if (MD5_X2)
		/* Odd candidates live in the x2 half of the paired slot. */
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		} else
#endif
		{
			src = input_buf2_X86[i>>MD5_X2].x1.b;
			key = crypt_key_X86[i>>MD5_X2].x1.b;
		}
		DoSKEIN384_crypt_f(src, total_len2_X86[i], key);
		i += SKEIN384_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SKEIN512 PARAHASH=SKEIN512 BIN_SZ=64 BIN_REAL_SZ=64 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sph_skein512_context HASH_Init=sph_skein512_init HASH_Update=sph_skein512 HASH_Final=sph_skein512_close SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SKEIN512
 ***********************************************************************/


/*****************************************************************************
 ****  SKEIN512 functions This code generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/*
 * SKEIN512_inc is the index step used by the DynamicFunc__SKEIN512_* loops
 * that follow; with a value of 1 they visit every candidate index.
 * NOTE(review): no use of SKEIN512_LOOPS was found in the functions directly
 * below - confirm whether anything still depends on it.
 */
#define SKEIN512_LOOPS 1
static const uint32_t SKEIN512_inc = 1;

/*
 * Computes the SKEIN512 digest of len bytes at in and copies only the first
 * 16 bytes of the 64-byte result to out.
 * The uint32_t member of the scratch union is never accessed; presumably it
 * exists only to give the digest buffer word alignment.
 */
inline static void DoSKEIN512_crypt_f(void *in, uint32_t len, void *out) {
	union { uint32_t words[64/sizeof(uint32_t)]; unsigned char bytes[64]; } digest;
	sph_skein512_context hasher;

	sph_skein512_init(&hasher);
	sph_skein512(&hasher, in, len);
	sph_skein512_close(digest.bytes, &hasher);

	/* Truncate: only 16 bytes of the digest are kept. */
	memcpy(out, digest.bytes, 16);
}

/*
 * Computes the SKEIN512 digest of ilen bytes at in and writes an encoded form
 * of it into out, starting at byte offset *tot_len.
 *   - eLargeOut_get(tid) == eBase16: hex via hex_out_buf, *tot_len grows by 128.
 *   - otherwise: large_hash_output does the encoding and *tot_len grows by
 *     the value it returns.
 * The digest is finished into a local buffer before out is touched.
 */
inline static void DoSKEIN512_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[64];
	unsigned char *dst;
	sph_skein512_context hasher;

	sph_skein512_init(&hasher);
	sph_skein512(&hasher, in, ilen);
	sph_skein512_close(digest, &hasher);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 64, tid);
		return;
	}
	/* Base16 is the usual case, so go straight to hex_out_buf and skip large_hash_output. */
	hex_out_buf(digest, dst, 64);
	*tot_len += 64*2;
}
/*
 * Computes the SKEIN512 digest of ilen bytes at in and lets
 * sph_skein512_close store the raw result directly at out
 * (no encoding, no truncation, no length bookkeeping).
 */
inline static void DoSKEIN512_crypt_only(void *in, uint32_t ilen, void *out)
{
	sph_skein512_context hasher;

	sph_skein512_init(&hasher);
	sph_skein512(&hasher, in, ilen);
	sph_skein512_close(out, &hasher);
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 1 and appends the encoded digest to input buffer 2 at its current
 * length; DoSKEIN512_crypt advances total_len2_X86[i] accordingly.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 2 and appends the encoded digest to input buffer 1 at its current
 * length; DoSKEIN512_crypt advances total_len_X86[i] accordingly.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 1 and writes the encoded digest into input buffer 2 starting at the
 * offset returned by nLargeOff_get(tid). The updated offset is discarded, so
 * total_len2_X86[i] is left unchanged.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(src, total_len_X86[i], dst, &off, tid);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 2 and writes the encoded digest into input buffer 1 starting at the
 * offset returned by nLargeOff_get(tid). The updated offset is discarded, so
 * total_len_X86[i] is left unchanged.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 1 and writes the encoded digest back into the same buffer, starting
 * at the offset returned by nLargeOff_get(tid). The updated offset is
 * discarded, so total_len_X86[i] is left unchanged.
 * Source and destination are the same buffer; DoSKEIN512_crypt finishes the
 * digest into a local array before it writes.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 2 and writes the encoded digest back into the same buffer, starting
 * at the offset returned by nLargeOff_get(tid). The updated offset is
 * discarded, so total_len2_X86[i] is left unchanged.
 * Source and destination are the same buffer; DoSKEIN512_crypt finishes the
 * digest into a local array before it writes.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 1 and replaces that same buffer's contents with the encoded digest,
 * written from offset 0. total_len_X86[i] is then set to the number of bytes
 * DoSKEIN512_crypt reported writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 1 and writes the encoded digest into input buffer 2 from offset 0.
 * total_len2_X86[i] is then set to the number of bytes DoSKEIN512_crypt
 * reported writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 2 and writes the encoded digest into input buffer 1 from offset 0.
 * total_len_X86[i] is then set to the number of bytes DoSKEIN512_crypt
 * reported writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 2 and replaces that same buffer's contents with the encoded digest,
 * written from offset 0. total_len2_X86[i] is then set to the number of bytes
 * DoSKEIN512_crypt reported writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += SKEIN512_inc;
	}
}

/*
 * Shared worker for the SKEIN512 "inputN_to_outputM" entry points.
 *   X      - 1 selects input buffer 1; any other value selects input buffer 2.
 *   Y      - 1-based output slot; dynamic_BHO[Y-1] receives the results.
 *   i, til - half-open index range [i, til) to process.
 * The slot width is set to 64 unconditionally. BE, bits and mixed_SIMD are
 * written inside the loop, so they are left untouched when the range is empty.
 * Each raw digest is stored in dynamic_BHO[Y-1].dat[index].b.
 */
inline static void _Dyna__SKEIN512_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	const uint32_t slot = Y - 1;	/* Y is 1-based for ease of reading */

	dynamic_BHO[slot].width = 64;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1) {
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			}
#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1) {
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			}
#endif
			src_len = total_len2_X86[i];
		}
		DoSKEIN512_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
		i += SKEIN512_inc;
	}
}
/* Raw SKEIN512 of input buffer 1 into output slot 1 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(1, 1, i, til);
}
/* Raw SKEIN512 of input buffer 1 into output slot 2 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(1, 2, i, til);
}
/* Raw SKEIN512 of input buffer 1 into output slot 3 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(1, 3, i, til);
}
/* Raw SKEIN512 of input buffer 1 into output slot 4 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(1, 4, i, til);
}
/* Raw SKEIN512 of input buffer 2 into output slot 1 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(2, 1, i, til);
}
/* Raw SKEIN512 of input buffer 2 into output slot 2 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(2, 2, i, til);
}
/* Raw SKEIN512 of input buffer 2 into output slot 3 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(2, 3, i, til);
}
/* Raw SKEIN512 of input buffer 2 into output slot 4 (1-based), via the shared helper. */
void DynamicFunc__SKEIN512_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SKEIN512_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 1 and stores the first 16 bytes of the digest in the matching
 * crypt_key_X86 entry (truncation is done by DoSKEIN512_crypt_f).
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i and til are not declared here; presumably they come from
 * DYNA_OMP_PARAMS / PRELIM_NO_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt_f(src, total_len_X86[i], key);
		i += SKEIN512_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SKEIN512-hashes input
 * buffer 2 and stores the first 16 bytes of the digest in the matching
 * crypt_key_X86 entry (truncation is done by DoSKEIN512_crypt_f).
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i and til are not declared here; presumably they come from
 * DYNA_OMP_PARAMS / PRELIM_NO_TID -- confirm against the header.
 */
void DynamicFunc__SKEIN512_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSKEIN512_crypt_f(src, total_len2_X86[i], key);
		i += SKEIN512_inc;
	}
}

/***********************************************************************
 * This section of the file auto-generated by dynamic_big_crypt_hash.cin
 * being run through dynamic_big_crypt_chopper.pl with this command line
 * ./dynamic_big_crypt_chopper.pl BITS=32 HASH=SM3 PARAHASH=SM3 BIN_SZ=32 BIN_REAL_SZ=32 BE_HASH=0 JSWAPH=  JSWAPT=; HASH_CTX=sm3_ctx HASH_Init=sm3_init HASH_Update=sm3_update HASH_Final=sm3_final SSEBody= SSE_LIMBS=0 SSE_ONLY_LIMBS= SSEFLAGS=  DEFINED=TRUNC_TO16 UNDEFINED=SIMD_PARA_SM3
 ***********************************************************************/


/*****************************************************************************
 ****  SM3 functions. This code is generated from dynamic_big_crypt_hash.cin
 ****  !!!!!!!!!!!!DO NOT edit the code in this generated file!!!!!!!!!!!!!!
 ****  It is regenerated at make time, so changes to this file will be lost.
 ****  Any issues found MUST be fixed properly in dynamic_big_crypt_hash.cin
 ****  and not in this file. Then make regenerates this file with the fixes.
 ****************************************************************************/
/* NOTE(review): SM3_LOOPS is not referenced in the code visible in this section -- confirm where it is consumed. */
#define SM3_LOOPS 1
/* Step added to the candidate index on each iteration of the SM3 loops in this section. */
static const uint32_t SM3_inc = 1;

/*
 * Computes the SM3 digest of len bytes at in and copies only the first
 * 16 bytes of the 32-byte result to out.
 * The uint32_t member of the scratch union is never accessed; presumably it
 * exists only to give the digest buffer word alignment.
 */
inline static void DoSM3_crypt_f(void *in, uint32_t len, void *out) {
	union { uint32_t words[32/sizeof(uint32_t)]; unsigned char bytes[32]; } digest;
	sm3_ctx hasher;

	sm3_init(&hasher);
	sm3_update(&hasher, in, len);
	sm3_final(digest.bytes, &hasher);

	/* Truncate: only 16 bytes of the digest are kept. */
	memcpy(out, digest.bytes, 16);
}

/*
 * Computes the SM3 digest of ilen bytes at in and writes an encoded form of
 * it into out, starting at byte offset *tot_len.
 *   - eLargeOut_get(tid) == eBase16: hex via hex_out_buf, *tot_len grows by 64.
 *   - otherwise: large_hash_output does the encoding and *tot_len grows by
 *     the value it returns.
 * The digest is finished into a local buffer before out is touched.
 */
inline static void DoSM3_crypt(void *in, uint32_t ilen, void *out, uint32_t *tot_len, uint32_t tid) {
	unsigned char digest[32];
	unsigned char *dst;
	sm3_ctx hasher;

	sm3_init(&hasher);
	sm3_update(&hasher, in, ilen);
	sm3_final(digest, &hasher);

	dst = &(((unsigned char*)out)[*tot_len]);
	if (eLargeOut_get(tid) != eBase16) {
		*tot_len += large_hash_output(digest, dst, 32, tid);
		return;
	}
	/* Base16 is the usual case, so go straight to hex_out_buf and skip large_hash_output. */
	hex_out_buf(digest, dst, 32);
	*tot_len += 32*2;
}
/*
 * Computes the SM3 digest of ilen bytes at in and lets sm3_final store the
 * raw result directly at out (no encoding, no truncation, no length
 * bookkeeping).
 */
inline static void DoSM3_crypt_only(void *in, uint32_t ilen, void *out)
{
	sm3_ctx hasher;

	sm3_init(&hasher);
	sm3_update(&hasher, in, ilen);
	sm3_final(out, &hasher);
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 1
 * and appends the encoded digest to input buffer 2 at its current length;
 * DoSM3_crypt advances total_len2_X86[i] accordingly.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input1_append_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(src, total_len_X86[i], dst, &(total_len2_X86[i]), tid);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 2
 * and appends the encoded digest to input buffer 1 at its current length;
 * DoSM3_crypt advances total_len_X86[i] accordingly.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input2_append_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(src, total_len2_X86[i], dst, &(total_len_X86[i]), tid);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 1
 * and writes the encoded digest into input buffer 2 starting at the offset
 * returned by nLargeOff_get(tid). The updated offset is discarded, so
 * total_len2_X86[i] is left unchanged.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input1_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(src, total_len_X86[i], dst, &off, tid);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 2
 * and writes the encoded digest into input buffer 1 starting at the offset
 * returned by nLargeOff_get(tid). The updated offset is discarded, so
 * total_len_X86[i] is left unchanged.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input2_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(src, total_len2_X86[i], dst, &off, tid);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 1
 * and writes the encoded digest back into the same buffer, starting at the
 * offset returned by nLargeOff_get(tid). The updated offset is discarded, so
 * total_len_X86[i] is left unchanged.
 * Source and destination are the same buffer; DoSM3_crypt finishes the
 * digest into a local array before it writes.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input1_at_offset_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(buf, total_len_X86[i], buf, &off, tid);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 2
 * and writes the encoded digest back into the same buffer, starting at the
 * offset returned by nLargeOff_get(tid). The updated offset is discarded, so
 * total_len2_X86[i] is left unchanged.
 * Source and destination are the same buffer; DoSM3_crypt finishes the
 * digest into a local array before it writes.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input2_at_offset_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t off = nLargeOff_get(tid);
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(buf, total_len2_X86[i], buf, &off, tid);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 1
 * and replaces that same buffer's contents with the encoded digest, written
 * from offset 0. total_len_X86[i] is then set to the number of bytes
 * DoSM3_crypt reported writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input1_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(buf, total_len_X86[i], buf, &written, tid);
		total_len_X86[i] = written;
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 1
 * and writes the encoded digest into input buffer 2 from offset 0.
 * total_len2_X86[i] is then set to the number of bytes DoSM3_crypt reported
 * writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input1_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			dst = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(src, total_len_X86[i], dst, &written, tid);
		total_len2_X86[i] = written;
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 2
 * and writes the encoded digest into input buffer 1 from offset 0.
 * total_len_X86[i] is then set to the number of bytes DoSM3_crypt reported
 * writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input2_overwrite_input1(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *dst = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			dst = input_buf_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(src, total_len2_X86[i], dst, &written, tid);
		total_len_X86[i] = written;
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 2
 * and replaces that same buffer's contents with the encoded digest, written
 * from offset 0. total_len2_X86[i] is then set to the number of bytes
 * DoSM3_crypt reported writing.
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i, til and tid are not declared here; presumably they come
 * from DYNA_OMP_PARAMS / PRELIM_W_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input2_overwrite_input2(DYNA_OMP_PARAMS) {
	PRELIM_W_TID;
	while (i < til) {
		uint32_t written = 0;
		void *buf = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			buf = input_buf2_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt(buf, total_len2_X86[i], buf, &written, tid);
		total_len2_X86[i] = written;
		i += SM3_inc;
	}
}

/*
 * Shared worker for the SM3 "inputN_to_outputM" entry points.
 *   X      - 1 selects input buffer 1; any other value selects input buffer 2.
 *   Y      - 1-based output slot; dynamic_BHO[Y-1] receives the results.
 *   i, til - half-open index range [i, til) to process.
 * The slot width is set to 32 unconditionally. BE, bits and mixed_SIMD are
 * written inside the loop, so they are left untouched when the range is empty.
 * Each raw digest is stored in dynamic_BHO[Y-1].dat[index].b.
 */
inline static void _Dyna__SM3_crypt_inputX_to_outputY(uint32_t X, uint32_t Y, uint32_t i, uint32_t til) {
	const uint32_t slot = Y - 1;	/* Y is 1-based for ease of reading */

	dynamic_BHO[slot].width = 32;
	while (i < til) {
		void *src;
		uint32_t src_len;

		dynamic_BHO[slot].BE = 0;	/* CTX requires no swapping. */
		dynamic_BHO[slot].bits = 32;
		dynamic_BHO[slot].mixed_SIMD = 0;

		if (X == 1) {
			src = input_buf_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1) {
				src = input_buf_X86[i>>MD5_X2].x2.b2;
			}
#endif
			src_len = total_len_X86[i];
		} else {
			src = input_buf2_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
			if (i & 1) {
				src = input_buf2_X86[i>>MD5_X2].x2.b2;
			}
#endif
			src_len = total_len2_X86[i];
		}
		DoSM3_crypt_only(src, src_len, dynamic_BHO[slot].dat[i].b);
		i += SM3_inc;
	}
}
/* Raw SM3 of input buffer 1 into output slot 1 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input1_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(1, 1, i, til);
}
/* Raw SM3 of input buffer 1 into output slot 2 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input1_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(1, 2, i, til);
}
/* Raw SM3 of input buffer 1 into output slot 3 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input1_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(1, 3, i, til);
}
/* Raw SM3 of input buffer 1 into output slot 4 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input1_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(1, 4, i, til);
}
/* Raw SM3 of input buffer 2 into output slot 1 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input2_to_output1(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(2, 1, i, til);
}
/* Raw SM3 of input buffer 2 into output slot 2 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input2_to_output2(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(2, 2, i, til);
}
/* Raw SM3 of input buffer 2 into output slot 3 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input2_to_output3(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(2, 3, i, til);
}
/* Raw SM3 of input buffer 2 into output slot 4 (1-based), via the shared helper. */
void DynamicFunc__SM3_crypt_input2_to_output4(DYNA_OMP_PARAMS)
{
	PRELIM_NO_TID;
	_Dyna__SM3_crypt_inputX_to_outputY(2, 4, i, til);
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 1
 * and stores the first 16 bytes of the digest in the matching crypt_key_X86
 * entry (truncation is done by DoSM3_crypt_f).
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i and til are not declared here; presumably they come from
 * DYNA_OMP_PARAMS / PRELIM_NO_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input1_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt_f(src, total_len_X86[i], key);
		i += SM3_inc;
	}
}

/*
 * For every index from i up to (but excluding) til: SM3-hashes input buffer 2
 * and stores the first 16 bytes of the digest in the matching crypt_key_X86
 * entry (truncation is done by DoSM3_crypt_f).
 * When MD5_X2 is non-zero, odd indices use the x2.b2 member of slot i>>MD5_X2.
 * NOTE(review): i and til are not declared here; presumably they come from
 * DYNA_OMP_PARAMS / PRELIM_NO_TID -- confirm against the header.
 */
void DynamicFunc__SM3_crypt_input2_to_output1_FINAL(DYNA_OMP_PARAMS) {
	PRELIM_NO_TID;
	while (i < til) {
		void *src = input_buf2_X86[i>>MD5_X2].x1.b;
		void *key = crypt_key_X86[i>>MD5_X2].x1.b;
#if (MD5_X2)
		if (i & 1) {
			src = input_buf2_X86[i>>MD5_X2].x2.b2;
			key = crypt_key_X86[i>>MD5_X2].x2.b2;
		}
#endif
		DoSM3_crypt_f(src, total_len2_X86[i], key);
		i += SM3_inc;
	}
}

#endif // DYNAMIC_DISABLED
