Project: Part3 – Optimizing and porting argon2 package using C and Assembler language(Progress 4)

Requirements/ System Specifications.

Argon2 Password hashing function package:

https://github.com/P-H-C/phc-winner-argon2

Machine 1:

Aarch64 Fedora 28 version of Linux operating system

Cortex-A57 8 core processor

Two sticks of Dual-Channel DIMM DDR3 8GB RAM (16GB in total)

Machine 2:

Intel(R) Xeon(R) CPU E5-1630 v4 @ 3.70GHz

Four sticks of 8GB DIMM DDR4 RAM at 2.4 GHz (32 GB of RAM in total)

x86_64 Fedora 28 version of Linux Operating System

Continuation of Project: Part3 – Optimizing and porting argon2 package using C and Assembler language(Progress 3) blog:

I have tested the modified code shown here:

/*
* Argon2 reference source code package - reference C implementations
*
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*
* You may use this work under the terms of a Creative Commons CC0 1.0
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
* these licenses can be found at:
*
* - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
* - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
*
* You should have received a copy of both of these licenses along with this
* software. If not, they may be obtained at the above URLs.
*/

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/* Nanoseconds per second. Deliberately has no trailing ';' so that the macro
 * can be used anywhere inside an expression, not only at the end of a
 * statement. */
#define BILLION 1000000000L
#ifdef _MSC_VER
#include <intrin.h>
#endif

#include "argon2.h"

/*
static uint64_t rdtsc(void) {
#ifdef _MSC_VER
return __rdtsc();
#else
#if defined(__amd64__) || defined(__x86_64__)
uint64_t rax, rdx;
__asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx) : :);
return (rdx << 32) | rax;
#elif defined(__i386__) || defined(__i386) || defined(__X86__)
uint64_t rax;
__asm__ __volatile__("rdtsc" : "=A"(rax) : :);
return rax;
#elif defined(__aarch64__)
return 1;
#else
return 0;
#endif
#endif
}

*/


/*
* Benchmarks Argon2 with salt length 16, password length 16, t_cost 3,
and different m_cost and threads
*/
static void benchmark() {
#define BENCH_OUTLEN 16
#define BENCH_INLEN 16
const uint32_t inlen = BENCH_INLEN;
const unsigned outlen = BENCH_OUTLEN;
unsigned char out[BENCH_OUTLEN];
unsigned char pwd_array[BENCH_INLEN];
unsigned char salt_array[BENCH_INLEN];
#undef BENCH_INLEN
#undef BENCH_OUTLEN

struct timespec start, stop;
double accum;

uint32_t t_cost = 3;
uint32_t m_cost;
uint32_t thread_test[4] = {1, 2, 4, 8};
argon2_type types[3] = {Argon2_i, Argon2_d, Argon2_id};

memset(pwd_array, 0, inlen);
memset(salt_array, 1, inlen);

for (m_cost = (uint32_t)1 << 10; m_cost <= (uint32_t)1 << 22; m_cost *= 2) {
unsigned i;
for (i = 0; i < 4; ++i) {
double run_time = 0;
uint32_t thread_n = thread_test[i];
unsigned j;
for (j = 0; j < 3; ++j) {
/*clock_t start_time, stop_time;
uint64_t start_cycles, stop_cycles;
uint64_t delta;
double mcycles;*/

argon2_type type = types[j];

/*start_time = clock();
start_cycles = rdtsc();*/

if( clock_gettime( CLOCK_REALTIME, &start) == -1 ) {
perror( "clock gettime" );
exit( EXIT_FAILURE );
}
else
{
clock_gettime(CLOCK_REALTIME, &start);
}

argon2_hash(t_cost, m_cost, thread_n, pwd_array, inlen,
salt_array, inlen, out, outlen, NULL, 0, type,
ARGON2_VERSION_NUMBER);

/*stop_cycles = rdtsc();
stop_time = clock();*/

/*delta = (stop_cycles - start_cycles) / (m_cost);
mcycles = (double)(stop_cycles - start_cycles) / (1UL << 20);
run_time += ((double)stop_time - start_time) / (CLOCKS_PER_SEC);*/

if( clock_gettime( CLOCK_REALTIME, &stop) == -1 ) {
perror( "clock gettime" );
exit( EXIT_FAILURE );
}
else
{
clock_gettime(CLOCK_REALTIME, &stop);
}

accum = ( (double)stop.tv_sec - (double)start.tv_sec )
+ ( (double)stop.tv_nsec - (double)start.tv_nsec ) / BILLION;

double mcycles = accum * BILLION;
mcycles = mcycles / (1UL << 20);
uint64_t delta = accum * BILLION;
delta = delta / (m_cost);

printf("%s %d iterations %d MiB %d threads: %2.2f cpb %2.2f "
"Mcycles \n", argon2_type2string(type, 1), t_cost,
m_cost >> 10, thread_n, (float)delta / 1024, mcycles);

run_time += run_time / (CLOCKS_PER_SEC);

/*run_time += accum;
printf("%2.4f seconds\n\n", (double)run_time);*/
}

/*run_time = 0;*/
run_time += accum;
printf("%2.4f seconds\n\n", run_time);
}
}

}

/* Program entry point: runs the full benchmark sweep and then reports
 * ARGON2_OK as the process exit status. */
int main() {
    const int exit_status = ARGON2_OK;

    benchmark();
    return exit_status;
}

This is the modified bench.c file from the argon2 password hashing package.

The following were the results from machine 2 running the modified program:

Argon2i 3 iterations 1 MiB 1 threads: 3.54 cpb 3.54 Mcycles
Argon2d 3 iterations 1 MiB 1 threads: 3.20 cpb 3.20 Mcycles
Argon2id 3 iterations 1 MiB 1 threads: 2.73 cpb 2.73 Mcycles
0.0029 seconds

Argon2i 3 iterations 1 MiB 2 threads: 2.92 cpb 2.92 Mcycles
Argon2d 3 iterations 1 MiB 2 threads: 2.34 cpb 2.34 Mcycles
Argon2id 3 iterations 1 MiB 2 threads: 2.40 cpb 2.40 Mcycles
0.0025 seconds

Argon2i 3 iterations 1 MiB 4 threads: 1.97 cpb 1.97 Mcycles
Argon2d 3 iterations 1 MiB 4 threads: 1.87 cpb 1.87 Mcycles
Argon2id 3 iterations 1 MiB 4 threads: 1.94 cpb 1.94 Mcycles
0.0020 seconds

Argon2i 3 iterations 1 MiB 8 threads: 3.21 cpb 3.21 Mcycles
Argon2d 3 iterations 1 MiB 8 threads: 3.00 cpb 3.00 Mcycles
Argon2id 3 iterations 1 MiB 8 threads: 2.81 cpb 2.81 Mcycles
0.0030 seconds

Argon2i 3 iterations 2 MiB 1 threads: 1.40 cpb 2.79 Mcycles
Argon2d 3 iterations 2 MiB 1 threads: 1.21 cpb 2.42 Mcycles
Argon2id 3 iterations 2 MiB 1 threads: 1.04 cpb 2.08 Mcycles
0.0022 seconds

Argon2i 3 iterations 2 MiB 2 threads: 1.44 cpb 2.88 Mcycles
Argon2d 3 iterations 2 MiB 2 threads: 1.36 cpb 2.72 Mcycles
Argon2id 3 iterations 2 MiB 2 threads: 1.37 cpb 2.73 Mcycles
0.0029 seconds

Argon2i 3 iterations 2 MiB 4 threads: 0.99 cpb 1.99 Mcycles
Argon2d 3 iterations 2 MiB 4 threads: 1.11 cpb 2.21 Mcycles
Argon2id 3 iterations 2 MiB 4 threads: 1.05 cpb 2.11 Mcycles
0.0022 seconds

Argon2i 3 iterations 2 MiB 8 threads: 1.67 cpb 3.35 Mcycles
Argon2d 3 iterations 2 MiB 8 threads: 1.54 cpb 3.08 Mcycles
Argon2id 3 iterations 2 MiB 8 threads: 1.51 cpb 3.02 Mcycles
0.0032 seconds

Argon2i 3 iterations 4 MiB 1 threads: 1.41 cpb 5.65 Mcycles
Argon2d 3 iterations 4 MiB 1 threads: 1.09 cpb 4.38 Mcycles
Argon2id 3 iterations 4 MiB 1 threads: 0.98 cpb 3.92 Mcycles
0.0041 seconds

Argon2i 3 iterations 4 MiB 2 threads: 1.28 cpb 5.13 Mcycles
Argon2d 3 iterations 4 MiB 2 threads: 1.21 cpb 4.85 Mcycles
Argon2id 3 iterations 4 MiB 2 threads: 1.23 cpb 4.93 Mcycles
0.0052 seconds

Argon2i 3 iterations 4 MiB 4 threads: 0.79 cpb 3.18 Mcycles
Argon2d 3 iterations 4 MiB 4 threads: 0.79 cpb 3.18 Mcycles
Argon2id 3 iterations 4 MiB 4 threads: 0.81 cpb 3.22 Mcycles
0.0034 seconds

Argon2i 3 iterations 4 MiB 8 threads: 1.00 cpb 4.00 Mcycles
Argon2d 3 iterations 4 MiB 8 threads: 0.89 cpb 3.58 Mcycles
Argon2id 3 iterations 4 MiB 8 threads: 0.91 cpb 3.64 Mcycles
0.0038 seconds

Argon2i 3 iterations 8 MiB 1 threads: 1.47 cpb 11.79 Mcycles
Argon2d 3 iterations 8 MiB 1 threads: 1.13 cpb 9.08 Mcycles
Argon2id 3 iterations 8 MiB 1 threads: 0.97 cpb 7.80 Mcycles
0.0082 seconds

Argon2i 3 iterations 8 MiB 2 threads: 1.27 cpb 10.18 Mcycles
Argon2d 3 iterations 8 MiB 2 threads: 0.87 cpb 6.95 Mcycles
Argon2id 3 iterations 8 MiB 2 threads: 0.88 cpb 7.00 Mcycles
0.0073 seconds

Argon2i 3 iterations 8 MiB 4 threads: 0.91 cpb 7.31 Mcycles
Argon2d 3 iterations 8 MiB 4 threads: 0.80 cpb 6.42 Mcycles
Argon2id 3 iterations 8 MiB 4 threads: 0.59 cpb 4.70 Mcycles
0.0049 seconds

Argon2i 3 iterations 8 MiB 8 threads: 0.82 cpb 6.53 Mcycles
Argon2d 3 iterations 8 MiB 8 threads: 0.83 cpb 6.63 Mcycles
Argon2id 3 iterations 8 MiB 8 threads: 0.81 cpb 6.47 Mcycles
0.0068 seconds

Argon2i 3 iterations 16 MiB 1 threads: 1.89 cpb 30.20 Mcycles
Argon2d 3 iterations 16 MiB 1 threads: 1.33 cpb 21.22 Mcycles
Argon2id 3 iterations 16 MiB 1 threads: 1.17 cpb 18.70 Mcycles
0.0196 seconds

Argon2i 3 iterations 16 MiB 2 threads: 1.17 cpb 18.80 Mcycles
Argon2d 3 iterations 16 MiB 2 threads: 0.81 cpb 13.03 Mcycles
Argon2id 3 iterations 16 MiB 2 threads: 0.79 cpb 12.57 Mcycles
0.0132 seconds

Argon2i 3 iterations 16 MiB 4 threads: 0.80 cpb 12.79 Mcycles
Argon2d 3 iterations 16 MiB 4 threads: 0.56 cpb 8.97 Mcycles
Argon2id 3 iterations 16 MiB 4 threads: 0.53 cpb 8.45 Mcycles
0.0089 seconds

Argon2i 3 iterations 16 MiB 8 threads: 0.60 cpb 9.57 Mcycles
Argon2d 3 iterations 16 MiB 8 threads: 0.64 cpb 10.22 Mcycles
Argon2id 3 iterations 16 MiB 8 threads: 0.68 cpb 10.83 Mcycles
0.0114 seconds

Argon2i 3 iterations 32 MiB 1 threads: 1.64 cpb 52.53 Mcycles
Argon2d 3 iterations 32 MiB 1 threads: 1.50 cpb 47.89 Mcycles
Argon2id 3 iterations 32 MiB 1 threads: 1.49 cpb 47.84 Mcycles
0.0502 seconds

Argon2i 3 iterations 32 MiB 2 threads: 1.28 cpb 41.08 Mcycles
Argon2d 3 iterations 32 MiB 2 threads: 1.29 cpb 41.17 Mcycles
Argon2id 3 iterations 32 MiB 2 threads: 1.38 cpb 44.31 Mcycles
0.0465 seconds

Argon2i 3 iterations 32 MiB 4 threads: 0.86 cpb 27.46 Mcycles
Argon2d 3 iterations 32 MiB 4 threads: 0.74 cpb 23.58 Mcycles
Argon2id 3 iterations 32 MiB 4 threads: 0.65 cpb 20.68 Mcycles
0.0217 seconds

Argon2i 3 iterations 32 MiB 8 threads: 0.68 cpb 21.81 Mcycles
Argon2d 3 iterations 32 MiB 8 threads: 0.69 cpb 22.09 Mcycles
Argon2id 3 iterations 32 MiB 8 threads: 0.68 cpb 21.73 Mcycles
0.0228 seconds

Argon2i 3 iterations 64 MiB 1 threads: 1.61 cpb 103.11 Mcycles
Argon2d 3 iterations 64 MiB 1 threads: 1.58 cpb 101.05 Mcycles
Argon2id 3 iterations 64 MiB 1 threads: 1.58 cpb 101.25 Mcycles
0.1062 seconds

Argon2i 3 iterations 64 MiB 2 threads: 1.44 cpb 92.42 Mcycles
Argon2d 3 iterations 64 MiB 2 threads: 1.18 cpb 75.76 Mcycles
Argon2id 3 iterations 64 MiB 2 threads: 1.18 cpb 75.28 Mcycles
0.0789 seconds

Argon2i 3 iterations 64 MiB 4 threads: 0.76 cpb 48.48 Mcycles
Argon2d 3 iterations 64 MiB 4 threads: 0.65 cpb 41.49 Mcycles
Argon2id 3 iterations 64 MiB 4 threads: 0.63 cpb 40.49 Mcycles
0.0425 seconds

Argon2i 3 iterations 64 MiB 8 threads: 0.58 cpb 37.08 Mcycles
Argon2d 3 iterations 64 MiB 8 threads: 0.61 cpb 38.88 Mcycles
Argon2id 3 iterations 64 MiB 8 threads: 0.61 cpb 39.02 Mcycles
0.0409 seconds

Argon2i 3 iterations 128 MiB 1 threads: 1.72 cpb 220.68 Mcycles
Argon2d 3 iterations 128 MiB 1 threads: 1.65 cpb 211.20 Mcycles
Argon2id 3 iterations 128 MiB 1 threads: 1.61 cpb 206.66 Mcycles
0.2167 seconds

Argon2i 3 iterations 128 MiB 2 threads: 1.12 cpb 143.16 Mcycles
Argon2d 3 iterations 128 MiB 2 threads: 1.11 cpb 142.53 Mcycles
Argon2id 3 iterations 128 MiB 2 threads: 1.11 cpb 142.67 Mcycles
0.1496 seconds

Argon2i 3 iterations 128 MiB 4 threads: 0.68 cpb 87.52 Mcycles
Argon2d 3 iterations 128 MiB 4 threads: 0.68 cpb 86.96 Mcycles
Argon2id 3 iterations 128 MiB 4 threads: 0.68 cpb 86.78 Mcycles
0.0910 seconds

Argon2i 3 iterations 128 MiB 8 threads: 0.59 cpb 75.56 Mcycles
Argon2d 3 iterations 128 MiB 8 threads: 0.55 cpb 70.96 Mcycles
Argon2id 3 iterations 128 MiB 8 threads: 0.58 cpb 74.02 Mcycles
0.0776 seconds

Argon2i 3 iterations 256 MiB 1 threads: 1.75 cpb 447.73 Mcycles
Argon2d 3 iterations 256 MiB 1 threads: 1.62 cpb 414.48 Mcycles
Argon2id 3 iterations 256 MiB 1 threads: 1.62 cpb 415.25 Mcycles
0.4354 seconds

Argon2i 3 iterations 256 MiB 2 threads: 1.17 cpb 299.72 Mcycles
Argon2d 3 iterations 256 MiB 2 threads: 1.07 cpb 274.17 Mcycles
Argon2id 3 iterations 256 MiB 2 threads: 1.14 cpb 291.48 Mcycles
0.3056 seconds

Argon2i 3 iterations 256 MiB 4 threads: 0.70 cpb 180.25 Mcycles
Argon2d 3 iterations 256 MiB 4 threads: 0.71 cpb 182.79 Mcycles
Argon2id 3 iterations 256 MiB 4 threads: 0.70 cpb 180.23 Mcycles
0.1890 seconds

Argon2i 3 iterations 256 MiB 8 threads: 0.54 cpb 137.75 Mcycles
Argon2d 3 iterations 256 MiB 8 threads: 0.54 cpb 139.23 Mcycles
Argon2id 3 iterations 256 MiB 8 threads: 0.53 cpb 134.82 Mcycles
0.1414 seconds

This is strange, as the original program produced this result:

2292451852727619283Argon2i 3 iterations 1 MiB 1 threads: 10574.63 cpb 10574.64 Mcycles
9176590593415145417Argon2d 3 iterations 1 MiB 1 threads: 10573.79 cpb 10573.79 Mcycles
16050798784100622823Argon2id 3 iterations 1 MiB 1 threads: 10571.93 cpb 10571.94 Mcycles
0.0100 seconds

2290633554493452044Argon2i 3 iterations 1 MiB 2 threads: 10574.07 cpb 10574.07 Mcycles
29783368801178634129Argon2d 3 iterations 1 MiB 2 threads: 10571.67 cpb 10571.67 Mcycles
36635109851864293143Argon2id 3 iterations 1 MiB 2 threads: 10572.13 cpb 10572.13 Mcycles
0.0160 seconds
Note: Each result line begins with a seemingly random number. The cpb and Mcycles values were also very large, which would mean the CPU took far longer to compute each hash.

I will now change the optimization level to -O3 and retest the program.

Result:
Argon2i 3 iterations 1 MiB 1 threads: 3.42 cpb 3.42 Mcycles
Argon2d 3 iterations 1 MiB 1 threads: 3.18 cpb 3.18 Mcycles
Argon2id 3 iterations 1 MiB 1 threads: 2.72 cpb 2.72 Mcycles
0.0029 seconds

Argon2i 3 iterations 1 MiB 2 threads: 2.49 cpb 2.49 Mcycles
Argon2d 3 iterations 1 MiB 2 threads: 2.33 cpb 2.33 Mcycles
Argon2id 3 iterations 1 MiB 2 threads: 2.30 cpb 2.31 Mcycles
0.0024 seconds

Argon2i 3 iterations 1 MiB 4 threads: 2.23 cpb 2.23 Mcycles
Argon2d 3 iterations 1 MiB 4 threads: 2.06 cpb 2.06 Mcycles
Argon2id 3 iterations 1 MiB 4 threads: 1.71 cpb 1.71 Mcycles
0.0018 seconds

Argon2i 3 iterations 1 MiB 8 threads: 3.17 cpb 3.17 Mcycles
Argon2d 3 iterations 1 MiB 8 threads: 3.00 cpb 3.00 Mcycles
Argon2id 3 iterations 1 MiB 8 threads: 2.99 cpb 2.99 Mcycles
0.0031 seconds

Argon2i 3 iterations 2 MiB 1 threads: 1.41 cpb 2.82 Mcycles
Argon2d 3 iterations 2 MiB 1 threads: 1.23 cpb 2.47 Mcycles
Argon2id 3 iterations 2 MiB 1 threads: 1.04 cpb 2.07 Mcycles
0.0022 seconds

Argon2i 3 iterations 2 MiB 2 threads: 1.39 cpb 2.79 Mcycles
Argon2d 3 iterations 2 MiB 2 threads: 1.36 cpb 2.73 Mcycles
Argon2id 3 iterations 2 MiB 2 threads: 1.34 cpb 2.69 Mcycles
0.0028 seconds

Argon2i 3 iterations 2 MiB 4 threads: 1.02 cpb 2.04 Mcycles
Argon2d 3 iterations 2 MiB 4 threads: 0.99 cpb 1.99 Mcycles
Argon2id 3 iterations 2 MiB 4 threads: 1.00 cpb 1.99 Mcycles
0.0021 seconds

Argon2i 3 iterations 2 MiB 8 threads: 1.71 cpb 3.43 Mcycles
Argon2d 3 iterations 2 MiB 8 threads: 1.68 cpb 3.37 Mcycles
Argon2id 3 iterations 2 MiB 8 threads: 1.64 cpb 3.29 Mcycles
0.0034 seconds

Argon2i 3 iterations 4 MiB 1 threads: 1.37 cpb 5.49 Mcycles
Argon2d 3 iterations 4 MiB 1 threads: 1.10 cpb 4.40 Mcycles
Argon2id 3 iterations 4 MiB 1 threads: 1.01 cpb 4.06 Mcycles
0.0043 seconds

Argon2i 3 iterations 4 MiB 2 threads: 1.35 cpb 5.40 Mcycles
Argon2d 3 iterations 4 MiB 2 threads: 1.18 cpb 4.71 Mcycles
Argon2id 3 iterations 4 MiB 2 threads: 1.19 cpb 4.78 Mcycles
0.0050 seconds

Argon2i 3 iterations 4 MiB 4 threads: 0.91 cpb 3.65 Mcycles
Argon2d 3 iterations 4 MiB 4 threads: 0.91 cpb 3.63 Mcycles
Argon2id 3 iterations 4 MiB 4 threads: 0.90 cpb 3.62 Mcycles
0.0038 seconds

Argon2i 3 iterations 4 MiB 8 threads: 1.02 cpb 4.08 Mcycles
Argon2d 3 iterations 4 MiB 8 threads: 1.01 cpb 4.03 Mcycles
Argon2id 3 iterations 4 MiB 8 threads: 0.95 cpb 3.80 Mcycles
0.0040 seconds

Argon2i 3 iterations 8 MiB 1 threads: 1.40 cpb 11.22 Mcycles
Argon2d 3 iterations 8 MiB 1 threads: 1.16 cpb 9.25 Mcycles
Argon2id 3 iterations 8 MiB 1 threads: 0.99 cpb 7.93 Mcycles
0.0083 seconds

Argon2i 3 iterations 8 MiB 2 threads: 1.42 cpb 11.40 Mcycles
Argon2d 3 iterations 8 MiB 2 threads: 0.88 cpb 7.03 Mcycles
Argon2id 3 iterations 8 MiB 2 threads: 0.75 cpb 6.02 Mcycles
0.0063 seconds

Argon2i 3 iterations 8 MiB 4 threads: 0.94 cpb 7.49 Mcycles
Argon2d 3 iterations 8 MiB 4 threads: 0.74 cpb 5.96 Mcycles
Argon2id 3 iterations 8 MiB 4 threads: 0.55 cpb 4.44 Mcycles
0.0047 seconds

Argon2i 3 iterations 8 MiB 8 threads: 0.71 cpb 5.67 Mcycles
Argon2d 3 iterations 8 MiB 8 threads: 0.76 cpb 6.11 Mcycles
Argon2id 3 iterations 8 MiB 8 threads: 0.75 cpb 5.97 Mcycles
0.0063 seconds

Argon2i 3 iterations 16 MiB 1 threads: 1.62 cpb 25.97 Mcycles
Argon2d 3 iterations 16 MiB 1 threads: 1.27 cpb 20.26 Mcycles
Argon2id 3 iterations 16 MiB 1 threads: 1.14 cpb 18.20 Mcycles
0.0191 seconds

Argon2i 3 iterations 16 MiB 2 threads: 1.35 cpb 21.65 Mcycles
Argon2d 3 iterations 16 MiB 2 threads: 0.98 cpb 15.62 Mcycles
Argon2id 3 iterations 16 MiB 2 threads: 0.92 cpb 14.74 Mcycles
0.0155 seconds

Argon2i 3 iterations 16 MiB 4 threads: 0.84 cpb 13.44 Mcycles
Argon2d 3 iterations 16 MiB 4 threads: 0.54 cpb 8.65 Mcycles
Argon2id 3 iterations 16 MiB 4 threads: 0.58 cpb 9.27 Mcycles
0.0097 seconds

Argon2i 3 iterations 16 MiB 8 threads: 0.61 cpb 9.80 Mcycles
Argon2d 3 iterations 16 MiB 8 threads: 0.61 cpb 9.72 Mcycles
Argon2id 3 iterations 16 MiB 8 threads: 0.67 cpb 10.75 Mcycles
0.0113 seconds

Argon2i 3 iterations 32 MiB 1 threads: 1.58 cpb 50.49 Mcycles
Argon2d 3 iterations 32 MiB 1 threads: 1.47 cpb 46.95 Mcycles
Argon2id 3 iterations 32 MiB 1 threads: 1.47 cpb 47.09 Mcycles
0.0494 seconds

Argon2i 3 iterations 32 MiB 2 threads: 1.46 cpb 46.79 Mcycles
Argon2d 3 iterations 32 MiB 2 threads: 1.39 cpb 44.55 Mcycles
Argon2id 3 iterations 32 MiB 2 threads: 1.42 cpb 45.41 Mcycles
0.0476 seconds

Argon2i 3 iterations 32 MiB 4 threads: 0.85 cpb 27.25 Mcycles
Argon2d 3 iterations 32 MiB 4 threads: 0.63 cpb 20.09 Mcycles
Argon2id 3 iterations 32 MiB 4 threads: 0.67 cpb 21.30 Mcycles
0.0223 seconds

Argon2i 3 iterations 32 MiB 8 threads: 0.65 cpb 20.74 Mcycles
Argon2d 3 iterations 32 MiB 8 threads: 0.67 cpb 21.54 Mcycles
Argon2id 3 iterations 32 MiB 8 threads: 0.67 cpb 21.34 Mcycles
0.0224 seconds

Argon2i 3 iterations 64 MiB 1 threads: 1.60 cpb 102.66 Mcycles
Argon2d 3 iterations 64 MiB 1 threads: 1.55 cpb 99.24 Mcycles
Argon2id 3 iterations 64 MiB 1 threads: 1.55 cpb 99.25 Mcycles
0.1041 seconds

Argon2i 3 iterations 64 MiB 2 threads: 1.22 cpb 78.43 Mcycles
Argon2d 3 iterations 64 MiB 2 threads: 1.26 cpb 80.65 Mcycles
Argon2id 3 iterations 64 MiB 2 threads: 1.20 cpb 76.73 Mcycles
0.0805 seconds

Argon2i 3 iterations 64 MiB 4 threads: 0.76 cpb 48.88 Mcycles
Argon2d 3 iterations 64 MiB 4 threads: 0.68 cpb 43.39 Mcycles
Argon2id 3 iterations 64 MiB 4 threads: 0.74 cpb 47.31 Mcycles
0.0496 seconds

Argon2i 3 iterations 64 MiB 8 threads: 0.65 cpb 41.82 Mcycles
Argon2d 3 iterations 64 MiB 8 threads: 0.63 cpb 40.18 Mcycles
Argon2id 3 iterations 64 MiB 8 threads: 0.67 cpb 42.62 Mcycles
0.0447 seconds

Argon2i 3 iterations 128 MiB 1 threads: 1.66 cpb 212.21 Mcycles
Argon2d 3 iterations 128 MiB 1 threads: 1.72 cpb 219.73 Mcycles
Argon2id 3 iterations 128 MiB 1 threads: 1.64 cpb 209.82 Mcycles
0.2200 seconds

Argon2i 3 iterations 128 MiB 2 threads: 1.24 cpb 158.31 Mcycles
Argon2d 3 iterations 128 MiB 2 threads: 1.11 cpb 142.63 Mcycles
Argon2id 3 iterations 128 MiB 2 threads: 1.19 cpb 152.53 Mcycles
0.1599 seconds

Argon2i 3 iterations 128 MiB 4 threads: 0.75 cpb 95.45 Mcycles
Argon2d 3 iterations 128 MiB 4 threads: 0.68 cpb 86.76 Mcycles
Argon2id 3 iterations 128 MiB 4 threads: 0.68 cpb 87.00 Mcycles
0.0912 seconds

Argon2i 3 iterations 128 MiB 8 threads: 0.57 cpb 72.78 Mcycles
Argon2d 3 iterations 128 MiB 8 threads: 0.58 cpb 74.95 Mcycles
Argon2id 3 iterations 128 MiB 8 threads: 0.59 cpb 75.34 Mcycles
0.0790 seconds

Argon2i 3 iterations 256 MiB 1 threads: 1.76 cpb 451.19 Mcycles
Argon2d 3 iterations 256 MiB 1 threads: 1.69 cpb 433.36 Mcycles
Argon2id 3 iterations 256 MiB 1 threads: 1.60 cpb 408.90 Mcycles
0.4288 seconds

Argon2i 3 iterations 256 MiB 2 threads: 1.16 cpb 296.43 Mcycles
Argon2d 3 iterations 256 MiB 2 threads: 1.09 cpb 279.88 Mcycles
Argon2id 3 iterations 256 MiB 2 threads: 1.18 cpb 301.38 Mcycles
0.3160 seconds

Argon2i 3 iterations 256 MiB 4 threads: 0.74 cpb 189.06 Mcycles
Argon2d 3 iterations 256 MiB 4 threads: 0.68 cpb 174.25 Mcycles
Argon2id 3 iterations 256 MiB 4 threads: 0.71 cpb 180.84 Mcycles
0.1896 seconds

Argon2i 3 iterations 256 MiB 8 threads: 0.50 cpb 128.98 Mcycles
Argon2d 3 iterations 256 MiB 8 threads: 0.55 cpb 141.48 Mcycles
Argon2id 3 iterations 256 MiB 8 threads: 0.52 cpb 132.25 Mcycles
0.1387 seconds

Argon2i 3 iterations 512 MiB 1 threads: 1.75 cpb 895.61 Mcycles
Argon2d 3 iterations 512 MiB 1 threads: 1.65 cpb 844.13 Mcycles
Argon2id 3 iterations 512 MiB 1 threads: 1.65 cpb 843.89 Mcycles
0.8849 seconds

Argon2i 3 iterations 512 MiB 2 threads: 1.10 cpb 563.01 Mcycles
Argon2d 3 iterations 512 MiB 2 threads: 1.12 cpb 573.63 Mcycles
Argon2id 3 iterations 512 MiB 2 threads: 1.12 cpb 575.07 Mcycles
0.6030 seconds

Argon2i 3 iterations 512 MiB 4 threads: 0.67 cpb 341.87 Mcycles
Argon2d 3 iterations 512 MiB 4 threads: 0.69 cpb 351.20 Mcycles
Argon2id 3 iterations 512 MiB 4 threads: 0.66 cpb 337.59 Mcycles
0.3540 seconds

Argon2i 3 iterations 512 MiB 8 threads: 0.50 cpb 255.14 Mcycles
Argon2d 3 iterations 512 MiB 8 threads: 0.49 cpb 253.08 Mcycles
Argon2id 3 iterations 512 MiB 8 threads: 0.50 cpb 258.21 Mcycles
0.2708 seconds

The program runs fairly fast. This is expected, as the optimization level is -O3.

Test 3 (Extra):

I will be testing on a third machine.

Specifications:

8 core aarch64 X-Gene CPU
Two sticks of DDR3 4096 MB RAM @ 1600 MHz
Fedora 28 64-bit Linux Operating System

Result:

This is with optimization level -O2.

Building without optimizations
cc -std=c89 -O2 -Wall -g -Iinclude -Isrc -pthread src/argon2.c src/core.c src/blake2/blake2b.c src/thread.c src/encoding.c src/ref.c src/bench.c -o bench
Argon2i 3 iterations 1 MiB 1 threads: 5.51 cpb 5.51 Mcycles
Argon2d 3 iterations 1 MiB 1 threads: 5.18 cpb 5.18 Mcycles
Argon2id 3 iterations 1 MiB 1 threads: 4.78 cpb 4.78 Mcycles
0.0050 seconds

Argon2i 3 iterations 1 MiB 2 threads: 4.00 cpb 4.00 Mcycles
Argon2d 3 iterations 1 MiB 2 threads: 3.67 cpb 3.67 Mcycles
Argon2id 3 iterations 1 MiB 2 threads: 3.76 cpb 3.76 Mcycles
0.0039 seconds

Argon2i 3 iterations 1 MiB 4 threads: 3.16 cpb 3.16 Mcycles
Argon2d 3 iterations 1 MiB 4 threads: 2.95 cpb 2.95 Mcycles
Argon2id 3 iterations 1 MiB 4 threads: 3.07 cpb 3.07 Mcycles
0.0032 seconds

Argon2i 3 iterations 1 MiB 8 threads: 5.75 cpb 5.75 Mcycles
Argon2d 3 iterations 1 MiB 8 threads: 5.90 cpb 5.90 Mcycles
Argon2id 3 iterations 1 MiB 8 threads: 6.04 cpb 6.04 Mcycles
0.0063 seconds

Argon2i 3 iterations 2 MiB 1 threads: 5.48 cpb 10.96 Mcycles
Argon2d 3 iterations 2 MiB 1 threads: 5.27 cpb 10.53 Mcycles
Argon2id 3 iterations 2 MiB 1 threads: 4.80 cpb 9.59 Mcycles
0.0101 seconds

Argon2i 3 iterations 2 MiB 2 threads: 3.18 cpb 6.35 Mcycles
Argon2d 3 iterations 2 MiB 2 threads: 3.14 cpb 6.27 Mcycles
Argon2id 3 iterations 2 MiB 2 threads: 3.05 cpb 6.10 Mcycles
0.0064 seconds

Argon2i 3 iterations 2 MiB 4 threads: 2.38 cpb 4.76 Mcycles
Argon2d 3 iterations 2 MiB 4 threads: 2.33 cpb 4.67 Mcycles
Argon2id 3 iterations 2 MiB 4 threads: 2.36 cpb 4.72 Mcycles
0.0050 seconds

Argon2i 3 iterations 2 MiB 8 threads: 3.62 cpb 7.23 Mcycles
Argon2d 3 iterations 2 MiB 8 threads: 3.58 cpb 7.15 Mcycles
Argon2id 3 iterations 2 MiB 8 threads: 3.67 cpb 7.34 Mcycles
0.0077 seconds

Argon2i 3 iterations 4 MiB 1 threads: 5.58 cpb 22.32 Mcycles
Argon2d 3 iterations 4 MiB 1 threads: 5.09 cpb 20.35 Mcycles
Argon2id 3 iterations 4 MiB 1 threads: 4.84 cpb 19.36 Mcycles
0.0203 seconds

Argon2i 3 iterations 4 MiB 2 threads: 2.87 cpb 11.49 Mcycles
Argon2d 3 iterations 4 MiB 2 threads: 2.86 cpb 11.45 Mcycles
Argon2id 3 iterations 4 MiB 2 threads: 2.84 cpb 11.38 Mcycles
0.0119 seconds

Argon2i 3 iterations 4 MiB 4 threads: 1.89 cpb 7.54 Mcycles
Argon2d 3 iterations 4 MiB 4 threads: 1.82 cpb 7.30 Mcycles
Argon2id 3 iterations 4 MiB 4 threads: 1.80 cpb 7.21 Mcycles
0.0076 seconds

Argon2i 3 iterations 4 MiB 8 threads: 2.47 cpb 9.90 Mcycles
Argon2d 3 iterations 4 MiB 8 threads: 2.55 cpb 10.19 Mcycles
Argon2id 3 iterations 4 MiB 8 threads: 2.63 cpb 10.51 Mcycles
0.0110 seconds

Argon2i 3 iterations 8 MiB 1 threads: 5.82 cpb 46.54 Mcycles
Argon2d 3 iterations 8 MiB 1 threads: 5.33 cpb 42.66 Mcycles
Argon2id 3 iterations 8 MiB 1 threads: 5.04 cpb 40.33 Mcycles
0.0423 seconds

Argon2i 3 iterations 8 MiB 2 threads: 2.84 cpb 22.69 Mcycles
Argon2d 3 iterations 8 MiB 2 threads: 2.78 cpb 22.22 Mcycles
Argon2id 3 iterations 8 MiB 2 threads: 2.83 cpb 22.65 Mcycles
0.0237 seconds

Argon2i 3 iterations 8 MiB 4 threads: 1.65 cpb 13.20 Mcycles
Argon2d 3 iterations 8 MiB 4 threads: 1.63 cpb 13.07 Mcycles
Argon2id 3 iterations 8 MiB 4 threads: 1.64 cpb 13.11 Mcycles
0.0137 seconds

Argon2i 3 iterations 8 MiB 8 threads: 2.09 cpb 16.73 Mcycles
Argon2d 3 iterations 8 MiB 8 threads: 1.95 cpb 15.62 Mcycles
Argon2id 3 iterations 8 MiB 8 threads: 2.36 cpb 18.85 Mcycles
0.0198 seconds

Argon2i 3 iterations 16 MiB 1 threads: 6.14 cpb 98.25 Mcycles
Argon2d 3 iterations 16 MiB 1 threads: 5.70 cpb 91.25 Mcycles
Argon2id 3 iterations 16 MiB 1 threads: 5.47 cpb 87.54 Mcycles
0.0918 seconds

Argon2i 3 iterations 16 MiB 2 threads: 2.98 cpb 47.67 Mcycles
Argon2d 3 iterations 16 MiB 2 threads: 2.93 cpb 46.88 Mcycles
Argon2id 3 iterations 16 MiB 2 threads: 2.94 cpb 47.08 Mcycles
0.0494 seconds

Argon2i 3 iterations 16 MiB 4 threads: 1.62 cpb 25.96 Mcycles
Argon2d 3 iterations 16 MiB 4 threads: 1.61 cpb 25.72 Mcycles
Argon2id 3 iterations 16 MiB 4 threads: 1.62 cpb 25.90 Mcycles
0.0272 seconds

Argon2i 3 iterations 16 MiB 8 threads: 1.79 cpb 28.67 Mcycles
Argon2d 3 iterations 16 MiB 8 threads: 1.75 cpb 28.07 Mcycles
Argon2id 3 iterations 16 MiB 8 threads: 1.82 cpb 29.16 Mcycles
0.0306 seconds

Argon2i 3 iterations 32 MiB 1 threads: 6.34 cpb 203.00 Mcycles
Argon2d 3 iterations 32 MiB 1 threads: 6.26 cpb 200.26 Mcycles
Argon2id 3 iterations 32 MiB 1 threads: 6.27 cpb 200.72 Mcycles
0.2105 seconds

Argon2i 3 iterations 32 MiB 2 threads: 3.42 cpb 109.52 Mcycles
Argon2d 3 iterations 32 MiB 2 threads: 3.38 cpb 108.09 Mcycles
Argon2id 3 iterations 32 MiB 2 threads: 3.38 cpb 108.12 Mcycles
0.1134 seconds

Argon2i 3 iterations 32 MiB 4 threads: 1.93 cpb 61.63 Mcycles
Argon2d 3 iterations 32 MiB 4 threads: 1.90 cpb 60.92 Mcycles
Argon2id 3 iterations 32 MiB 4 threads: 1.94 cpb 62.00 Mcycles
0.0650 seconds

Argon2i 3 iterations 32 MiB 8 threads: 1.94 cpb 62.07 Mcycles
Argon2d 3 iterations 32 MiB 8 threads: 1.96 cpb 62.58 Mcycles
Argon2id 3 iterations 32 MiB 8 threads: 1.92 cpb 61.30 Mcycles
0.0643 seconds

Argon2i 3 iterations 64 MiB 1 threads: 6.48 cpb 414.84 Mcycles
Argon2d 3 iterations 64 MiB 1 threads: 6.40 cpb 409.88 Mcycles
Argon2id 3 iterations 64 MiB 1 threads: 6.41 cpb 410.55 Mcycles
0.4305 seconds

Argon2i 3 iterations 64 MiB 2 threads: 3.47 cpb 221.90 Mcycles
Argon2d 3 iterations 64 MiB 2 threads: 3.43 cpb 219.27 Mcycles
Argon2id 3 iterations 64 MiB 2 threads: 3.43 cpb 219.69 Mcycles
0.2304 seconds

Argon2i 3 iterations 64 MiB 4 threads: 1.92 cpb 123.08 Mcycles
Argon2d 3 iterations 64 MiB 4 threads: 1.90 cpb 121.74 Mcycles
Argon2id 3 iterations 64 MiB 4 threads: 1.93 cpb 123.49 Mcycles
0.1295 seconds

Argon2i 3 iterations 64 MiB 8 threads: 1.82 cpb 116.51 Mcycles
Argon2d 3 iterations 64 MiB 8 threads: 1.79 cpb 114.79 Mcycles
Argon2id 3 iterations 64 MiB 8 threads: 1.80 cpb 115.02 Mcycles
0.1206 seconds

Argon2i 3 iterations 128 MiB 1 threads: 6.60 cpb 844.52 Mcycles
Argon2d 3 iterations 128 MiB 1 threads: 6.52 cpb 835.11 Mcycles
Argon2id 3 iterations 128 MiB 1 threads: 6.54 cpb 836.68 Mcycles
0.8773 seconds

Argon2i 3 iterations 128 MiB 2 threads: 3.52 cpb 450.00 Mcycles
Argon2d 3 iterations 128 MiB 2 threads: 3.47 cpb 444.85 Mcycles
Argon2id 3 iterations 128 MiB 2 threads: 3.49 cpb 446.23 Mcycles
0.4679 seconds

Argon2i 3 iterations 128 MiB 4 threads: 1.94 cpb 247.84 Mcycles
Argon2d 3 iterations 128 MiB 4 threads: 1.91 cpb 245.05 Mcycles
Argon2id 3 iterations 128 MiB 4 threads: 1.92 cpb 245.15 Mcycles
0.2571 seconds

Argon2i 3 iterations 128 MiB 8 threads: 1.73 cpb 221.21 Mcycles
Argon2d 3 iterations 128 MiB 8 threads: 1.70 cpb 217.79 Mcycles
Argon2id 3 iterations 128 MiB 8 threads: 1.64 cpb 209.97 Mcycles
0.2202 seconds

Argon2i 3 iterations 256 MiB 1 threads: 6.69 cpb 1712.64 Mcycles
Argon2d 3 iterations 256 MiB 1 threads: 6.62 cpb 1694.77 Mcycles
Argon2id 3 iterations 256 MiB 1 threads: 6.63 cpb 1696.72 Mcycles
1.7791 seconds

Argon2i 3 iterations 256 MiB 2 threads: 3.55 cpb 909.09 Mcycles
Argon2d 3 iterations 256 MiB 2 threads: 3.51 cpb 899.22 Mcycles
Argon2id 3 iterations 256 MiB 2 threads: 3.52 cpb 900.67 Mcycles
0.9444 seconds

Argon2i 3 iterations 256 MiB 4 threads: 1.95 cpb 499.72 Mcycles
Argon2d 3 iterations 256 MiB 4 threads: 1.94 cpb 497.66 Mcycles
Argon2id 3 iterations 256 MiB 4 threads: 1.94 cpb 496.66 Mcycles
0.5208 seconds

Argon2i 3 iterations 256 MiB 8 threads: 1.48 cpb 379.07 Mcycles
Argon2d 3 iterations 256 MiB 8 threads: 1.55 cpb 398.15 Mcycles
Argon2id 3 iterations 256 MiB 8 threads: 1.58 cpb 403.45 Mcycles
0.4230 seconds

Argon2i 3 iterations 512 MiB 1 threads: 6.75 cpb 3458.96 Mcycles
Argon2d 3 iterations 512 MiB 1 threads: 6.68 cpb 3419.92 Mcycles
Argon2id 3 iterations 512 MiB 1 threads: 6.69 cpb 3426.03 Mcycles
3.5925 seconds

Argon2i 3 iterations 512 MiB 2 threads: 3.58 cpb 1835.84 Mcycles
Argon2d 3 iterations 512 MiB 2 threads: 3.55 cpb 1816.11 Mcycles
Argon2id 3 iterations 512 MiB 2 threads: 3.55 cpb 1819.26 Mcycles
1.9076 seconds

Argon2i 3 iterations 512 MiB 4 threads: 1.97 cpb 1009.56 Mcycles
Argon2d 3 iterations 512 MiB 4 threads: 1.95 cpb 997.45 Mcycles
Argon2id 3 iterations 512 MiB 4 threads: 2.01 cpb 1028.11 Mcycles
1.0780 seconds

Argon2i 3 iterations 512 MiB 8 threads: 1.41 cpb 721.65 Mcycles
Argon2d 3 iterations 512 MiB 8 threads: 1.64 cpb 839.50 Mcycles
Argon2id 3 iterations 512 MiB 8 threads: 1.69 cpb 865.63 Mcycles
0.9077 seconds

This machine is noticeably slower than the other two. It also has less memory than the other two machines, so I guess this result is to be expected.

Moving on to the next optimization level -O3.

Result:
Building without optimizations
cc -std=c89 -O3 -Wall -g -Iinclude -Isrc -pthread src/argon2.c src/core.c src/blake2/blake2b.c src/thread.c src/encoding.c src/ref.c src/bench.c -o bench
Argon2i 3 iterations 1 MiB 1 threads: 5.75 cpb 5.75 Mcycles
Argon2d 3 iterations 1 MiB 1 threads: 5.45 cpb 5.45 Mcycles
Argon2id 3 iterations 1 MiB 1 threads: 5.04 cpb 5.04 Mcycles
0.0053 seconds

Argon2i 3 iterations 1 MiB 2 threads: 3.97 cpb 3.97 Mcycles
Argon2d 3 iterations 1 MiB 2 threads: 3.59 cpb 3.59 Mcycles
Argon2id 3 iterations 1 MiB 2 threads: 3.54 cpb 3.54 Mcycles
0.0037 seconds

Argon2i 3 iterations 1 MiB 4 threads: 3.00 cpb 3.00 Mcycles
Argon2d 3 iterations 1 MiB 4 threads: 2.84 cpb 2.84 Mcycles
Argon2id 3 iterations 1 MiB 4 threads: 2.77 cpb 2.77 Mcycles
0.0029 seconds

Argon2i 3 iterations 1 MiB 8 threads: 5.19 cpb 5.20 Mcycles
Argon2d 3 iterations 1 MiB 8 threads: 5.07 cpb 5.07 Mcycles
Argon2id 3 iterations 1 MiB 8 threads: 4.92 cpb 4.93 Mcycles
0.0052 seconds

Argon2i 3 iterations 2 MiB 1 threads: 5.70 cpb 11.40 Mcycles
Argon2d 3 iterations 2 MiB 1 threads: 5.49 cpb 10.98 Mcycles
Argon2id 3 iterations 2 MiB 1 threads: 5.07 cpb 10.14 Mcycles
0.0106 seconds

Argon2i 3 iterations 2 MiB 2 threads: 3.19 cpb 6.39 Mcycles
Argon2d 3 iterations 2 MiB 2 threads: 3.15 cpb 6.30 Mcycles
Argon2id 3 iterations 2 MiB 2 threads: 3.21 cpb 6.43 Mcycles
0.0067 seconds

Argon2i 3 iterations 2 MiB 4 threads: 2.20 cpb 4.41 Mcycles
Argon2d 3 iterations 2 MiB 4 threads: 2.22 cpb 4.44 Mcycles
Argon2id 3 iterations 2 MiB 4 threads: 2.16 cpb 4.32 Mcycles
0.0045 seconds

Argon2i 3 iterations 2 MiB 8 threads: 3.68 cpb 7.36 Mcycles
Argon2d 3 iterations 2 MiB 8 threads: 2.80 cpb 5.61 Mcycles
Argon2id 3 iterations 2 MiB 8 threads: 2.79 cpb 5.58 Mcycles
0.0058 seconds

Argon2i 3 iterations 4 MiB 1 threads: 5.81 cpb 23.23 Mcycles
Argon2d 3 iterations 4 MiB 1 threads: 5.34 cpb 21.38 Mcycles
Argon2id 3 iterations 4 MiB 1 threads: 5.11 cpb 20.43 Mcycles
0.0214 seconds

Argon2i 3 iterations 4 MiB 2 threads: 2.98 cpb 11.93 Mcycles
Argon2d 3 iterations 4 MiB 2 threads: 2.93 cpb 11.73 Mcycles
Argon2id 3 iterations 4 MiB 2 threads: 2.93 cpb 11.71 Mcycles
0.0123 seconds

Argon2i 3 iterations 4 MiB 4 threads: 1.82 cpb 7.28 Mcycles
Argon2d 3 iterations 4 MiB 4 threads: 1.77 cpb 7.08 Mcycles
Argon2id 3 iterations 4 MiB 4 threads: 1.77 cpb 7.07 Mcycles
0.0074 seconds

Argon2i 3 iterations 4 MiB 8 threads: 2.50 cpb 9.99 Mcycles
Argon2d 3 iterations 4 MiB 8 threads: 2.70 cpb 10.82 Mcycles
Argon2id 3 iterations 4 MiB 8 threads: 2.89 cpb 11.54 Mcycles
0.0121 seconds

Argon2i 3 iterations 8 MiB 1 threads: 6.05 cpb 48.43 Mcycles
Argon2d 3 iterations 8 MiB 1 threads: 5.58 cpb 44.62 Mcycles
Argon2id 3 iterations 8 MiB 1 threads: 5.31 cpb 42.46 Mcycles
0.0445 seconds

Argon2i 3 iterations 8 MiB 2 threads: 2.95 cpb 23.60 Mcycles
Argon2d 3 iterations 8 MiB 2 threads: 2.91 cpb 23.26 Mcycles
Argon2id 3 iterations 8 MiB 2 threads: 2.90 cpb 23.23 Mcycles
0.0244 seconds

Argon2i 3 iterations 8 MiB 4 threads: 1.66 cpb 13.24 Mcycles
Argon2d 3 iterations 8 MiB 4 threads: 1.64 cpb 13.13 Mcycles
Argon2id 3 iterations 8 MiB 4 threads: 1.64 cpb 13.10 Mcycles
0.0137 seconds

Argon2i 3 iterations 8 MiB 8 threads: 2.03 cpb 16.25 Mcycles
Argon2d 3 iterations 8 MiB 8 threads: 2.29 cpb 18.37 Mcycles
Argon2id 3 iterations 8 MiB 8 threads: 1.92 cpb 15.33 Mcycles
0.0161 seconds

Argon2i 3 iterations 16 MiB 1 threads: 6.37 cpb 102.00 Mcycles
Argon2d 3 iterations 16 MiB 1 threads: 5.97 cpb 95.50 Mcycles
Argon2id 3 iterations 16 MiB 1 threads: 5.74 cpb 91.90 Mcycles
0.0964 seconds

Argon2i 3 iterations 16 MiB 2 threads: 3.12 cpb 49.90 Mcycles
Argon2d 3 iterations 16 MiB 2 threads: 3.07 cpb 49.17 Mcycles
Argon2id 3 iterations 16 MiB 2 threads: 3.08 cpb 49.33 Mcycles
0.0517 seconds

Argon2i 3 iterations 16 MiB 4 threads: 1.70 cpb 27.26 Mcycles
Argon2d 3 iterations 16 MiB 4 threads: 1.68 cpb 26.94 Mcycles
Argon2id 3 iterations 16 MiB 4 threads: 1.69 cpb 27.04 Mcycles
0.0283 seconds

Argon2i 3 iterations 16 MiB 8 threads: 1.81 cpb 28.91 Mcycles
Argon2d 3 iterations 16 MiB 8 threads: 1.87 cpb 29.85 Mcycles
Argon2id 3 iterations 16 MiB 8 threads: 1.87 cpb 29.86 Mcycles
0.0313 seconds

Argon2i 3 iterations 32 MiB 1 threads: 6.57 cpb 210.38 Mcycles
Argon2d 3 iterations 32 MiB 1 threads: 6.51 cpb 208.24 Mcycles
Argon2id 3 iterations 32 MiB 1 threads: 6.52 cpb 208.70 Mcycles
0.2188 seconds

Argon2i 3 iterations 32 MiB 2 threads: 3.53 cpb 112.92 Mcycles
Argon2d 3 iterations 32 MiB 2 threads: 3.49 cpb 111.63 Mcycles
Argon2id 3 iterations 32 MiB 2 threads: 3.50 cpb 111.91 Mcycles
0.1173 seconds

Argon2i 3 iterations 32 MiB 4 threads: 1.97 cpb 63.21 Mcycles
Argon2d 3 iterations 32 MiB 4 threads: 1.96 cpb 62.57 Mcycles
Argon2id 3 iterations 32 MiB 4 threads: 1.96 cpb 62.68 Mcycles
0.0657 seconds

Argon2i 3 iterations 32 MiB 8 threads: 1.89 cpb 60.42 Mcycles
Argon2d 3 iterations 32 MiB 8 threads: 2.00 cpb 63.85 Mcycles
Argon2id 3 iterations 32 MiB 8 threads: 2.03 cpb 64.85 Mcycles
0.0680 seconds

Argon2i 3 iterations 64 MiB 1 threads: 6.72 cpb 430.30 Mcycles
Argon2d 3 iterations 64 MiB 1 threads: 6.66 cpb 426.03 Mcycles
Argon2id 3 iterations 64 MiB 1 threads: 6.67 cpb 426.61 Mcycles
0.4473 seconds

Argon2i 3 iterations 64 MiB 2 threads: 3.58 cpb 229.32 Mcycles
Argon2d 3 iterations 64 MiB 2 threads: 3.54 cpb 226.89 Mcycles
Argon2id 3 iterations 64 MiB 2 threads: 3.55 cpb 227.27 Mcycles
0.2383 seconds

Argon2i 3 iterations 64 MiB 4 threads: 1.98 cpb 126.75 Mcycles
Argon2d 3 iterations 64 MiB 4 threads: 1.96 cpb 125.35 Mcycles
Argon2id 3 iterations 64 MiB 4 threads: 1.96 cpb 125.71 Mcycles
0.1318 seconds

Argon2i 3 iterations 64 MiB 8 threads: 1.87 cpb 119.64 Mcycles
Argon2d 3 iterations 64 MiB 8 threads: 1.94 cpb 123.96 Mcycles
Argon2id 3 iterations 64 MiB 8 threads: 1.90 cpb 121.41 Mcycles
0.1273 seconds

Argon2i 3 iterations 128 MiB 1 threads: 6.83 cpb 874.04 Mcycles
Argon2d 3 iterations 128 MiB 1 threads: 6.77 cpb 866.06 Mcycles
Argon2id 3 iterations 128 MiB 1 threads: 6.78 cpb 867.69 Mcycles
0.9098 seconds

Argon2i 3 iterations 128 MiB 2 threads: 3.62 cpb 464.03 Mcycles
Argon2d 3 iterations 128 MiB 2 threads: 3.60 cpb 460.44 Mcycles
Argon2id 3 iterations 128 MiB 2 threads: 3.59 cpb 460.12 Mcycles
0.4825 seconds

Argon2i 3 iterations 128 MiB 4 threads: 2.00 cpb 255.49 Mcycles
Argon2d 3 iterations 128 MiB 4 threads: 1.97 cpb 251.78 Mcycles
Argon2id 3 iterations 128 MiB 4 threads: 1.97 cpb 252.45 Mcycles
0.2647 seconds

Argon2i 3 iterations 128 MiB 8 threads: 1.85 cpb 236.45 Mcycles
Argon2d 3 iterations 128 MiB 8 threads: 1.71 cpb 218.54 Mcycles
Argon2id 3 iterations 128 MiB 8 threads: 1.71 cpb 219.59 Mcycles
0.2303 seconds

Argon2i 3 iterations 256 MiB 1 threads: 6.92 cpb 1771.62 Mcycles
Argon2d 3 iterations 256 MiB 1 threads: 6.86 cpb 1756.04 Mcycles
Argon2id 3 iterations 256 MiB 1 threads: 6.87 cpb 1759.49 Mcycles
1.8450 seconds

The results show a slight improvement in execution time.

Conclusion:

I do not know what removed those random numbers from the x86_64 basic test, but I consider this a success in porting the Argon2 password hashing function's benchmark tool so that it works on any Linux system, whether AArch64 or x86_64.

 

 

 

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s