Requirements/ System Specifications.
Argon2 Password hashing function package:
https://github.com/P-H-C/phc-winner-argon2
Machine 1:
Aarch64 Fedora 28 version of Linux operating system
Cortex-A57 8 core processor
Two sticks of Dual-Channel DIMM DDR3 8GB RAM (16GB in total)
Machine 2:
Intel(R) Xeon(R) CPU E5-1630 v4 @ 3.70GHz
Four sticks of 8GB DIMM DDR4 RAM at 2.4 GHz (32 GB of RAM in total)
x86_64 Fedora 28 version of Linux Operating System
Continuation of Project: Part 3 – Optimizing and porting the argon2 package using C and assembly language (Progress 3) blog:
I have tested the modified code shown here:
/* * Argon2 reference source code package - reference C implementations * * Copyright 2015 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves * * You may use this work under the terms of a Creative Commons CC0 1.0 * License/Waiver or the Apache Public License 2.0, at your option. The terms of * these licenses can be found at: * * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 * - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 * * You should have received a copy of both of these licenses along with this * software. If not, they may be obtained at the above URLs. */ #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <string.h> #include <time.h> #include <unistd.h> #define BILLION 1000000000L; #ifdef _MSC_VER #include <intrin.h> #endif #include "argon2.h" /* static uint64_t rdtsc(void) { #ifdef _MSC_VER return __rdtsc(); #else #if defined(__amd64__) || defined(__x86_64__) uint64_t rax, rdx; __asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx) : :); return (rdx << 32) | rax; #elif defined(__i386__) || defined(__i386) || defined(__X86__) uint64_t rax; __asm__ __volatile__("rdtsc" : "=A"(rax) : :); return rax; #elif defined(__aarch64__) return 1; #else return 0; #endif #endif } */ /* * Benchmarks Argon2 with salt length 16, password length 16, t_cost 3, and different m_cost and threads */ static void benchmark() { #define BENCH_OUTLEN 16 #define BENCH_INLEN 16 const uint32_t inlen = BENCH_INLEN; const unsigned outlen = BENCH_OUTLEN; unsigned char out[BENCH_OUTLEN]; unsigned char pwd_array[BENCH_INLEN]; unsigned char salt_array[BENCH_INLEN]; #undef BENCH_INLEN #undef BENCH_OUTLEN struct timespec start, stop; double accum; uint32_t t_cost = 3; uint32_t m_cost; uint32_t thread_test[4] = {1, 2, 4, 8}; argon2_type types[3] = {Argon2_i, Argon2_d, Argon2_id}; memset(pwd_array, 0, inlen); memset(salt_array, 1, inlen); for (m_cost = (uint32_t)1 << 10; m_cost <= (uint32_t)1 << 22; m_cost *= 2) { unsigned i; 
for (i = 0; i < 4; ++i) { double run_time = 0; uint32_t thread_n = thread_test[i]; unsigned j; for (j = 0; j < 3; ++j) { /*clock_t start_time, stop_time; uint64_t start_cycles, stop_cycles; uint64_t delta; double mcycles;*/ argon2_type type = types[j]; /*start_time = clock(); start_cycles = rdtsc();*/ if( clock_gettime( CLOCK_REALTIME, &start) == -1 ) { perror( "clock gettime" ); exit( EXIT_FAILURE ); } else { clock_gettime(CLOCK_REALTIME, &start); } argon2_hash(t_cost, m_cost, thread_n, pwd_array, inlen, salt_array, inlen, out, outlen, NULL, 0, type, ARGON2_VERSION_NUMBER); /*stop_cycles = rdtsc(); stop_time = clock();*/ /*delta = (stop_cycles - start_cycles) / (m_cost); mcycles = (double)(stop_cycles - start_cycles) / (1UL << 20); run_time += ((double)stop_time - start_time) / (CLOCKS_PER_SEC);*/ if( clock_gettime( CLOCK_REALTIME, &stop) == -1 ) { perror( "clock gettime" ); exit( EXIT_FAILURE ); } else { clock_gettime(CLOCK_REALTIME, &stop); } accum = ( (double)stop.tv_sec - (double)start.tv_sec ) + ( (double)stop.tv_nsec - (double)start.tv_nsec ) / BILLION; double mcycles = accum * BILLION; mcycles = mcycles / (1UL << 20); uint64_t delta = accum * BILLION; delta = delta / (m_cost); printf("%s %d iterations %d MiB %d threads: %2.2f cpb %2.2f " "Mcycles \n", argon2_type2string(type, 1), t_cost, m_cost >> 10, thread_n, (float)delta / 1024, mcycles); run_time += run_time / (CLOCKS_PER_SEC); /*run_time += accum; printf("%2.4f seconds\n\n", (double)run_time);*/ } /*run_time = 0;*/ run_time += accum; printf("%2.4f seconds\n\n", run_time); } } } int main() { benchmark(); return ARGON2_OK; }
This was the bench.c file from the argon2 password hashing function.
The following were the results from machine 2 running the modified program:
Argon2i 3 iterations 1 MiB 1 threads: 3.54 cpb 3.54 Mcycles Argon2d 3 iterations 1 MiB 1 threads: 3.20 cpb 3.20 Mcycles Argon2id 3 iterations 1 MiB 1 threads: 2.73 cpb 2.73 Mcycles 0.0029 seconds Argon2i 3 iterations 1 MiB 2 threads: 2.92 cpb 2.92 Mcycles Argon2d 3 iterations 1 MiB 2 threads: 2.34 cpb 2.34 Mcycles Argon2id 3 iterations 1 MiB 2 threads: 2.40 cpb 2.40 Mcycles 0.0025 seconds Argon2i 3 iterations 1 MiB 4 threads: 1.97 cpb 1.97 Mcycles Argon2d 3 iterations 1 MiB 4 threads: 1.87 cpb 1.87 Mcycles Argon2id 3 iterations 1 MiB 4 threads: 1.94 cpb 1.94 Mcycles 0.0020 seconds Argon2i 3 iterations 1 MiB 8 threads: 3.21 cpb 3.21 Mcycles Argon2d 3 iterations 1 MiB 8 threads: 3.00 cpb 3.00 Mcycles Argon2id 3 iterations 1 MiB 8 threads: 2.81 cpb 2.81 Mcycles 0.0030 seconds Argon2i 3 iterations 2 MiB 1 threads: 1.40 cpb 2.79 Mcycles Argon2d 3 iterations 2 MiB 1 threads: 1.21 cpb 2.42 Mcycles Argon2id 3 iterations 2 MiB 1 threads: 1.04 cpb 2.08 Mcycles 0.0022 seconds Argon2i 3 iterations 2 MiB 2 threads: 1.44 cpb 2.88 Mcycles Argon2d 3 iterations 2 MiB 2 threads: 1.36 cpb 2.72 Mcycles Argon2id 3 iterations 2 MiB 2 threads: 1.37 cpb 2.73 Mcycles 0.0029 seconds Argon2i 3 iterations 2 MiB 4 threads: 0.99 cpb 1.99 Mcycles Argon2d 3 iterations 2 MiB 4 threads: 1.11 cpb 2.21 Mcycles Argon2id 3 iterations 2 MiB 4 threads: 1.05 cpb 2.11 Mcycles 0.0022 seconds Argon2i 3 iterations 2 MiB 8 threads: 1.67 cpb 3.35 Mcycles Argon2d 3 iterations 2 MiB 8 threads: 1.54 cpb 3.08 Mcycles Argon2id 3 iterations 2 MiB 8 threads: 1.51 cpb 3.02 Mcycles 0.0032 seconds Argon2i 3 iterations 4 MiB 1 threads: 1.41 cpb 5.65 Mcycles Argon2d 3 iterations 4 MiB 1 threads: 1.09 cpb 4.38 Mcycles Argon2id 3 iterations 4 MiB 1 threads: 0.98 cpb 3.92 Mcycles 0.0041 seconds Argon2i 3 iterations 4 MiB 2 threads: 1.28 cpb 5.13 Mcycles Argon2d 3 iterations 4 MiB 2 threads: 1.21 cpb 4.85 Mcycles Argon2id 3 iterations 4 MiB 2 threads: 1.23 cpb 4.93 Mcycles 0.0052 seconds Argon2i 3 iterations 4 MiB 4 threads: 
0.79 cpb 3.18 Mcycles Argon2d 3 iterations 4 MiB 4 threads: 0.79 cpb 3.18 Mcycles Argon2id 3 iterations 4 MiB 4 threads: 0.81 cpb 3.22 Mcycles 0.0034 seconds Argon2i 3 iterations 4 MiB 8 threads: 1.00 cpb 4.00 Mcycles Argon2d 3 iterations 4 MiB 8 threads: 0.89 cpb 3.58 Mcycles Argon2id 3 iterations 4 MiB 8 threads: 0.91 cpb 3.64 Mcycles 0.0038 seconds Argon2i 3 iterations 8 MiB 1 threads: 1.47 cpb 11.79 Mcycles Argon2d 3 iterations 8 MiB 1 threads: 1.13 cpb 9.08 Mcycles Argon2id 3 iterations 8 MiB 1 threads: 0.97 cpb 7.80 Mcycles 0.0082 seconds Argon2i 3 iterations 8 MiB 2 threads: 1.27 cpb 10.18 Mcycles Argon2d 3 iterations 8 MiB 2 threads: 0.87 cpb 6.95 Mcycles Argon2id 3 iterations 8 MiB 2 threads: 0.88 cpb 7.00 Mcycles 0.0073 seconds Argon2i 3 iterations 8 MiB 4 threads: 0.91 cpb 7.31 Mcycles Argon2d 3 iterations 8 MiB 4 threads: 0.80 cpb 6.42 Mcycles Argon2id 3 iterations 8 MiB 4 threads: 0.59 cpb 4.70 Mcycles 0.0049 seconds Argon2i 3 iterations 8 MiB 8 threads: 0.82 cpb 6.53 Mcycles Argon2d 3 iterations 8 MiB 8 threads: 0.83 cpb 6.63 Mcycles Argon2id 3 iterations 8 MiB 8 threads: 0.81 cpb 6.47 Mcycles 0.0068 seconds Argon2i 3 iterations 16 MiB 1 threads: 1.89 cpb 30.20 Mcycles Argon2d 3 iterations 16 MiB 1 threads: 1.33 cpb 21.22 Mcycles Argon2id 3 iterations 16 MiB 1 threads: 1.17 cpb 18.70 Mcycles 0.0196 seconds Argon2i 3 iterations 16 MiB 2 threads: 1.17 cpb 18.80 Mcycles Argon2d 3 iterations 16 MiB 2 threads: 0.81 cpb 13.03 Mcycles Argon2id 3 iterations 16 MiB 2 threads: 0.79 cpb 12.57 Mcycles 0.0132 seconds Argon2i 3 iterations 16 MiB 4 threads: 0.80 cpb 12.79 Mcycles Argon2d 3 iterations 16 MiB 4 threads: 0.56 cpb 8.97 Mcycles Argon2id 3 iterations 16 MiB 4 threads: 0.53 cpb 8.45 Mcycles 0.0089 seconds Argon2i 3 iterations 16 MiB 8 threads: 0.60 cpb 9.57 Mcycles Argon2d 3 iterations 16 MiB 8 threads: 0.64 cpb 10.22 Mcycles Argon2id 3 iterations 16 MiB 8 threads: 0.68 cpb 10.83 Mcycles 0.0114 seconds Argon2i 3 iterations 32 MiB 1 threads: 1.64 cpb 52.53 
Mcycles Argon2d 3 iterations 32 MiB 1 threads: 1.50 cpb 47.89 Mcycles Argon2id 3 iterations 32 MiB 1 threads: 1.49 cpb 47.84 Mcycles 0.0502 seconds Argon2i 3 iterations 32 MiB 2 threads: 1.28 cpb 41.08 Mcycles Argon2d 3 iterations 32 MiB 2 threads: 1.29 cpb 41.17 Mcycles Argon2id 3 iterations 32 MiB 2 threads: 1.38 cpb 44.31 Mcycles 0.0465 seconds Argon2i 3 iterations 32 MiB 4 threads: 0.86 cpb 27.46 Mcycles Argon2d 3 iterations 32 MiB 4 threads: 0.74 cpb 23.58 Mcycles Argon2id 3 iterations 32 MiB 4 threads: 0.65 cpb 20.68 Mcycles 0.0217 seconds Argon2i 3 iterations 32 MiB 8 threads: 0.68 cpb 21.81 Mcycles Argon2d 3 iterations 32 MiB 8 threads: 0.69 cpb 22.09 Mcycles Argon2id 3 iterations 32 MiB 8 threads: 0.68 cpb 21.73 Mcycles 0.0228 seconds Argon2i 3 iterations 64 MiB 1 threads: 1.61 cpb 103.11 Mcycles Argon2d 3 iterations 64 MiB 1 threads: 1.58 cpb 101.05 Mcycles Argon2id 3 iterations 64 MiB 1 threads: 1.58 cpb 101.25 Mcycles 0.1062 seconds Argon2i 3 iterations 64 MiB 2 threads: 1.44 cpb 92.42 Mcycles Argon2d 3 iterations 64 MiB 2 threads: 1.18 cpb 75.76 Mcycles Argon2id 3 iterations 64 MiB 2 threads: 1.18 cpb 75.28 Mcycles 0.0789 seconds Argon2i 3 iterations 64 MiB 4 threads: 0.76 cpb 48.48 Mcycles Argon2d 3 iterations 64 MiB 4 threads: 0.65 cpb 41.49 Mcycles Argon2id 3 iterations 64 MiB 4 threads: 0.63 cpb 40.49 Mcycles 0.0425 seconds Argon2i 3 iterations 64 MiB 8 threads: 0.58 cpb 37.08 Mcycles Argon2d 3 iterations 64 MiB 8 threads: 0.61 cpb 38.88 Mcycles Argon2id 3 iterations 64 MiB 8 threads: 0.61 cpb 39.02 Mcycles 0.0409 seconds Argon2i 3 iterations 128 MiB 1 threads: 1.72 cpb 220.68 Mcycles Argon2d 3 iterations 128 MiB 1 threads: 1.65 cpb 211.20 Mcycles Argon2id 3 iterations 128 MiB 1 threads: 1.61 cpb 206.66 Mcycles 0.2167 seconds Argon2i 3 iterations 128 MiB 2 threads: 1.12 cpb 143.16 Mcycles Argon2d 3 iterations 128 MiB 2 threads: 1.11 cpb 142.53 Mcycles Argon2id 3 iterations 128 MiB 2 threads: 1.11 cpb 142.67 Mcycles 0.1496 seconds Argon2i 3 
iterations 128 MiB 4 threads: 0.68 cpb 87.52 Mcycles Argon2d 3 iterations 128 MiB 4 threads: 0.68 cpb 86.96 Mcycles Argon2id 3 iterations 128 MiB 4 threads: 0.68 cpb 86.78 Mcycles 0.0910 seconds Argon2i 3 iterations 128 MiB 8 threads: 0.59 cpb 75.56 Mcycles Argon2d 3 iterations 128 MiB 8 threads: 0.55 cpb 70.96 Mcycles Argon2id 3 iterations 128 MiB 8 threads: 0.58 cpb 74.02 Mcycles 0.0776 seconds Argon2i 3 iterations 256 MiB 1 threads: 1.75 cpb 447.73 Mcycles Argon2d 3 iterations 256 MiB 1 threads: 1.62 cpb 414.48 Mcycles Argon2id 3 iterations 256 MiB 1 threads: 1.62 cpb 415.25 Mcycles 0.4354 seconds Argon2i 3 iterations 256 MiB 2 threads: 1.17 cpb 299.72 Mcycles Argon2d 3 iterations 256 MiB 2 threads: 1.07 cpb 274.17 Mcycles Argon2id 3 iterations 256 MiB 2 threads: 1.14 cpb 291.48 Mcycles 0.3056 seconds Argon2i 3 iterations 256 MiB 4 threads: 0.70 cpb 180.25 Mcycles Argon2d 3 iterations 256 MiB 4 threads: 0.71 cpb 182.79 Mcycles Argon2id 3 iterations 256 MiB 4 threads: 0.70 cpb 180.23 Mcycles 0.1890 seconds Argon2i 3 iterations 256 MiB 8 threads: 0.54 cpb 137.75 Mcycles Argon2d 3 iterations 256 MiB 8 threads: 0.54 cpb 139.23 Mcycles Argon2id 3 iterations 256 MiB 8 threads: 0.53 cpb 134.82 Mcycles 0.1414 seconds
This is strange, as the original program produced the following result:
2292451852727619283Argon2i 3 iterations 1 MiB 1 threads: 10574.63 cpb 10574.64 Mcycles 9176590593415145417Argon2d 3 iterations 1 MiB 1 threads: 10573.79 cpb 10573.79 Mcycles 16050798784100622823Argon2id 3 iterations 1 MiB 1 threads: 10571.93 cpb 10571.94 Mcycles 0.0100 seconds 2290633554493452044Argon2i 3 iterations 1 MiB 2 threads: 10574.07 cpb 10574.07 Mcycles 29783368801178634129Argon2d 3 iterations 1 MiB 2 threads: 10571.67 cpb 10571.67 Mcycles 36635109851864293143Argon2id 3 iterations 1 MiB 2 threads: 10572.13 cpb 10572.13 Mcycles 0.0160 seconds
Note: Each line begins with a seemingly random number. The cpb and Mcycles values were also very large, which would mean the CPU took much longer to hash the input.
I will now change the optimization level to -O3 and retest the program.
Result:
Argon2i 3 iterations 1 MiB 1 threads: 3.42 cpb 3.42 Mcycles Argon2d 3 iterations 1 MiB 1 threads: 3.18 cpb 3.18 Mcycles Argon2id 3 iterations 1 MiB 1 threads: 2.72 cpb 2.72 Mcycles 0.0029 seconds Argon2i 3 iterations 1 MiB 2 threads: 2.49 cpb 2.49 Mcycles Argon2d 3 iterations 1 MiB 2 threads: 2.33 cpb 2.33 Mcycles Argon2id 3 iterations 1 MiB 2 threads: 2.30 cpb 2.31 Mcycles 0.0024 seconds Argon2i 3 iterations 1 MiB 4 threads: 2.23 cpb 2.23 Mcycles Argon2d 3 iterations 1 MiB 4 threads: 2.06 cpb 2.06 Mcycles Argon2id 3 iterations 1 MiB 4 threads: 1.71 cpb 1.71 Mcycles 0.0018 seconds Argon2i 3 iterations 1 MiB 8 threads: 3.17 cpb 3.17 Mcycles Argon2d 3 iterations 1 MiB 8 threads: 3.00 cpb 3.00 Mcycles Argon2id 3 iterations 1 MiB 8 threads: 2.99 cpb 2.99 Mcycles 0.0031 seconds Argon2i 3 iterations 2 MiB 1 threads: 1.41 cpb 2.82 Mcycles Argon2d 3 iterations 2 MiB 1 threads: 1.23 cpb 2.47 Mcycles Argon2id 3 iterations 2 MiB 1 threads: 1.04 cpb 2.07 Mcycles 0.0022 seconds Argon2i 3 iterations 2 MiB 2 threads: 1.39 cpb 2.79 Mcycles Argon2d 3 iterations 2 MiB 2 threads: 1.36 cpb 2.73 Mcycles Argon2id 3 iterations 2 MiB 2 threads: 1.34 cpb 2.69 Mcycles 0.0028 seconds Argon2i 3 iterations 2 MiB 4 threads: 1.02 cpb 2.04 Mcycles Argon2d 3 iterations 2 MiB 4 threads: 0.99 cpb 1.99 Mcycles Argon2id 3 iterations 2 MiB 4 threads: 1.00 cpb 1.99 Mcycles 0.0021 seconds Argon2i 3 iterations 2 MiB 8 threads: 1.71 cpb 3.43 Mcycles Argon2d 3 iterations 2 MiB 8 threads: 1.68 cpb 3.37 Mcycles Argon2id 3 iterations 2 MiB 8 threads: 1.64 cpb 3.29 Mcycles 0.0034 seconds Argon2i 3 iterations 4 MiB 1 threads: 1.37 cpb 5.49 Mcycles Argon2d 3 iterations 4 MiB 1 threads: 1.10 cpb 4.40 Mcycles Argon2id 3 iterations 4 MiB 1 threads: 1.01 cpb 4.06 Mcycles 0.0043 seconds Argon2i 3 iterations 4 MiB 2 threads: 1.35 cpb 5.40 Mcycles Argon2d 3 iterations 4 MiB 2 threads: 1.18 cpb 4.71 Mcycles Argon2id 3 iterations 4 MiB 2 threads: 1.19 cpb 4.78 Mcycles 0.0050 seconds Argon2i 3 iterations 4 MiB 4 threads: 
0.91 cpb 3.65 Mcycles Argon2d 3 iterations 4 MiB 4 threads: 0.91 cpb 3.63 Mcycles Argon2id 3 iterations 4 MiB 4 threads: 0.90 cpb 3.62 Mcycles 0.0038 seconds Argon2i 3 iterations 4 MiB 8 threads: 1.02 cpb 4.08 Mcycles Argon2d 3 iterations 4 MiB 8 threads: 1.01 cpb 4.03 Mcycles Argon2id 3 iterations 4 MiB 8 threads: 0.95 cpb 3.80 Mcycles 0.0040 seconds Argon2i 3 iterations 8 MiB 1 threads: 1.40 cpb 11.22 Mcycles Argon2d 3 iterations 8 MiB 1 threads: 1.16 cpb 9.25 Mcycles Argon2id 3 iterations 8 MiB 1 threads: 0.99 cpb 7.93 Mcycles 0.0083 seconds Argon2i 3 iterations 8 MiB 2 threads: 1.42 cpb 11.40 Mcycles Argon2d 3 iterations 8 MiB 2 threads: 0.88 cpb 7.03 Mcycles Argon2id 3 iterations 8 MiB 2 threads: 0.75 cpb 6.02 Mcycles 0.0063 seconds Argon2i 3 iterations 8 MiB 4 threads: 0.94 cpb 7.49 Mcycles Argon2d 3 iterations 8 MiB 4 threads: 0.74 cpb 5.96 Mcycles Argon2id 3 iterations 8 MiB 4 threads: 0.55 cpb 4.44 Mcycles 0.0047 seconds Argon2i 3 iterations 8 MiB 8 threads: 0.71 cpb 5.67 Mcycles Argon2d 3 iterations 8 MiB 8 threads: 0.76 cpb 6.11 Mcycles Argon2id 3 iterations 8 MiB 8 threads: 0.75 cpb 5.97 Mcycles 0.0063 seconds Argon2i 3 iterations 16 MiB 1 threads: 1.62 cpb 25.97 Mcycles Argon2d 3 iterations 16 MiB 1 threads: 1.27 cpb 20.26 Mcycles Argon2id 3 iterations 16 MiB 1 threads: 1.14 cpb 18.20 Mcycles 0.0191 seconds Argon2i 3 iterations 16 MiB 2 threads: 1.35 cpb 21.65 Mcycles Argon2d 3 iterations 16 MiB 2 threads: 0.98 cpb 15.62 Mcycles Argon2id 3 iterations 16 MiB 2 threads: 0.92 cpb 14.74 Mcycles 0.0155 seconds Argon2i 3 iterations 16 MiB 4 threads: 0.84 cpb 13.44 Mcycles Argon2d 3 iterations 16 MiB 4 threads: 0.54 cpb 8.65 Mcycles Argon2id 3 iterations 16 MiB 4 threads: 0.58 cpb 9.27 Mcycles 0.0097 seconds Argon2i 3 iterations 16 MiB 8 threads: 0.61 cpb 9.80 Mcycles Argon2d 3 iterations 16 MiB 8 threads: 0.61 cpb 9.72 Mcycles Argon2id 3 iterations 16 MiB 8 threads: 0.67 cpb 10.75 Mcycles 0.0113 seconds Argon2i 3 iterations 32 MiB 1 threads: 1.58 cpb 50.49 
Mcycles Argon2d 3 iterations 32 MiB 1 threads: 1.47 cpb 46.95 Mcycles Argon2id 3 iterations 32 MiB 1 threads: 1.47 cpb 47.09 Mcycles 0.0494 seconds Argon2i 3 iterations 32 MiB 2 threads: 1.46 cpb 46.79 Mcycles Argon2d 3 iterations 32 MiB 2 threads: 1.39 cpb 44.55 Mcycles Argon2id 3 iterations 32 MiB 2 threads: 1.42 cpb 45.41 Mcycles 0.0476 seconds Argon2i 3 iterations 32 MiB 4 threads: 0.85 cpb 27.25 Mcycles Argon2d 3 iterations 32 MiB 4 threads: 0.63 cpb 20.09 Mcycles Argon2id 3 iterations 32 MiB 4 threads: 0.67 cpb 21.30 Mcycles 0.0223 seconds Argon2i 3 iterations 32 MiB 8 threads: 0.65 cpb 20.74 Mcycles Argon2d 3 iterations 32 MiB 8 threads: 0.67 cpb 21.54 Mcycles Argon2id 3 iterations 32 MiB 8 threads: 0.67 cpb 21.34 Mcycles 0.0224 seconds Argon2i 3 iterations 64 MiB 1 threads: 1.60 cpb 102.66 Mcycles Argon2d 3 iterations 64 MiB 1 threads: 1.55 cpb 99.24 Mcycles Argon2id 3 iterations 64 MiB 1 threads: 1.55 cpb 99.25 Mcycles 0.1041 seconds Argon2i 3 iterations 64 MiB 2 threads: 1.22 cpb 78.43 Mcycles Argon2d 3 iterations 64 MiB 2 threads: 1.26 cpb 80.65 Mcycles Argon2id 3 iterations 64 MiB 2 threads: 1.20 cpb 76.73 Mcycles 0.0805 seconds Argon2i 3 iterations 64 MiB 4 threads: 0.76 cpb 48.88 Mcycles Argon2d 3 iterations 64 MiB 4 threads: 0.68 cpb 43.39 Mcycles Argon2id 3 iterations 64 MiB 4 threads: 0.74 cpb 47.31 Mcycles 0.0496 seconds Argon2i 3 iterations 64 MiB 8 threads: 0.65 cpb 41.82 Mcycles Argon2d 3 iterations 64 MiB 8 threads: 0.63 cpb 40.18 Mcycles Argon2id 3 iterations 64 MiB 8 threads: 0.67 cpb 42.62 Mcycles 0.0447 seconds Argon2i 3 iterations 128 MiB 1 threads: 1.66 cpb 212.21 Mcycles Argon2d 3 iterations 128 MiB 1 threads: 1.72 cpb 219.73 Mcycles Argon2id 3 iterations 128 MiB 1 threads: 1.64 cpb 209.82 Mcycles 0.2200 seconds Argon2i 3 iterations 128 MiB 2 threads: 1.24 cpb 158.31 Mcycles Argon2d 3 iterations 128 MiB 2 threads: 1.11 cpb 142.63 Mcycles Argon2id 3 iterations 128 MiB 2 threads: 1.19 cpb 152.53 Mcycles 0.1599 seconds Argon2i 3 iterations 
128 MiB 4 threads: 0.75 cpb 95.45 Mcycles Argon2d 3 iterations 128 MiB 4 threads: 0.68 cpb 86.76 Mcycles Argon2id 3 iterations 128 MiB 4 threads: 0.68 cpb 87.00 Mcycles 0.0912 seconds Argon2i 3 iterations 128 MiB 8 threads: 0.57 cpb 72.78 Mcycles Argon2d 3 iterations 128 MiB 8 threads: 0.58 cpb 74.95 Mcycles Argon2id 3 iterations 128 MiB 8 threads: 0.59 cpb 75.34 Mcycles 0.0790 seconds Argon2i 3 iterations 256 MiB 1 threads: 1.76 cpb 451.19 Mcycles Argon2d 3 iterations 256 MiB 1 threads: 1.69 cpb 433.36 Mcycles Argon2id 3 iterations 256 MiB 1 threads: 1.60 cpb 408.90 Mcycles 0.4288 seconds Argon2i 3 iterations 256 MiB 2 threads: 1.16 cpb 296.43 Mcycles Argon2d 3 iterations 256 MiB 2 threads: 1.09 cpb 279.88 Mcycles Argon2id 3 iterations 256 MiB 2 threads: 1.18 cpb 301.38 Mcycles 0.3160 seconds Argon2i 3 iterations 256 MiB 4 threads: 0.74 cpb 189.06 Mcycles Argon2d 3 iterations 256 MiB 4 threads: 0.68 cpb 174.25 Mcycles Argon2id 3 iterations 256 MiB 4 threads: 0.71 cpb 180.84 Mcycles 0.1896 seconds Argon2i 3 iterations 256 MiB 8 threads: 0.50 cpb 128.98 Mcycles Argon2d 3 iterations 256 MiB 8 threads: 0.55 cpb 141.48 Mcycles Argon2id 3 iterations 256 MiB 8 threads: 0.52 cpb 132.25 Mcycles 0.1387 seconds Argon2i 3 iterations 512 MiB 1 threads: 1.75 cpb 895.61 Mcycles Argon2d 3 iterations 512 MiB 1 threads: 1.65 cpb 844.13 Mcycles Argon2id 3 iterations 512 MiB 1 threads: 1.65 cpb 843.89 Mcycles 0.8849 seconds Argon2i 3 iterations 512 MiB 2 threads: 1.10 cpb 563.01 Mcycles Argon2d 3 iterations 512 MiB 2 threads: 1.12 cpb 573.63 Mcycles Argon2id 3 iterations 512 MiB 2 threads: 1.12 cpb 575.07 Mcycles 0.6030 seconds Argon2i 3 iterations 512 MiB 4 threads: 0.67 cpb 341.87 Mcycles Argon2d 3 iterations 512 MiB 4 threads: 0.69 cpb 351.20 Mcycles Argon2id 3 iterations 512 MiB 4 threads: 0.66 cpb 337.59 Mcycles 0.3540 seconds Argon2i 3 iterations 512 MiB 8 threads: 0.50 cpb 255.14 Mcycles Argon2d 3 iterations 512 MiB 8 threads: 0.49 cpb 253.08 Mcycles Argon2id 3 iterations 512 
MiB 8 threads: 0.50 cpb 258.21 Mcycles 0.2708 seconds
The program runs fairly fast, which is expected at optimization level -O3.
Test 3 (Extra):
I will be testing on a third machine.
Specifications:
8 core aarch64 X-Gene CPU
Two sticks of DDR3 4096 MB RAM @ 1600 MHz
Fedora 28 64-bit Linux Operating System
Result:
This is with optimization level -O2.
Building without optimizations cc -std=c89 -O2 -Wall -g -Iinclude -Isrc -pthread src/argon2.c src/core.c src/blake2/blake2b.c src/thread.c src/encoding.c src/ref.c src/bench.c -o bench
Argon2i 3 iterations 1 MiB 1 threads: 5.51 cpb 5.51 Mcycles Argon2d 3 iterations 1 MiB 1 threads: 5.18 cpb 5.18 Mcycles Argon2id 3 iterations 1 MiB 1 threads: 4.78 cpb 4.78 Mcycles 0.0050 seconds Argon2i 3 iterations 1 MiB 2 threads: 4.00 cpb 4.00 Mcycles Argon2d 3 iterations 1 MiB 2 threads: 3.67 cpb 3.67 Mcycles Argon2id 3 iterations 1 MiB 2 threads: 3.76 cpb 3.76 Mcycles 0.0039 seconds Argon2i 3 iterations 1 MiB 4 threads: 3.16 cpb 3.16 Mcycles Argon2d 3 iterations 1 MiB 4 threads: 2.95 cpb 2.95 Mcycles Argon2id 3 iterations 1 MiB 4 threads: 3.07 cpb 3.07 Mcycles 0.0032 seconds Argon2i 3 iterations 1 MiB 8 threads: 5.75 cpb 5.75 Mcycles Argon2d 3 iterations 1 MiB 8 threads: 5.90 cpb 5.90 Mcycles Argon2id 3 iterations 1 MiB 8 threads: 6.04 cpb 6.04 Mcycles 0.0063 seconds Argon2i 3 iterations 2 MiB 1 threads: 5.48 cpb 10.96 Mcycles Argon2d 3 iterations 2 MiB 1 threads: 5.27 cpb 10.53 Mcycles Argon2id 3 iterations 2 MiB 1 threads: 4.80 cpb 9.59 Mcycles 0.0101 seconds Argon2i 3 iterations 2 MiB 2 threads: 3.18 cpb 6.35 Mcycles Argon2d 3 iterations 2 MiB 2 threads: 3.14 cpb 6.27 Mcycles Argon2id 3 iterations 2 MiB 2 threads: 3.05 cpb 6.10 Mcycles 0.0064 seconds Argon2i 3 iterations 2 MiB 4 threads: 2.38 cpb 4.76 Mcycles Argon2d 3 iterations 2 MiB 4 threads: 2.33 cpb 4.67 Mcycles Argon2id 3 iterations 2 MiB 4 threads: 2.36 cpb 4.72 Mcycles 0.0050 seconds Argon2i 3 iterations 2 MiB 8 threads: 3.62 cpb 7.23 Mcycles Argon2d 3 iterations 2 MiB 8 threads: 3.58 cpb 7.15 Mcycles Argon2id 3 iterations 2 MiB 8 threads: 3.67 cpb 7.34 Mcycles 0.0077 seconds Argon2i 3 iterations 4 MiB 1 threads: 5.58 cpb 22.32 Mcycles Argon2d 3 iterations 4 MiB 1 threads: 5.09 cpb 20.35 Mcycles Argon2id 3 iterations 4 MiB 1 threads: 4.84 cpb 19.36 Mcycles 0.0203 seconds Argon2i 3 iterations 4 MiB 2 threads: 2.87 cpb 11.49 Mcycles Argon2d 3 iterations 4 MiB 2 threads: 2.86 cpb 11.45 Mcycles Argon2id 3 iterations 4 MiB 2 threads: 2.84 cpb 11.38 Mcycles 0.0119 seconds Argon2i 3 iterations 4 MiB 4 
threads: 1.89 cpb 7.54 Mcycles Argon2d 3 iterations 4 MiB 4 threads: 1.82 cpb 7.30 Mcycles Argon2id 3 iterations 4 MiB 4 threads: 1.80 cpb 7.21 Mcycles 0.0076 seconds Argon2i 3 iterations 4 MiB 8 threads: 2.47 cpb 9.90 Mcycles Argon2d 3 iterations 4 MiB 8 threads: 2.55 cpb 10.19 Mcycles Argon2id 3 iterations 4 MiB 8 threads: 2.63 cpb 10.51 Mcycles 0.0110 seconds Argon2i 3 iterations 8 MiB 1 threads: 5.82 cpb 46.54 Mcycles Argon2d 3 iterations 8 MiB 1 threads: 5.33 cpb 42.66 Mcycles Argon2id 3 iterations 8 MiB 1 threads: 5.04 cpb 40.33 Mcycles 0.0423 seconds Argon2i 3 iterations 8 MiB 2 threads: 2.84 cpb 22.69 Mcycles Argon2d 3 iterations 8 MiB 2 threads: 2.78 cpb 22.22 Mcycles Argon2id 3 iterations 8 MiB 2 threads: 2.83 cpb 22.65 Mcycles 0.0237 seconds Argon2i 3 iterations 8 MiB 4 threads: 1.65 cpb 13.20 Mcycles Argon2d 3 iterations 8 MiB 4 threads: 1.63 cpb 13.07 Mcycles Argon2id 3 iterations 8 MiB 4 threads: 1.64 cpb 13.11 Mcycles 0.0137 seconds Argon2i 3 iterations 8 MiB 8 threads: 2.09 cpb 16.73 Mcycles Argon2d 3 iterations 8 MiB 8 threads: 1.95 cpb 15.62 Mcycles Argon2id 3 iterations 8 MiB 8 threads: 2.36 cpb 18.85 Mcycles 0.0198 seconds Argon2i 3 iterations 16 MiB 1 threads: 6.14 cpb 98.25 Mcycles Argon2d 3 iterations 16 MiB 1 threads: 5.70 cpb 91.25 Mcycles Argon2id 3 iterations 16 MiB 1 threads: 5.47 cpb 87.54 Mcycles 0.0918 seconds Argon2i 3 iterations 16 MiB 2 threads: 2.98 cpb 47.67 Mcycles Argon2d 3 iterations 16 MiB 2 threads: 2.93 cpb 46.88 Mcycles Argon2id 3 iterations 16 MiB 2 threads: 2.94 cpb 47.08 Mcycles 0.0494 seconds Argon2i 3 iterations 16 MiB 4 threads: 1.62 cpb 25.96 Mcycles Argon2d 3 iterations 16 MiB 4 threads: 1.61 cpb 25.72 Mcycles Argon2id 3 iterations 16 MiB 4 threads: 1.62 cpb 25.90 Mcycles 0.0272 seconds Argon2i 3 iterations 16 MiB 8 threads: 1.79 cpb 28.67 Mcycles Argon2d 3 iterations 16 MiB 8 threads: 1.75 cpb 28.07 Mcycles Argon2id 3 iterations 16 MiB 8 threads: 1.82 cpb 29.16 Mcycles 0.0306 seconds Argon2i 3 iterations 32 MiB 1 
threads: 6.34 cpb 203.00 Mcycles Argon2d 3 iterations 32 MiB 1 threads: 6.26 cpb 200.26 Mcycles Argon2id 3 iterations 32 MiB 1 threads: 6.27 cpb 200.72 Mcycles 0.2105 seconds Argon2i 3 iterations 32 MiB 2 threads: 3.42 cpb 109.52 Mcycles Argon2d 3 iterations 32 MiB 2 threads: 3.38 cpb 108.09 Mcycles Argon2id 3 iterations 32 MiB 2 threads: 3.38 cpb 108.12 Mcycles 0.1134 seconds Argon2i 3 iterations 32 MiB 4 threads: 1.93 cpb 61.63 Mcycles Argon2d 3 iterations 32 MiB 4 threads: 1.90 cpb 60.92 Mcycles Argon2id 3 iterations 32 MiB 4 threads: 1.94 cpb 62.00 Mcycles 0.0650 seconds Argon2i 3 iterations 32 MiB 8 threads: 1.94 cpb 62.07 Mcycles Argon2d 3 iterations 32 MiB 8 threads: 1.96 cpb 62.58 Mcycles Argon2id 3 iterations 32 MiB 8 threads: 1.92 cpb 61.30 Mcycles 0.0643 seconds Argon2i 3 iterations 64 MiB 1 threads: 6.48 cpb 414.84 Mcycles Argon2d 3 iterations 64 MiB 1 threads: 6.40 cpb 409.88 Mcycles Argon2id 3 iterations 64 MiB 1 threads: 6.41 cpb 410.55 Mcycles 0.4305 seconds Argon2i 3 iterations 64 MiB 2 threads: 3.47 cpb 221.90 Mcycles Argon2d 3 iterations 64 MiB 2 threads: 3.43 cpb 219.27 Mcycles Argon2id 3 iterations 64 MiB 2 threads: 3.43 cpb 219.69 Mcycles 0.2304 seconds Argon2i 3 iterations 64 MiB 4 threads: 1.92 cpb 123.08 Mcycles Argon2d 3 iterations 64 MiB 4 threads: 1.90 cpb 121.74 Mcycles Argon2id 3 iterations 64 MiB 4 threads: 1.93 cpb 123.49 Mcycles 0.1295 seconds Argon2i 3 iterations 64 MiB 8 threads: 1.82 cpb 116.51 Mcycles Argon2d 3 iterations 64 MiB 8 threads: 1.79 cpb 114.79 Mcycles Argon2id 3 iterations 64 MiB 8 threads: 1.80 cpb 115.02 Mcycles 0.1206 seconds Argon2i 3 iterations 128 MiB 1 threads: 6.60 cpb 844.52 Mcycles Argon2d 3 iterations 128 MiB 1 threads: 6.52 cpb 835.11 Mcycles Argon2id 3 iterations 128 MiB 1 threads: 6.54 cpb 836.68 Mcycles 0.8773 seconds Argon2i 3 iterations 128 MiB 2 threads: 3.52 cpb 450.00 Mcycles Argon2d 3 iterations 128 MiB 2 threads: 3.47 cpb 444.85 Mcycles Argon2id 3 iterations 128 MiB 2 threads: 3.49 cpb 446.23 
Mcycles 0.4679 seconds Argon2i 3 iterations 128 MiB 4 threads: 1.94 cpb 247.84 Mcycles Argon2d 3 iterations 128 MiB 4 threads: 1.91 cpb 245.05 Mcycles Argon2id 3 iterations 128 MiB 4 threads: 1.92 cpb 245.15 Mcycles 0.2571 seconds Argon2i 3 iterations 128 MiB 8 threads: 1.73 cpb 221.21 Mcycles Argon2d 3 iterations 128 MiB 8 threads: 1.70 cpb 217.79 Mcycles Argon2id 3 iterations 128 MiB 8 threads: 1.64 cpb 209.97 Mcycles 0.2202 seconds Argon2i 3 iterations 256 MiB 1 threads: 6.69 cpb 1712.64 Mcycles Argon2d 3 iterations 256 MiB 1 threads: 6.62 cpb 1694.77 Mcycles Argon2id 3 iterations 256 MiB 1 threads: 6.63 cpb 1696.72 Mcycles 1.7791 seconds Argon2i 3 iterations 256 MiB 2 threads: 3.55 cpb 909.09 Mcycles Argon2d 3 iterations 256 MiB 2 threads: 3.51 cpb 899.22 Mcycles Argon2id 3 iterations 256 MiB 2 threads: 3.52 cpb 900.67 Mcycles 0.9444 seconds Argon2i 3 iterations 256 MiB 4 threads: 1.95 cpb 499.72 Mcycles Argon2d 3 iterations 256 MiB 4 threads: 1.94 cpb 497.66 Mcycles Argon2id 3 iterations 256 MiB 4 threads: 1.94 cpb 496.66 Mcycles 0.5208 seconds Argon2i 3 iterations 256 MiB 8 threads: 1.48 cpb 379.07 Mcycles Argon2d 3 iterations 256 MiB 8 threads: 1.55 cpb 398.15 Mcycles Argon2id 3 iterations 256 MiB 8 threads: 1.58 cpb 403.45 Mcycles 0.4230 seconds Argon2i 3 iterations 512 MiB 1 threads: 6.75 cpb 3458.96 Mcycles Argon2d 3 iterations 512 MiB 1 threads: 6.68 cpb 3419.92 Mcycles Argon2id 3 iterations 512 MiB 1 threads: 6.69 cpb 3426.03 Mcycles 3.5925 seconds Argon2i 3 iterations 512 MiB 2 threads: 3.58 cpb 1835.84 Mcycles Argon2d 3 iterations 512 MiB 2 threads: 3.55 cpb 1816.11 Mcycles Argon2id 3 iterations 512 MiB 2 threads: 3.55 cpb 1819.26 Mcycles 1.9076 seconds Argon2i 3 iterations 512 MiB 4 threads: 1.97 cpb 1009.56 Mcycles Argon2d 3 iterations 512 MiB 4 threads: 1.95 cpb 997.45 Mcycles Argon2id 3 iterations 512 MiB 4 threads: 2.01 cpb 1028.11 Mcycles 1.0780 seconds Argon2i 3 iterations 512 MiB 8 threads: 1.41 cpb 721.65 Mcycles Argon2d 3 iterations 512 MiB 
8 threads: 1.64 cpb 839.50 Mcycles Argon2id 3 iterations 512 MiB 8 threads: 1.69 cpb 865.63 Mcycles 0.9077 seconds
This machine runs noticeably slower than the other two. It also has less memory than the other two machines, so I suppose this result is to be expected.
Moving on to the next optimization level -O3.
Result:
Building without optimizations cc -std=c89 -O3 -Wall -g -Iinclude -Isrc -pthread src/argon2.c src/core.c src/blake2/blake2b.c src/thread.c src/encoding.c src/ref.c src/bench.c -o bench
Argon2i 3 iterations 1 MiB 1 threads: 5.75 cpb 5.75 Mcycles Argon2d 3 iterations 1 MiB 1 threads: 5.45 cpb 5.45 Mcycles Argon2id 3 iterations 1 MiB 1 threads: 5.04 cpb 5.04 Mcycles 0.0053 seconds Argon2i 3 iterations 1 MiB 2 threads: 3.97 cpb 3.97 Mcycles Argon2d 3 iterations 1 MiB 2 threads: 3.59 cpb 3.59 Mcycles Argon2id 3 iterations 1 MiB 2 threads: 3.54 cpb 3.54 Mcycles 0.0037 seconds Argon2i 3 iterations 1 MiB 4 threads: 3.00 cpb 3.00 Mcycles Argon2d 3 iterations 1 MiB 4 threads: 2.84 cpb 2.84 Mcycles Argon2id 3 iterations 1 MiB 4 threads: 2.77 cpb 2.77 Mcycles 0.0029 seconds Argon2i 3 iterations 1 MiB 8 threads: 5.19 cpb 5.20 Mcycles Argon2d 3 iterations 1 MiB 8 threads: 5.07 cpb 5.07 Mcycles Argon2id 3 iterations 1 MiB 8 threads: 4.92 cpb 4.93 Mcycles 0.0052 seconds Argon2i 3 iterations 2 MiB 1 threads: 5.70 cpb 11.40 Mcycles Argon2d 3 iterations 2 MiB 1 threads: 5.49 cpb 10.98 Mcycles Argon2id 3 iterations 2 MiB 1 threads: 5.07 cpb 10.14 Mcycles 0.0106 seconds Argon2i 3 iterations 2 MiB 2 threads: 3.19 cpb 6.39 Mcycles Argon2d 3 iterations 2 MiB 2 threads: 3.15 cpb 6.30 Mcycles Argon2id 3 iterations 2 MiB 2 threads: 3.21 cpb 6.43 Mcycles 0.0067 seconds Argon2i 3 iterations 2 MiB 4 threads: 2.20 cpb 4.41 Mcycles Argon2d 3 iterations 2 MiB 4 threads: 2.22 cpb 4.44 Mcycles Argon2id 3 iterations 2 MiB 4 threads: 2.16 cpb 4.32 Mcycles 0.0045 seconds Argon2i 3 iterations 2 MiB 8 threads: 3.68 cpb 7.36 Mcycles Argon2d 3 iterations 2 MiB 8 threads: 2.80 cpb 5.61 Mcycles Argon2id 3 iterations 2 MiB 8 threads: 2.79 cpb 5.58 Mcycles 0.0058 seconds Argon2i 3 iterations 4 MiB 1 threads: 5.81 cpb 23.23 Mcycles Argon2d 3 iterations 4 MiB 1 threads: 5.34 cpb 21.38 Mcycles Argon2id 3 iterations 4 MiB 1 threads: 5.11 cpb 20.43 Mcycles 0.0214 seconds Argon2i 3 iterations 4 MiB 2 threads: 2.98 cpb 11.93 Mcycles Argon2d 3 iterations 4 MiB 2 threads: 2.93 cpb 11.73 Mcycles Argon2id 3 iterations 4 MiB 2 threads: 2.93 cpb 11.71 Mcycles 0.0123 seconds Argon2i 3 iterations 4 MiB 4 
threads: 1.82 cpb 7.28 Mcycles Argon2d 3 iterations 4 MiB 4 threads: 1.77 cpb 7.08 Mcycles Argon2id 3 iterations 4 MiB 4 threads: 1.77 cpb 7.07 Mcycles 0.0074 seconds Argon2i 3 iterations 4 MiB 8 threads: 2.50 cpb 9.99 Mcycles Argon2d 3 iterations 4 MiB 8 threads: 2.70 cpb 10.82 Mcycles Argon2id 3 iterations 4 MiB 8 threads: 2.89 cpb 11.54 Mcycles 0.0121 seconds Argon2i 3 iterations 8 MiB 1 threads: 6.05 cpb 48.43 Mcycles Argon2d 3 iterations 8 MiB 1 threads: 5.58 cpb 44.62 Mcycles Argon2id 3 iterations 8 MiB 1 threads: 5.31 cpb 42.46 Mcycles 0.0445 seconds Argon2i 3 iterations 8 MiB 2 threads: 2.95 cpb 23.60 Mcycles Argon2d 3 iterations 8 MiB 2 threads: 2.91 cpb 23.26 Mcycles Argon2id 3 iterations 8 MiB 2 threads: 2.90 cpb 23.23 Mcycles 0.0244 seconds Argon2i 3 iterations 8 MiB 4 threads: 1.66 cpb 13.24 Mcycles Argon2d 3 iterations 8 MiB 4 threads: 1.64 cpb 13.13 Mcycles Argon2id 3 iterations 8 MiB 4 threads: 1.64 cpb 13.10 Mcycles 0.0137 seconds Argon2i 3 iterations 8 MiB 8 threads: 2.03 cpb 16.25 Mcycles Argon2d 3 iterations 8 MiB 8 threads: 2.29 cpb 18.37 Mcycles Argon2id 3 iterations 8 MiB 8 threads: 1.92 cpb 15.33 Mcycles 0.0161 seconds Argon2i 3 iterations 16 MiB 1 threads: 6.37 cpb 102.00 Mcycles Argon2d 3 iterations 16 MiB 1 threads: 5.97 cpb 95.50 Mcycles Argon2id 3 iterations 16 MiB 1 threads: 5.74 cpb 91.90 Mcycles 0.0964 seconds Argon2i 3 iterations 16 MiB 2 threads: 3.12 cpb 49.90 Mcycles Argon2d 3 iterations 16 MiB 2 threads: 3.07 cpb 49.17 Mcycles Argon2id 3 iterations 16 MiB 2 threads: 3.08 cpb 49.33 Mcycles 0.0517 seconds Argon2i 3 iterations 16 MiB 4 threads: 1.70 cpb 27.26 Mcycles Argon2d 3 iterations 16 MiB 4 threads: 1.68 cpb 26.94 Mcycles Argon2id 3 iterations 16 MiB 4 threads: 1.69 cpb 27.04 Mcycles 0.0283 seconds Argon2i 3 iterations 16 MiB 8 threads: 1.81 cpb 28.91 Mcycles Argon2d 3 iterations 16 MiB 8 threads: 1.87 cpb 29.85 Mcycles Argon2id 3 iterations 16 MiB 8 threads: 1.87 cpb 29.86 Mcycles 0.0313 seconds Argon2i 3 iterations 32 MiB 1 
threads: 6.57 cpb 210.38 Mcycles Argon2d 3 iterations 32 MiB 1 threads: 6.51 cpb 208.24 Mcycles Argon2id 3 iterations 32 MiB 1 threads: 6.52 cpb 208.70 Mcycles 0.2188 seconds Argon2i 3 iterations 32 MiB 2 threads: 3.53 cpb 112.92 Mcycles Argon2d 3 iterations 32 MiB 2 threads: 3.49 cpb 111.63 Mcycles Argon2id 3 iterations 32 MiB 2 threads: 3.50 cpb 111.91 Mcycles 0.1173 seconds Argon2i 3 iterations 32 MiB 4 threads: 1.97 cpb 63.21 Mcycles Argon2d 3 iterations 32 MiB 4 threads: 1.96 cpb 62.57 Mcycles Argon2id 3 iterations 32 MiB 4 threads: 1.96 cpb 62.68 Mcycles 0.0657 seconds Argon2i 3 iterations 32 MiB 8 threads: 1.89 cpb 60.42 Mcycles Argon2d 3 iterations 32 MiB 8 threads: 2.00 cpb 63.85 Mcycles Argon2id 3 iterations 32 MiB 8 threads: 2.03 cpb 64.85 Mcycles 0.0680 seconds Argon2i 3 iterations 64 MiB 1 threads: 6.72 cpb 430.30 Mcycles Argon2d 3 iterations 64 MiB 1 threads: 6.66 cpb 426.03 Mcycles Argon2id 3 iterations 64 MiB 1 threads: 6.67 cpb 426.61 Mcycles 0.4473 seconds Argon2i 3 iterations 64 MiB 2 threads: 3.58 cpb 229.32 Mcycles Argon2d 3 iterations 64 MiB 2 threads: 3.54 cpb 226.89 Mcycles Argon2id 3 iterations 64 MiB 2 threads: 3.55 cpb 227.27 Mcycles 0.2383 seconds Argon2i 3 iterations 64 MiB 4 threads: 1.98 cpb 126.75 Mcycles Argon2d 3 iterations 64 MiB 4 threads: 1.96 cpb 125.35 Mcycles Argon2id 3 iterations 64 MiB 4 threads: 1.96 cpb 125.71 Mcycles 0.1318 seconds Argon2i 3 iterations 64 MiB 8 threads: 1.87 cpb 119.64 Mcycles Argon2d 3 iterations 64 MiB 8 threads: 1.94 cpb 123.96 Mcycles Argon2id 3 iterations 64 MiB 8 threads: 1.90 cpb 121.41 Mcycles 0.1273 seconds Argon2i 3 iterations 128 MiB 1 threads: 6.83 cpb 874.04 Mcycles Argon2d 3 iterations 128 MiB 1 threads: 6.77 cpb 866.06 Mcycles Argon2id 3 iterations 128 MiB 1 threads: 6.78 cpb 867.69 Mcycles 0.9098 seconds Argon2i 3 iterations 128 MiB 2 threads: 3.62 cpb 464.03 Mcycles Argon2d 3 iterations 128 MiB 2 threads: 3.60 cpb 460.44 Mcycles Argon2id 3 iterations 128 MiB 2 threads: 3.59 cpb 460.12 
Mcycles 0.4825 seconds Argon2i 3 iterations 128 MiB 4 threads: 2.00 cpb 255.49 Mcycles Argon2d 3 iterations 128 MiB 4 threads: 1.97 cpb 251.78 Mcycles Argon2id 3 iterations 128 MiB 4 threads: 1.97 cpb 252.45 Mcycles 0.2647 seconds Argon2i 3 iterations 128 MiB 8 threads: 1.85 cpb 236.45 Mcycles Argon2d 3 iterations 128 MiB 8 threads: 1.71 cpb 218.54 Mcycles Argon2id 3 iterations 128 MiB 8 threads: 1.71 cpb 219.59 Mcycles 0.2303 seconds Argon2i 3 iterations 256 MiB 1 threads: 6.92 cpb 1771.62 Mcycles Argon2d 3 iterations 256 MiB 1 threads: 6.86 cpb 1756.04 Mcycles Argon2id 3 iterations 256 MiB 1 threads: 6.87 cpb 1759.49 Mcycles 1.8450 seconds
It looks like the results show a slight improvement in run time.
Conclusion:
I do not know what could have removed those random numbers from the x86_64 basic test, but I would consider this a success in porting the Argon2 password hashing function's bench test tool to work on any Linux device, such as AArch64 or x86_64.