/* * (c) 2026, Roberto A. Foglietta , GPLv2 license */ #define VERSION "v0.5.6" /* Quick 2k test: cat uchaos.c | ./chaos -T 2048 | ent * Boot log test: cat dmesg.txt | ./uchaos -S -M2 | ent * * Compile w/libc: gcc uchaos.c -O3 --fast-math -Wall -o uchaos -s -lm * Compile 4speed: -mavx2 -march=native -funroll-loops * Compile w/musl: musl-gcc uchaos.c -O3 --fast-math -Wall -o uchaos -s -static * Compile option: -D_USE_GET_RTSC (i686: -m32 -msse2), -D_USE_LINUX_RANDOM_H * -D_USE_FUNCS_32 (i686: -m32, native), -D_USE_PREV_TIME * Test with: ent, dieharder, PractRand RNG_test (compiled for Ubuntu 22.04 x64) * drive.google.com/file/d/17ymBcxfO2pA8ET7T4ZxiiO2EYW6_F8Lu/view * Qemu test: cd bare-minimal-linux-system; sh start.sh "" bzImage.515x * cp -arf prpr/bin update/common/usr (to add missing binaries) * Data production: uctest.sh (shell script for faster production) * * ***************************************************************************** * * PRESENTATION * * despite a relatively simple coding, this uchaos binary is able to provide * nearly 4MB/s of (pseudo) random numbers generated from a static input (known * known and constant in time) by stochastics jitter-drive hashing. * * Not even a special hash, just a modified version of one of the most ancient * text-oriented hashing functions that was written before every theory check * (practice shows that it was good for real, not for chance). * * Contrary to the past experiments, the streams mixing here doesn't even happen * because it is a single thread process. And by the way, past experiments were * mixing almost known text strings but they could have easily mixed the various * /dev/random streams in such a way that even if /dev/random would have been * "tampered" by a global attacker of by NSA design, then such mixage would * have provided good speed and security. * * Note: current implementation is a text strings-only PoC, it cannot deal with * binary input that has EOB char (\0) in the input data. 
Quick to change but it * is not the point here but djb2tum, possibly. * * While uchaos.c has been developed as early source for /dev/random leveraging * dmesg timings in the boot log, it can works with a simple adaptation (text * ending with \0 to binary) also with /dev/random stream as input (source). * * SPOILER * * When processing /dev/random output, uchaos acts as a conditioning * function. Per Bellare (2006/2014), non-PRF functions can preserve * security in specific constructions. The kernel's mixing tolerates * this, though uchaos itself is not independently NIST-certified. * * ***************************************************************************** * * PORPOSE * * .config - Linux/x86 6.12.71 Kernel Configuration → Kernel hacking * * CONFIG_WARN_ALL_UNSEEDED_RANDOM: * * Some parts of the kernel contain bugs relating to their use of cryptographically * secure random numbers before it's actually possible to generate those numbers * securely. This setting ensures that these flaws don't go unnoticed, by enabling * a message, should this ever occur. This will allow people with obscure setups * to know when things are going wrong, so that they might contact developers * about fixing it. * * Unfortunately, on some models of some architectures getting a fully seeded CRNG * is extremely difficult, and so this can result in dmesg getting spammed for a * surprisingly long time. This is really bad from a security perspective, and * so architecture maintainers really need to do what they can to get the CRNG * seeded sooner after the system is booted. However, since users cannot do * anything actionable to address this, by default this option is disabled. * * Say Y here if you want to receive warnings for all uses of unseeded randomness. * This will be of use primarily for those developers interested in improving the * security of Linux kernels running on their architecture (or subarchitecture). 
* * ***************************************************************************** * * OUTPUT TESTS * * The first run of commit (#9c8f4f00) passed all the dieharder tests with 49MB. * * gcc uchaos.c -O3 -Wall -o chaos && strip chaos * time cat ../prpr/uchaos.c | ./chaos -T 100000 > uchaos.data.01 * * Tests: 100000, collisions: 0 over 6400000 hashes (0.00%) -- OK * Times: running: 19.981 s, hashing: 12.980 s, speed: 320.3 Kh/s * Bits in common compared to 50 % avg is 50.0000 % (-0.2 ppm) * * Collisions here are expected to be 100% for a traditional hash w/fixed input. * * cat uchaos.data.01 | ent * Entropy = 7.999997 bits per byte. * Optimum compression would reduce the size of this 51200000 byte file by 0 percent. * Chi square distribution for 51200000 samples is 247.48, and randomly would exceed * this value 62.04 percent of the times (50% is the ideal value, but 10-90% is fine). * Arithmetic mean value of data bytes is 127.5076 (127.5 = random). * Monte Carlo value for Pi is 3.140672935 (error 0.03 percent). * Serial correlation coefficient is 0.000300 (totally uncorrelated = 0.0). * * ***************************************************************************** * * TODO LIST * * Before and after writing in the /dev/random, read avail. entropy from proc * * Create a set of "bit of entropy per byte" polices, and use #if to compile: * - optimistic 7hw 3vm; or flipcoin 4hw 2vm; or minimal 2hw 1vm. * **************************************************************************** */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define AVGV 127.5 #define E3 1000 #define E6 1000000 #define E9 1000000000 #define MAX_READ_SIZE 4096 #define MAX_COMP_SIZE (MAX_READ_SIZE << 1) #define ABS(a) ( ( (a) < 0 ) ? -(a) : (a) ) #define MIN(a,b) ( ( (a) < (b) ) ? (a) : (b) ) #define MAX(a,b) ( ( (a) > (b) ) ? 
(a) : (b) ) #define ALGN64(n) ( ( ( (n) + 63) >> 6 ) << 6 ) #define BIT(v,n) ( ( (v) >> (n) ) & 1 ) #define perr(x...) fprintf(stderr, x) #if 0 // RAF: this code is not used anymore but remains for educational purpose // functionally is converted into a commentary section about uchaos.c // #define ALPH64 "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz0123456789@\n" static inline uint8_t *bin2s64(uint8_t *buf, uint32_t nmax) { static const uint8_t c[] = ALPH64; for(register uint32_t i = 0; i < nmax; i++) buf[i] = c[ 0x3F & buf[i] ]; return buf; } #endif #ifdef __x86_64__ #pragma message("Compiling for the 64-bit arch") #define ALGN 128 // host's CPU can have AVX or AVX2 instructions #else #pragma message("Compiling for the 32-bit arch") #define ALGN 64 // host can be a 64bit machine with pie32 elf #endif #ifdef _USE_FUNCS_32 #define USE_FUNCS_32 1 #else #define USE_FUNCS_32 0 #endif #if USE_FUNCS_32 #pragma message("Using the 32-bit functions set") typedef float archdf_t; typedef uint32_t archul_t; #define AB 5 // 5 -> 32 #else typedef double archdf_t; #if defined(__SIZEOF_INT128__) #pragma message("Using the 128-bit functions set") #define HAS_UINT128 1 typedef unsigned __int128 uint128_t; typedef uint128_t archul_t; #define AB 7 // 7 -> 64 #else #pragma message("Using the 64-bit functions set") #define HAS_UINT128 0 typedef uint64_t archul_t; #define AB 6 // 6 -> 64 #endif #endif #define ABL (AB-3) // 2 or 3 #define ABN (1<>1)-1) // 15 or 31 #define ABy ((ABN>>2)-1) // 7 or 15 #define ABz ((ABN>>3)-1) // 3 or 7 #ifdef _USE_GET_RTSC /* ****************************************************** */ #define USE_GET_TIME 0 /* * Available only on x86 architecture, thus not portable * moreover, when the CPU id changes the two clocks aren't * necessarily synchronised anymore but also the CPU switch * is a matter of stochastics, and it is fine but not monotonic. 
* * WARNING * * When using uchaos.c to seed /dev/random at boot time, keep in consideration * that TSC will be ready after one second while the system could have entered * in /init esecution at 0.1s which suggest that TSC isn't suitable, anyway. */ #include #ifndef __SSE2__ #warning "SSE2 not detected (-msse2). Falling back to skew risky CPUID fence." static inline uint32_t __cpuid__slfence(void) { uint32_t eax, ebx, ecx, edx; // cpuid is a heavy-duty serializer __asm__ __volatile__ ("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(1) : "memory"); return (ebx >> 24); } #endif static inline uint32_t get_rdtsc_clock(uint32_t *pcpuid) { __asm__ __volatile__ ("" ::: "memory"); // Compiler barrier: avoid reording #ifdef __SSE2__ _mm_lfence(); return __rdtscp(pcpuid); #else #warning "Instruction __rdtscp() not available. Falling back to __asm__ rdtsc." uint32_t lsb, msb; *pcpuid = __cpuid__slfence(); // non-atomic: scheduler can switch CPU here, it needs sched_setaffinity() __asm__ __volatile__ ("rdtsc" : "=a" (lsb), "=d" (msb)); return lsb; #endif } #else /* ******************************************************************** */ #define USE_GET_TIME 1 #endif /* ******************************************************************* */ #if 0 // RAF: this code is not used anymore but remains for educational purpose // functionally is converted into a commentary section about uchaos.c // #define PRMX 0 #define IDIV10(x) ((0xcccccccdULL * x) >> 35) static inline uint32_t imod10(uint32_t x) { uint32_t a = IDIV10(x); x -= (a << 3) + (a << 1); // mult.: 10x = 8x + 2x return x; } /* * This sequence of primes has a peculiar structure: x, y where x + y = 64. * Both members of each pair is a prime number, and by rotl64 are like x, -x. * They are only five pair of these numbers summing up 64, thus 10 = 3.32 bits. 
*/ static const uint8_t primes64[20] = { 3, 61, 5, 59, 11, 53, 17, 47, 23, 41, 19, 45, 29, 35, 31, 33, 13, 51, 7, 57 }; static inline uint32_t getprmx10(uint32_t x) { return primes64[imod10(x)]; } static inline uint32_t getprmx20(uint32_t x) { x = imod10(x) + (x & 0x10) ? 10 : 0; return primes64[x]; } static inline uint32_t getprmx16(uint32_t x) { return primes64[ 2 + (x & 0x1f) ]; } #else #define PRMX 0 #define getprmx16(w) (5 + (((w) & ABy) << 1)) #endif static inline archul_t rotlbit(archul_t n, uint8_t c) { c &= ABX; return (n << c) | (n >> ((-c) & ABX)); } #define BLOCK_SIZE 512 #ifdef _USE_LINUX_RANDOM_H #include #else #define RNDADDENTROPY 0x40085203 struct rand_pool_info_buf { int entropy_count; int buf_size; uint32_t buf[BLOCK_SIZE / sizeof(uint32_t)]; } __attribute__((packed)); #endif /* *** HASHING ************************************************************* */ /* * The gcc compiler is good at using registers, but other simpler compilers for * micro architectures may be much less. So, when register as keyword is used, * it is impose a pragma, and if compiler cannot satisfy it, warning at least. */ static inline uint32_t getnstime(uint32_t *pcpuid) { #if ! USE_GET_TIME if(pcpuid) return get_rdtsc_clock(pcpuid); #endif uint32_t ns; struct timespec ts; // using sched_yield() to creates chaos, clock_gettime(CLOCK_MONOTONIC, &ts); // getting ns in a hot loop is the limit // and we want to see this limit, in VMs ns = ts.tv_nsec; ns += (ts.tv_sec & 1) ? E9 : 0; ns += (ts.tv_sec & 2) ? (E9 << 1) : 0; return ns; } #if 0 // RAF: this code is not used anymore but remains for educational purpose // functionally is converted into a commentary section about uchaos.c // /* * This function isn't 32bit fast but a variant of the original Park-Miller * which has a 2^31-2 period. It is useful to introduce in the uchaos output * a grid-biased distribution which the murmur3 cannot remove but amplify. * * ref. 
Marsaglia's theorem */ static inline uint64_t parkmiller32(uint64_t z) { return (((z << 1) + 1) * 48271) % 0x7FFFFFFF; } /* * These functions below are, like the above, relatively slow compared with * bit operations because multiplications which impacts platforms without FPU * or accelerated instructions. Therefore, they can be used at the end of the * hot-loop producing the hash but inside they can kill the performances. * * For this reason, the uchaos multiply the 5-bit "entropy" from scheduler * jittering using a Xoshiro approach that mix internal states with shift * and rotations which are extremely fast on every architecture. Finally * the hash is whitened with a diffusing avalanche 32 multiplication which * impacts the 32+1 LSB which are mixed with 33 MLB, thus 1-bit overposition. * * ref. Lorenz strange attractors' theory */ static inline uint64_t murmur3(uint64_t hs) { register uint64_t z = hs; z = (z ^ (z >> 33)) * 0xff51afd7ed558ccdULL; z = (z ^ (z >> 33)) * 0xc4ceb9fe1a85ec53ULL; z = (z ^ (z >> 33)); return z; } static inline uint16_t mm3ns16(uint16_t ns, uint16_t p) { register uint32_t z = ns; z = (p ^ (z >> 7)) * 0x45d9f3b; z = (z ^ (z >> 8)) * 0x45d9f3b; z = (z ^ (z >> 9)); return (uint16_t)z; } #endif #include #if 0 // RAF: this code is not used anymore but remains for educational purpose // functionally is converted into a commentary section about uchaos.c // /* RAF: unificated by new rotations approach ************************** */ #define STBX 0 #define STBRSTR "stochastics branches" #define FINAL_AVALANCHE_MLT 0xc4ceb9fe1a85ec53ULL #define perrwrn() perr("\nWARNING: "APPNAME" isn't compiled with "STBRSTR"\n\n") #define murmux3(o,h) rotl64((h * FINAL_AVALANCHE_MLT) ^ (h >> 33), 13 + ((o & 0x1f) << 1)) #define entropy(sz) ((sz << 3) - sz) // eq. to 8x (8-1) #define minmix8 #define knuthmx #else /* ******************************************************************** */ #define STBX 1 #define perrwrn() #define entropy(sz) ((sz << 2) - sz) // eq. 
to 3x (4-1) static inline uint8_t minmix8(uint8_t b) { b *= (b & 1) ? 0x4d : 0x65; b ^= ((b >> 3) | (b << 5)); return b; } /* * RAF, interesting sequence by a recursive function * static inline archul_t rfv(archul_t x) { x >>= USE_FUNCS_32; x += !(x&1); return x; } #define rfc(x) ((x >> USE_FUNCS_32) + !((x >> USE_FUNCS_32)&1)) #define ABT 47 // rf(ABT) 47,23 // rf(rf(ABT)) 23,13 // rf(rf(rf(ABT))) 13,7 // rf(rf(rf(rf(ABT)))) 7,3 // rf(rf(rf(rf(rf(ABT))))) 3,1 */ #if USE_FUNCS_32 #define murmul1 0x85ebca6b #define murmul2 0xc2b2ae35 #define murmul3 0x045d9f3b #define rot1 23 #define rot2 17 #define rot3 7 #else #if HAS_UINT128 #define murmul1 (((uint128_t)0x87c37b91114253d5ULL << 64) | 0x4cf5ad432745937fULL) #define murmul2 (((uint128_t)0xff51afd7ed558ccdULL << 64) | 0xc4ceb9fe1a85ec53ULL) #define murmul3 (((uint128_t)0x9e3779b97f4a7c15ULL << 64) | 0xf39cc0605cedc834ULL) #define rot1 83 #define rot2 31 #define rot3 17 #else #define murmul1 0xff51afd7ed558ccdULL #define murmul2 0xc4ceb9fe1a85ec53ULL #define murmul3 0x9E3779B9045d9f3bULL #define rot1 47 #define rot2 17 #define rot3 13 #endif #endif static inline archul_t knuthmx(archul_t iw) { register archul_t w = iw; w = rotlbit(w, getprmx16(w)); w *= (w & 1) ? 0x9E3779B9 : 0x045d9f3b; w ^= rotlbit(w, (w & 2) ? (47 >> USE_FUNCS_32) : 17); return w; } static inline archul_t murmux3(archul_t ks, archul_t p) { #if 0 register archul_t z = knuthmx(~ks); z = (p ^ (z >> (ABx-2))) * murmul1; z = (ks ^ (z << ABx )) * murmul2; z = (z ^ (z >> (ABx+2))) * murmul3; return rotlbit(z, getprmx16(p>>2)); #else register archul_t z = ks; z = p ^ ((z >> (ABx-2)) * murmul1); z = (z ^ ( z << ABx )) * murmul2; z = z ^ ( z >> (ABx+2)); return z; #endif } #endif /* ******************************************************************* */ #define pidx(p) ((uint32_t)(uintptr_t)(p)) #define perr_app_info(a) { perr("%s%s%u %s%s%s%s%s", (a)?"":"\n", APPNAME, ABN, VERSION,\ STBX?" 
w/sb":"", PRMX?"":" !/pr", USE_GET_TIME?"":" rtcs", (a)?"\n":""); }

// Converts a -p value (parts of min/256) into nanoseconds above the minimum.
#define PMDLY2NS(x) ( ( ( x * pmdly ) + 127 ) >> 8 )
// Sentinel seed: makes djb2tum() return a pointer to its internal state.
#define DJB2VGET ( (archul_t)-1 )

// Thin wrapper over nanosleep() for a single ns-resolution request.
static inline int nsleep(uint32_t ns)
{
    struct timespec remaining, request = { 0, ns };
    int ret = nanosleep(&request, &remaining);
    return ret;
}

// Internal statistics/state of the djb2tum engine (all 64-bit counters):
// ncl/dmn/dmx: per-window count and latency min/max; tncl/tdmn/tdmx: totals;
// ctot: total loop turns; jmn/jmx: jitter min/max; evnt: min/max update events;
// nexp: exception-manager activations; javg/avg: jitter/latency accumulators;
// oid: last CPU id (-1 = unset); pmns: -p delay in ns; ohs: carried hash state.
typedef struct djb2tum_status {
    uint64_t ncl, dmn, dmx;
    uint64_t tncl, tdmn, tdmx;
    uint64_t ctot, jmn, jmx;
    uint64_t evnt, nexp, javg;
    uint64_t avg, oid, pmns, ohs;
} __attribute__((aligned(8))) djb2_t;

/*
 * One of the most popular and efficient hash functions for strings in C is
 * the djb2 algorithm created by Dan Bernstein. It strikes a great balance
 * between speed and low collision rates. Great for text.
 *
 * 5381 Prime number choosen by Dan Bernstein, as 1010100000101
 * empirically is one of the best for English words text.
 * Alternatives:
 *
 * 16777619 The FNV-1 offset basis (32-bit).
 * 14695981039346656037 The FNV-1 offset basis (64-bit).
 */
#if USE_FUNCS_32
#define HSHSEED 16777619
#else
#define HSHSEED 14695981039346656037ULL
#endif

// True when a time delta is zero or suspiciously large (clock skew / wrap).
// NOTE(review): (x)>>28 trips at 2^28, while the comment mentions 2^29 — confirm.
#define dtskew(x) (!x || (x)>>28) // 2^29 is the biggest 2^n before 1E9

#define djb2tum_status_init { 0,-1,0, 0,-1,0, 0,-1,0, 0,0,0, 0,-1,0, HSHSEED }

// Core jitter-driven hash engine. Runs up to maxn timing turns, distilling
// scheduler/clock jitter into the returned hash word. seed xors the carried
// state; nsdly/pmdly tune the minimum-jitter threshold; nbtls right-shifts
// the raw timestamps; rset resets the per-window statistics. Passing
// seed == DJB2VGET returns a pointer to the internal djb2_t instead.
static archul_t djb2tum(archul_t seed, uint8_t maxn, uint32_t nsdly,
                        uint32_t pmdly, uint8_t nbtls, uint8_t rset)
{
    // This is the internal state of the engine
    static djb2_t s = djb2tum_status_init;
    /*
     * In general, the previous time shouldn't be persistent across calls
     * but this function is running in strict sequence in a manner that
     * it makes sense keep the ons static, also because dtskew() exists
     */
#ifndef _USE_PREV_TIME
    static
#endif
    archul_t __attribute__((aligned(16))) ons = 0;

    // Fold the per-window stats into the totals before any reset/report.
    if( s.ncl || s.tncl || rset || seed == DJB2VGET ) {
        s.tncl += s.ncl; s.ncl = 0;
        s.tdmx = MAX(s.dmx, s.tdmx);
        s.tdmn = MIN(s.dmn, s.tdmn);
        s.pmns = PMDLY2NS (s.tdmn);
    }
    if( rset ) { s.dmn = -1, s.dmx = 0, s.ncl = 0; ons = 0; };

    // The best practice is to share a copy not the internal state
    if( seed == DJB2VGET ) return (uintptr_t)&s;
    if( !maxn ) return 0;

    // 0. hashing loop preparation, p.1 ////////////////////////////////////////
    archul_t __attribute__((aligned(16))) tm_4s_nsec, dff, dlt, ent = 0;
    register archul_t hsh = s.ohs;
    uint8_t skw = !!ons, excp = 0; // excp++ as uint8_t grants for convergence
    if( seed ) hsh ^= seed;

hashotloop: // a loop made by two ASM jumps
/** HASHING LOOP START *******************************************************/

    // 0. hashing loop preparation, p.2 ////////////////////////////////////////
    if( ent ) ent ^= rotlbit(ent, getprmx16(hsh));

    // 1. ns latency time retrievement /////////////////////////////////////////
#if USE_GET_TIME
    tm_4s_nsec = getnstime( NULL ) >> nbtls;
#else
    uint32_t cpuid;
    tm_4s_nsec = getnstime(&cpuid) >> nbtls;
    if( cpuid != (uint32_t)s.oid && s.oid != -1 ) {
        // Knuth, based on gold section seeded by CPU ids event idx
        hsh = murmux3(hsh, ((archul_t)cpuid << ABy) | s.oid);
        // reschedule in the following !ons branch
        ons = 0; s.nexp++;
    }
    s.oid = cpuid;
#endif
    // No previous timestamp yet: just stir the hash and take a new baseline.
    if( !ons ) { hsh = knuthmx(hsh ^ tm_4s_nsec); goto reschedule; }

    // 2. latency calculation //////////////////////////////////////////////////
    dlt = tm_4s_nsec - ons; // within 4s is fine, with RTCS is always fine
    if( dtskew(dlt) ) { goto reschedule; }
    if( s.dmn == -1 ) { s.dmn = dlt; goto reschedule; }
    if( skw ) { goto notcrashstats; }

    // 3. internal state update ////////////////////////////////////////////////
    // dmn calculation is mandatory for stochastics bi-forkation turns
    if( dlt < s.dmn ) { dff = s.dmn - dlt; s.dmn = dlt; ent ^= -dff ^ s.dmn; s.evnt++; } else
    // dmx calculation can be omited but doing ns*=0x4d anyway
    if( dlt > s.dmx ) { dff = dlt - s.dmn; s.dmx = dlt; ent ^= dff ^ -s.dmx; s.evnt++; } else {
notcrashstats:
        dff = dlt - s.dmn;
        ent ^= ~dff ^ s.dmx;
    }
    // Zero jitter carries no information: stir once and take another sample.
    if( !dff ) { hsh = knuthmx(hsh); skw = 0; goto reschedule; }

    // 4. jittering calculation ////////////////////////////////////////////////
    // dff is jittering for the exeption manager activation
    if( dff < nsdly + (pmdly ? PMDLY2NS(s.dmn) : 1) + excp ) {
        excp += 4; // increasing excp and accounting for dff
        s.nexp++; skw = 0;
    } else {
        // Knuth, based on gold section seeded by 1E-3 ~ 1E-4 event idx
        if( excp ) { hsh = murmux3(hsh, ons); } excp = 0;
        if( skw ) { skw = 0; goto skiptoetropy; }
        // min,max jittering can be ommited
        if( s.jmn == -1 ) s.jmn = dff; else if( dff < s.jmn ) s.jmn = dff;
        if( dff > s.jmx ) s.jmx = dff;
        // avg calculation can be ommitted
        s.avg += dlt; s.javg += dff; s.ncl++;
    }

    // 5. entropy distillation /////////////////////////////////////////////////
skiptoetropy:
    ent ^= dlt << ABz ;        // 1st derivative of time
    ent ^= tm_4s_nsec << rot3; // current monotonic time
    ent = knuthmx(ent ^ dff);  // 2nd derivative of time

    // 6. macro-mix in djb2-style //////////////////////////////////////////////
    /*
     * (16+1) (32-1 or 32+1) (64-1)
     *   01     10     00      11
     */
    // it consumes entropy, does rotations and forgets the state
    uint8_t b0 = ent & 0x01, b1 = ent & 0x02; ent = ent >> 2;
    hsh = ( hsh << (4 + (b0 ? b1 : 1)) ) + (b1 ? -hsh : hsh);

    // 7. entropy injection in hsh /////////////////////////////////////////////
    // it consumes entropy, and does hash the another rotation
    hsh ^= rotlbit(hsh, getprmx16(ent));

    // 8. preparation for the next round ///////////////////////////////////////
    s.ctot++;

    // copying with the VMs scheduler timings: continue made by an ASM jump
reschedule:
    if( skw ) { skw = 0; }
    if( !excp ) { maxn--; ons = tm_4s_nsec; }
    if( excp || maxn ) { sched_yield(); goto hashotloop; }

/** HASHING LOOP CLOSE *******************************************************/

    // 9. finalising w/ a 32+1 bit mix /////////////////////////////////////////
    ent = hsh;                 // forget the entropy mixed in hash
    hsh = murmux3(hsh, s.ohs); // whitening the hash before deliver
    s.ohs = ent;               // keep the hashing internal state
    return hsh;
}

// One "true random" byte from a post-yield timestamp (low 8 bits).
static inline uint8_t trndbyte() { sched_yield(); return 0xFF & getnstime(NULL); }

// Replaces every NUL in buf with a timing-derived byte; retries the same
// index (via i--) until the replacement itself is non-zero.
static inline uint8_t *bin2str(uint8_t *buf, uint32_t nmax)
{
    for(register uint32_t i = 0; i < nmax; i++)
        if(!buf[i]) buf[i--] = trndbyte();
    return buf;
}

// Hashes *size bytes of str into an array of archul_t words (allocating it
// when h is NULL); on return *size holds the number of words produced.
archul_t *str2hsh(const uint8_t *str, archul_t *h, uint32_t *size,
                  uint32_t nsdly, uint32_t pmdly, uint8_t nbtls, uint8_t rset)
{
    if (!str || !size) return NULL;

    // 1. Calculate allocation
    // We need enough 64-bit blocks to cover n bytes.
    uint32_t i, n = 0, nwords = (*size + ABz) >> ABL;

    // 2. Generate the words-sized array
    // We allocate a separate array for hashes if that was the intent, or we cast
    // the rotated string. Based on your code, you want a hash per 8-byte block.
    if(h == NULL) {
        if(posix_memalign((void **)&h, ALGN, nwords << ABL) || !h) {
            perror("posix_memalign"); return NULL;
        }
    }
    *size = nwords;
    // 3.
Producing the hashing sequence archul_t *p = (archul_t *)str; for (i = 0; i < nwords; i++) { n += ABz+1; // Processing each n-bytes chunk of the rotated/padded string h[i] = djb2tum(p[i], 1 + !!rset, nsdly, pmdly, nbtls, 0); if ( rset && n >= ((archul_t)1 << rset) ) { n = 0; djb2tum(0, 0, 0, 0, 0, 1); } } return h; } /** I/O ***********************************************************************/ static inline uint32_t writebuf(int fd, const uint8_t *buffer, uint32_t ntwr) { uint32_t tot = 0; while (ntwr > tot) { errno = 0; int nw = write(fd, buffer + tot, ntwr - tot); if (nw < 1) { if (errno == EINTR) continue; perror("write"); exit(EXIT_FAILURE); } tot += nw; } return tot; } static inline uint32_t readbuf(int fd, uint8_t *buffer, uint32_t size, bool intr) { uint32_t tot = 0; while (size > tot) { errno = 0; int nr = read(fd, buffer + tot, size - tot); if (nr == 0) break; if (nr < 0) { if (errno == EINTR) { if(intr) return tot; else continue; } perror("read"); exit(EXIT_FAILURE); } tot += nr; } return tot; } typedef union { uint8_t uc[BLOCK_SIZE]; archul_t dt[BLOCK_SIZE>>ABL]; } __attribute__((aligned(16))) block512_t; static inline uint32_t readblocks(int fd, uint8_t *buf, uint32_t *nblks) { if(!nblks) return 0; block512_t inp, fst; // Reading max 8 blocks to limit the overflow at min 5 LSB bits, // considering that ASCII text is almost all chars in 32-122 range. // Anyway, pre-processing the input may help but it shouldn't matter // when the final aim is to feed uchaos for providing randomness. 
uint32_t a, i, n, maxn = 0; memset(buf, 0, BLOCK_SIZE); // Input size 16 * 512 = 8K as relevant initial dmesg log before init for(i = 0; i < *nblks; i++) { n = readbuf(fd, inp.uc, BLOCK_SIZE, 0); if(!n) break; else maxn = MAX(maxn, n); if(i) { if(n < BLOCK_SIZE) { memcpy(&inp.uc[n], fst.uc, BLOCK_SIZE-n); n = BLOCK_SIZE; } } else { if(n == BLOCK_SIZE) memcpy(fst.uc, inp.uc, BLOCK_SIZE); } block512_t *bp = __builtin_assume_aligned( (block512_t *)buf, 16 ); // mixing the input by words n = (n + ABz) >> ABL; for (a = 0; a < n; a++) bp->dt[a] ^= inp.dt[a]; } *nblks = i; return maxn; } /* ** main & its supporters ************************************************* */ // Funzione per ottenere il tempo in nanosecondi static uint64_t get_nanos(void) { static uint64_t start = 0; struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); if (!start) { start = (uint64_t)ts.tv_sec * E9 + ts.tv_nsec; return start; } return ((uint64_t)ts.tv_sec * E9 + ts.tv_nsec) - start; } static inline void usage(const char *name, const char *cmdn, const uint8_t qlvl) { perr("\n"\ "%s "VERSION" reads from stdin, stats on stderr, and rand on stdout.\n"\ "|\n"\ "\\_ Usage: %s [-h,q%s,V] [-T/K/M/G N] [-d,p,s,r N] [-k /dev/rnd]\n"\ " |\n"\ " | -qq,-q: (extra) quiet run for scripts automation\n"\ " | -K/-M/-G: kilo/mega/giga bytes of data w/ stats on\n"\ " | -S: low-entropy VMs seeding settings (r31,d3,i16,K4)\n"\ " | -Z: the same as -S but with a reset at 2^19 bytes\n"\ " | -T: number of collision tests x2 on the same input\n"\ " | -d: number of ns above min as the minimum delay\n"\ " | -p: number of parts as min/256 ns above the min\n"\ " | -s: number of bits to left shift on ns timings\n"\ " | -r: number of preliminary runs (default: 1)\n"\ " | -k: randomness injection in kernel by ioctl\n"\ " | -i: number of 512B-blocks to read from stdin\n"\ " | -h/-v: shows this help / appname and version\n"\ " |\n "\ "\\_ With -pN is suggested -r31 or -r63 for stats pre-evaluation.\n"\ " |\n"\ " \\_ Typeical: 
[sudo] %s -Sk /dev/random (at /init time)\n\n", name, cmdn, (qlvl > 1) ? "^" : "", cmdn); return; } #define APPNAME "uChaos" #define STCX STOCHASTIC_BRANCHES #define perrprms(s,p) perr("%s s:%u, q:%u, d+p(%u):%u+%u ns, r:%u, i:%u, Z:%u\n\n",\ s, nbtls, quiet, pmdly, nsdly, p?p:1, nrdry, nblks, rset) typedef double __attribute__((aligned(8))) df; int main(int argc, char *argv[]) { struct rand_pool_info_buf entrnd; uint8_t *str = NULL, nbtls = 0, prsts = 0, quiet = 0, rset = 0; uint32_t ntsts = 1, nsdly = 0, nrdry = 1, nblks = 1, pmdly = 0; int devfd = 0; // Collect arguments from optional command line parameters while (1) { int opt = getopt(argc, argv, "hvSZG:M:K:T:s:d:p:r:k:i:q"); if(opt == 'S' || opt == 'Z') { nsdly = 3; nblks = 16; nrdry = 31; ntsts = 8; rset = (opt == 'Z') ? 19 : 0; perrwrn(); } else if(opt == 'v') { perr_app_info(2); return 0; } else if(opt == 'q') { quiet = (++quiet) ? quiet : 2; } else if(opt == '?' || opt == 'h') { char *p, *q = argv[0]; if(q) for(p = q; *p; p++) if(*p == '/') q = p+1; usage(APPNAME, q ? q : "uchaos", quiet); return 0; } else if(opt == -1) { break; } if(!optarg) continue; long x = atoi(optarg); switch (opt) { // ABS sanitises the parametric inputs case 's': nbtls = ABS(x); break; case 'd': nsdly = ABS(x); break; case 'r': nrdry = ABS(x); break; case 'p': pmdly = ABS(x); break; case 'i': nblks = ABS(x); break; case 'k': devfd = open(optarg,O_WRONLY); break; case 'G': ntsts = ABS(x); ntsts <<= 21 ; prsts = 1; break; case 'M': ntsts = ABS(x); ntsts <<= 11 ; prsts = 1; break; case 'K': ntsts = ABS(x); ntsts <<= 1 ; prsts = 1; break; case 'T': ntsts = ABS(x); ntsts <<= 1 ; prsts = 1; break; } } if (devfd < 0) { perror("open device"); return EXIT_FAILURE; } if(quiet) prsts = 0; // Counting time of running starts here, after parameters (void) get_nanos(); if (posix_memalign((void **)&str, ALGN, BLOCK_SIZE + ABz+1) || !str) { perror("posix_memalign"); return EXIT_FAILURE; } uint32_t n = (nblks < 2) ? 
readbuf(STDIN_FILENO, str, BLOCK_SIZE, 0) \ : readblocks(STDIN_FILENO, str, &nblks); if(n < 1) return EXIT_FAILURE; // djb2tum(9 code refactored thus not anymore //if (nblks > 1) bin2str(str, n); // necessary because djb2tum() born for text, str[n] = 0; // refactoring it for binary input, is the way. archul_t *hsh = NULL; for(uint32_t a = nrdry; a; a--) { uint32_t size = n; hsh = str2hsh(str, hsh, &size, nsdly, pmdly, nbtls, 0); if(!hsh) return EXIT_FAILURE; } archul_t max = 0, min = E9; uint64_t bic = 0, avg = 0, nk = 0, nt = 0, nx = 0, nn = 0, mt = 0; df avgbc = 0, avgmx = 0, avgmn = 256; if(quiet < 2) { perr_app_info(0); if(prsts) { // RAF, TODO: dealing with size one. if(0 && nblks < 2) { perr("; too short input %u, no stats/check!\n\n", ntsts); prsts = 0; } else perr("; collision: "); } else perrprms("", 0); } for (uint32_t a = ntsts; a; a--) { // hashing uint32_t size = n; uint64_t stns = get_nanos(); /**** hashing time accounting start ******/ hsh = str2hsh(str, hsh, &size, nsdly, pmdly, nbtls, rset); mt += get_nanos() - stns; /******* hashing time accounting stop *******/ if(!hsh) return EXIT_FAILURE; uint32_t sz = size << ABL; if(devfd) { entrnd.buf_size = sz; entrnd.entropy_count = entropy(sz); memcpy((uint8_t *)entrnd.buf, (uint8_t *)hsh, sz); #define OPTNK "option -k is designed for /dev/[u]random only" if (ioctl(devfd, RNDADDENTROPY, &entrnd) < 0 && errno != EINTR) { if(errno == ENOTTY) { perr("\nERROR: "APPNAME" "OPTNK"\n\n"); } else perror("ioctl entrnd"); return EXIT_FAILURE; } if (quiet < 2) // avoid the need of >/dev/null writebuf(STDOUT_FILENO, (uint8_t *)hsh, sz); } else { writebuf(STDOUT_FILENO, (uint8_t *)hsh, sz); } // single run if(ntsts < 2) return 0; // skip stats if(!prsts) continue; // self-evaluation of the output including the check for repetitions avg = 0, nn = 0; for (uint32_t n = 0; n < size; n++) { for (uint32_t i = n + 1; i < size; i++) { if (hsh[i] == hsh[n]) { perr("%d:%d ", n, i); nk++; continue; } archul_t cb = hsh[i] ^ 
hsh[n]; int ham = 0; for (int a = 0; a < ABN; a++) ham += (cb >> a) & 0x01; bic += ham; avg += ham; if(max < ham) max = ham; if(min > ham) min = ham; nx++; nn++; } } df curavg = (df)avg / nn; if(avgmx < curavg) avgmx = curavg; if(avgmn > curavg) avgmn = curavg; avgbc += curavg; nt += size; //sched_yield();// Statistics are a block of CPU data-crunching but also // a predictable delay which sched_yield() can jeopardise. // Stats makes the large size output slower 1.7x than -q. } uint64_t rt = get_nanos(); free(hsh); hsh = NULL; if(!prsts) return 0; perr("%s\n", nk ? ", status: KO" : "no, status: OK"); // print statistics //////////////////////////////////////////////////////// djb2_t *s = (djb2_t *)(uintptr_t)djb2tum(DJB2VGET, 0, 0, pmdly, 0, 0); perrprms("Setting:", (uint32_t)(s ? s->pmns : 0)); perr("Hashing: %u, ", ntsts); if((nt >> 3) > E6) perr("%.2lfMH (%.2lfGB)", (df)nt/(1ULL<<20), (df)nt/(1ULL<<(30-ABL))); else perr("%.1lfKH (%.1lfMB)", (df)nt/(1ULL<<10), (df)nt/(1ULL<<(20-ABL))); perr(" w/ %.0lf duplicates\n", (df)nk); if(nblks < 2) goto skiphamming; avgbc /= ntsts; df bic_nx_absl = (df)bic / nx; df bic_nx = (df)100 / ABN * bic_nx_absl; #define devppm(v,a) ( ((df)v-a) * E6 / a ) perr("Hamming : %.4lf%% ~ 50%% by (%+.4lg ppm)\n", bic_nx, devppm(bic_nx, 50)); perr("Hamming distance: %.0lf <%.6lf> %.0lf over %.4lgK XORs\n", (df)min, bic_nx_absl, (df)max, (df)nx/E3); perr("Hamming dist/avg: %.4lf < 1U:%u %+.4lg ppm > %.4lf\n\n", avgmn/bic_nx_absl, ABN, devppm(bic_nx_absl, 32), avgmx/bic_nx_absl); skiphamming: perr("Perform: exec %.3lgs, %.3lg MB/s; hash %.3lgs, %.01lf KH/s\n", (df)rt/E9, (df)(E9>>(20-ABL))*nt/rt, (df)mt/E9, (df)(E9>>10)*nt/mt); if(nblks < 2) goto skiptimings; df mean = (df)s->avg / s->tncl; df jean = (df)s->javg / s->tncl; #define dk(a,b,c) ( (1.0 - (df)(a-b)/c) * E3 ) perr("Latency: %.0f <%.01lf> %.01lfK ns, %.3lgK w/ ev:%.0f, ex:%5.02lf%%\n", (df)s->tdmn, mean, (df)s->tdmx/E3, (df)s->tncl/E3, (df)s->evnt, ((df)s->nexp/s->ctot)*100); 
perr("`Ratios: %.02lf %.02lf, min=1U <%.02lf> %.01lf, %.01fb\n", (df)s->tdmn/mean, (df)s->tdmx/mean, mean/s->tdmn, (df)s->tdmx/s->tdmn, __builtin_log2f(mean - s->tdmn)); perr("Jitters: %.0f <%.01lf> %.0f ns w/ %.03lgx, %.0lf:1K, %+.01lf‰\n", (df)s->jmn, jean, (df)s->jmx, (df)s->jmx/jean, dk(s->ctot, s->tncl, s->ctot), dk(mean, s->tdmn, jean)); skiptimings: perr("\n"); return 0; // exit() do free() }