diff --git a/stage1/wasi.c b/stage1/wasi.c
index 9d35efd280..0c91dab76e 100644
--- a/stage1/wasi.c
+++ b/stage1/wasi.c
@@ -184,6 +184,7 @@ enum wasi_whence {
     wasi_whence_end = 2,
 };
 
+#ifndef ZIG1_WASI_INCLUDE_HEADER
 extern uint16_t load16_align0(const uint8_t *ptr);
 extern uint16_t load16_align1(const uint16_t *ptr);
 extern uint32_t load32_align0(const uint8_t *ptr);
@@ -207,6 +208,10 @@ extern void store64_align3(uint64_t *ptr, uint64_t val);
 extern uint8_t **const wasm_memory;
 extern void wasm__start(void);
 
+#else /* ZIG1_WASI_INCLUDE_HEADER is defined: include that header in place of the extern declarations above */
+#include ZIG1_WASI_INCLUDE_HEADER
+#endif /* ZIG1_WASI_INCLUDE_HEADER */
+
 static int global_argc;
 static char **global_argv;
 
diff --git a/stage1/wasm2c.c b/stage1/wasm2c.c
index 425cc682b8..88027a9880 100644
--- a/stage1/wasm2c.c
+++ b/stage1/wasm2c.c
@@ -73,11 +73,28 @@ static void renderExpr(FILE *out, struct InputStream *in) {
     }
 }
 
+static void renderExpr_two_out(FILE *out0, FILE *out1, struct InputStream *in) { // Renders the expression read from `in`, writing the same text to both out0 and out1.
+    while (true) { // Keeps consuming opcodes; the only exit is the WasmOpcode_end case (or a panic).
+        switch (InputStream_readByte(in)) {
+            case WasmOpcode_end: return; // End of the expression: nothing more is written.
+
+            case WasmOpcode_i32_const: {
+                uint32_t value = (uint32_t)InputStream_readLeb128_i32(in); // Result of InputStream_readLeb128_i32, cast to uint32_t so it can be printed as hex.
+                fprintf(out0, "UINT32_C(0x%" PRIX32 ")", value); // The identical literal text goes to each stream.
+                fprintf(out1, "UINT32_C(0x%" PRIX32 ")", value);
+                break;
+            }
+
+            default: panic("unsupported expr opcode"); // Only the end and i32_const opcodes are handled here.
+        }
+    }
+}
+
 static const uint32_t big_endian = 0xff000000;
 
 int main(int argc, char **argv) {
-    if (argc != 3 && argc != 4) {
-        fprintf(stderr, "usage: %s <in.wasm.zst> <out.c> [endian]\n", argv[0]);
+    if (argc != 3 && argc != 4 && argc != 5) {
+        fprintf(stderr, "usage: %s <in.wasm.zst> <out.c> [endian] [c files]\n", argv[0]);
         return 1;
     }
 
@@ -96,6 +113,14 @@ int main(int argc, char **argv) {
         is_big_endian = *(uint8_t *)&big_endian; // Infer from host endianness.
     }
 
+    uint32_t n_c_files = 1; // Number of generated C files; stays 1 (single-file mode) unless a count is passed as argv[4].
+    if (argc >= 5) {
+        char *end = NULL; // Parse in unsigned long and range-check BEFORE narrowing: a uint32_t can never equal a 64-bit ULONG_MAX.
+        unsigned long requested = strtoul(argv[4], &end, 0);
+        if (end == argv[4] || *end != '\0' || requested == 0 || requested >= UINT32_MAX) panic("Invalid strtoul()");
+        n_c_files = (uint32_t)requested; // ">=" above also rejects strtoul's ULONG_MAX overflow result where unsigned long is 32 bits wide.
+    }
+
     const char *mod = "wasm";
 
     struct InputStream in;
@@ -107,8 +132,22 @@ int main(int argc, char **argv) {
         InputStream_readByte(&in) != 'm') panic("input is not a zstd-compressed wasm file");
     if (InputStream_readLittle_u32(&in) != 1) panic("unsupported wasm version");
 
-    FILE *out = fopen(argv[2], "wb");
-    if (out == NULL) panic("unable to open output file");
+    FILE *out;
+    FILE *tmp_out;
+    FILE *zig1_h_out;
+    FILE *wasi_gen_h_out;
+    FILE **n_c_files_stream;
+    if (n_c_files == 1){
+        out = fopen(argv[2], "wb");
+        if (out == NULL) panic("unable to open output file");
+    } else {
+        zig1_h_out = fopen("zig1.h", "wb");
+        if (zig1_h_out == NULL) panic("unable to open output file");
+        wasi_gen_h_out = fopen("wasi_gen.h", "wb");
+        if (wasi_gen_h_out == NULL) panic("unable to open output file");
+        out = wasi_gen_h_out;
+    }
+    if (n_c_files == 1){
     fputs("#include <float.h>\n"
           "#include <math.h>\n"
           "#include <stdint.h>\n"
@@ -344,6 +383,304 @@ int main(int argc, char **argv) {
           "    inited = 1;\n"
           "}\n"
           "\n", out);
+    } else {
+    // No static
+    fputs("#include <float.h>\n"
+          "#include <math.h>\n"
+          "#include <stdint.h>\n"
+          "#include <stdlib.h>\n"
+          "#include <string.h>\n"
+          "\n"
+          "uint16_t i16_byteswap(uint16_t src) {\n"
+          "    return (uint16_t)(uint8_t)(src >> 0) << 8 |\n"
+          "           (uint16_t)(uint8_t)(src >> 8) << 0;\n"
+          "}\n"
+          "uint32_t i32_byteswap(uint32_t src) {\n"
+          "    return (uint32_t)i16_byteswap(src >>  0) << 16 |\n"
+          "           (uint32_t)i16_byteswap(src >> 16) <<  0;\n"
+          "}\n"
+          "uint64_t i64_byteswap(uint64_t src) {\n"
+          "    return (uint64_t)i32_byteswap(src >>  0) << 32 |\n"
+          "           (uint64_t)i32_byteswap(src >> 32) <<  0;\n"
+          "}\n"
+          "\n", out);
+    fputs("uint16_t load16_align0(const uint8_t *ptr) {\n"
+          "    uint16_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i16_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint16_t load16_align1(const uint16_t *ptr) {\n"
+          "    uint16_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i16_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint32_t load32_align0(const uint8_t *ptr) {\n"
+          "    uint32_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i32_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint32_t load32_align1(const uint16_t *ptr) {\n"
+          "    uint32_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i32_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint32_t load32_align2(const uint32_t *ptr) {\n"
+          "    uint32_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i32_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint64_t load64_align0(const uint8_t *ptr) {\n"
+          "    uint64_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint64_t load64_align1(const uint16_t *ptr) {\n"
+          "    uint64_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint64_t load64_align2(const uint32_t *ptr) {\n"
+          "    uint64_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "uint64_t load64_align3(const uint64_t *ptr) {\n"
+          "    uint64_t val;\n"
+          "    memcpy(&val, ptr, sizeof(val));\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    return val;\n"
+          "}\n"
+          "\n"
+          "uint32_t i32_popcnt(uint32_t lhs) {\n"
+          "    lhs = lhs - ((lhs >> 1) & UINT32_C(0x55555555));\n"
+          "    lhs = (lhs & UINT32_C(0x33333333)) + ((lhs >> 2) & UINT32_C(0x33333333));\n"
+          "    lhs = (lhs + (lhs >> 4)) & UINT32_C(0x0F0F0F0F);\n"
+          "    return (lhs * UINT32_C(0x01010101)) >> 24;\n"
+          "}\n"
+          "uint32_t i32_ctz(uint32_t lhs) {\n"
+          "    return i32_popcnt(~lhs & (lhs - 1));\n"
+          "}\n"
+          "uint32_t i32_clz(uint32_t lhs) {\n"
+          "    lhs = i32_byteswap(lhs);\n"
+          "    lhs = (lhs & UINT32_C(0x0F0F0F0F)) << 4 | (lhs & UINT32_C(0xF0F0F0F0)) >> 4;\n"
+          "    lhs = (lhs & UINT32_C(0x33333333)) << 2 | (lhs & UINT32_C(0xCCCCCCCC)) >> 2;\n"
+          "    lhs = (lhs & UINT32_C(0x55555555)) << 1 | (lhs & UINT32_C(0xAAAAAAAA)) >> 1;\n"
+          "    return i32_ctz(lhs);\n"
+          "}\n"
+          "uint64_t i64_popcnt(uint64_t lhs) {\n"
+          "    lhs = lhs - ((lhs >> 1) & UINT64_C(0x5555555555555555));\n"
+          "    lhs = (lhs & UINT64_C(0x3333333333333333)) + ((lhs >> 2) & UINT64_C(0x3333333333333333));\n"
+          "    lhs = (lhs + (lhs >> 4)) & UINT64_C(0x0F0F0F0F0F0F0F0F);\n"
+          "    return (lhs * UINT64_C(0x0101010101010101)) >> 56;\n"
+          "}\n"
+          "uint64_t i64_ctz(uint64_t lhs) {\n"
+          "    return i64_popcnt(~lhs & (lhs - 1));\n"
+          "}\n"
+          "uint64_t i64_clz(uint64_t lhs) {\n"
+          "    lhs = i64_byteswap(lhs);\n"
+          "    lhs = (lhs & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4 | (lhs & UINT32_C(0xF0F0F0F0F0F0F0F0)) >> 4;\n"
+          "    lhs = (lhs & UINT64_C(0x3333333333333333)) << 2 | (lhs & UINT32_C(0xCCCCCCCCCCCCCCCC)) >> 2;\n"
+          "    lhs = (lhs & UINT64_C(0x5555555555555555)) << 1 | (lhs & UINT32_C(0xAAAAAAAAAAAAAAAA)) >> 1;\n"
+          "    return i64_ctz(lhs);\n"
+          "}\n"
+          "\n"
+          "void store16_align0(uint8_t *ptr, uint16_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i16_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store16_align1(uint16_t *ptr, uint16_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i16_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store32_align0(uint8_t *ptr, uint32_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i32_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store32_align1(uint16_t *ptr, uint32_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i32_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store32_align2(uint32_t *ptr, uint32_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i32_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store64_align0(uint8_t *ptr, uint64_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store64_align1(uint16_t *ptr, uint64_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store64_align2(uint32_t *ptr, uint64_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "void store64_align3(uint64_t *ptr, uint64_t val) {\n", out);
+    if (is_big_endian) fputs("    val = i64_byteswap(val);", out);
+    fputs("    memcpy(ptr, &val, sizeof(val));\n"
+          "}\n"
+          "\n"
+          "uint32_t i32_reinterpret_f32(const float src) {\n"
+          "    uint32_t dst;\n"
+          "    memcpy(&dst, &src, sizeof(dst));\n"
+          "    return dst;\n"
+          "}\n"
+          "uint64_t i64_reinterpret_f64(const double src) {\n"
+          "    uint64_t dst;\n"
+          "    memcpy(&dst, &src, sizeof(dst));\n"
+          "    return dst;\n"
+          "}\n"
+          "float f32_reinterpret_i32(const uint32_t src) {\n"
+          "    float dst;\n"
+          "    memcpy(&dst, &src, sizeof(dst));\n"
+          "    return dst;\n"
+          "}\n"
+          "double f64_reinterpret_i64(const uint64_t src) {\n"
+          "    double dst;\n"
+          "    memcpy(&dst, &src, sizeof(dst));\n"
+          "    return dst;\n"
+          "}\n"
+          "\n"
+          "uint32_t i32_trunc_sat_f32(const float src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return (uint32_t)(signbit(src) == 0 ? INT32_MAX : INT32_MIN);\n"
+          "    return (uint32_t)(int32_t)src;\n"
+          "}\n"
+          "uint32_t u32_trunc_sat_f32(const float src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return signbit(src) == 0 ? UINT32_MAX : 0;\n"
+          "    return (uint32_t)src;\n"
+          "}\n"
+          "uint32_t i32_trunc_sat_f64(const double src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return (uint32_t)(signbit(src) == 0 ? INT32_MAX : INT32_MIN);\n"
+          "    return (uint32_t)(int32_t)src;\n"
+          "}\n"
+          "uint32_t u32_trunc_sat_f64(const double src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return signbit(src) == 0 ? UINT32_MAX : 0;\n"
+          "    return (uint32_t)src;\n"
+          "}\n"
+          "uint64_t i64_trunc_sat_f32(const float src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return (uint64_t)(signbit(src) == 0 ? INT64_MAX : INT64_MIN);\n"
+          "    return (uint64_t)(int64_t)src;\n"
+          "}\n"
+          "uint64_t u64_trunc_sat_f32(const float src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return signbit(src) == 0 ? UINT64_MAX : 0;\n"
+          "    return (uint64_t)src;\n"
+          "}\n"
+          "uint64_t i64_trunc_sat_f64(const double src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return (uint64_t)(signbit(src) == 0 ? INT64_MAX : INT64_MIN);\n"
+          "    return (uint64_t)(int64_t)src;\n"
+          "}\n"
+          "uint64_t u64_trunc_sat_f64(const double src) {\n"
+          "    if (isnan(src)) return 0;\n"
+          "    if (isinf(src)) return signbit(src) == 0 ? UINT64_MAX : 0;\n"
+          "    return (uint64_t)src;\n"
+          "}\n"
+          "\n"
+          "uint32_t memory_grow(uint8_t **m, uint32_t *p, uint32_t *c, uint32_t n) {\n"
+          "    uint8_t *new_m = *m;\n"
+          "    uint32_t r = *p;\n"
+          "    uint32_t new_p = r + n;\n"
+          "    if (new_p > UINT32_C(0xFFFF)) return UINT32_C(0xFFFFFFFF);\n"
+          "    uint32_t new_c = *c;\n"
+          "    if (new_c < new_p) {\n"
+          "        do new_c += new_c / 2 + 8; while (new_c < new_p);\n"
+          "        if (new_c > UINT32_C(0xFFFF)) new_c = UINT32_C(0xFFFF);\n"
+          "        new_m = realloc(new_m, new_c << 16);\n"
+          "        if (new_m == NULL) return UINT32_C(0xFFFFFFFF);\n"
+          "        *m = new_m;\n"
+          "        *c = new_c;\n"
+          "    }\n"
+          "    *p = new_p;\n"
+          "    memset(&new_m[r << 16], 0, n << 16);\n"
+          "    return r;\n"
+          "}\n"
+          "\n"
+          "int inited;\n"
+          "void init_elem(void);\n"
+          "void init_data(void);\n"
+          "void init(void) {\n"
+          "    if (inited != 0) return;\n"
+          "    init_elem();\n"
+          "    init_data();\n"
+          "    inited = 1;\n"
+          "}\n"
+          "\n", out);
+    out = zig1_h_out;
+    fputs("#include <float.h>\n"
+          "#include <math.h>\n"
+          "#include <stdint.h>\n"
+          "#include <stdlib.h>\n"
+          "#include <string.h>\n"
+          "\n"
+          "extern uint16_t i16_byteswap(uint16_t src);\n"
+          "extern uint32_t i32_byteswap(uint32_t src);\n"
+          "extern uint64_t i64_byteswap(uint64_t src);\n"
+
+          "extern uint16_t load16_align0(const uint8_t *ptr);\n"
+          "extern uint16_t load16_align1(const uint16_t *ptr);\n"
+          "extern uint32_t load32_align0(const uint8_t *ptr);\n"
+          "extern uint32_t load32_align1(const uint16_t *ptr);\n"
+          "extern uint32_t load32_align2(const uint32_t *ptr);\n"
+          "extern uint64_t load64_align0(const uint8_t *ptr);\n"
+          "extern uint64_t load64_align1(const uint16_t *ptr);\n"
+          "extern uint64_t load64_align2(const uint32_t *ptr);\n"
+          "extern uint64_t load64_align3(const uint64_t *ptr);\n"
+
+          "extern uint32_t i32_popcnt(uint32_t lhs);\n"
+          "extern uint32_t i32_ctz(uint32_t lhs);\n"
+          "extern uint32_t i32_clz(uint32_t lhs);\n"
+          "extern uint64_t i64_popcnt(uint64_t lhs);\n"
+          "extern uint64_t i64_ctz(uint64_t lhs);\n"
+          "extern uint64_t i64_clz(uint64_t lhs);\n"
+
+          "extern void store16_align0(uint8_t *ptr, uint16_t val);\n"
+          "extern void store16_align1(uint16_t *ptr, uint16_t val);\n"
+          "extern void store32_align0(uint8_t *ptr, uint32_t val);\n"
+          "extern void store32_align1(uint16_t *ptr, uint32_t val);\n"
+          "extern void store32_align2(uint32_t *ptr, uint32_t val);\n"
+          "extern void store64_align0(uint8_t *ptr, uint64_t val);\n"
+          "extern void store64_align1(uint16_t *ptr, uint64_t val);\n"
+          "extern void store64_align2(uint32_t *ptr, uint64_t val);\n"
+          "extern void store64_align3(uint64_t *ptr, uint64_t val);\n"
+
+          "extern uint32_t i32_reinterpret_f32(const float src);\n"
+          "extern uint64_t i64_reinterpret_f64(const double src);\n"
+          "extern float f32_reinterpret_i32(const uint32_t src);\n"
+          "extern double f64_reinterpret_i64(const uint64_t src);\n"
+
+          "extern uint32_t i32_trunc_sat_f32(const float src);\n"
+          "extern uint32_t u32_trunc_sat_f32(const float src);\n"
+          "extern uint32_t i32_trunc_sat_f64(const double src);\n"
+          "extern uint32_t u32_trunc_sat_f64(const double src);\n"
+          "extern uint64_t i64_trunc_sat_f32(const float src);\n"
+          "extern uint64_t u64_trunc_sat_f32(const float src);\n"
+          "extern uint64_t i64_trunc_sat_f64(const double src);\n"
+          "extern uint64_t u64_trunc_sat_f64(const double src);\n"
+
+          "extern uint32_t memory_grow(uint8_t **m, uint32_t *p, uint32_t *c, uint32_t n);\n"
+          "extern int inited;\n"
+          "extern void init_elem(void);\n"
+          "extern void init_data(void);\n"
+          "extern void init(void);\n"
+//          "extern ;\n"
+
+          "\n", out);
+    }
 
     struct FuncType *types;
     uint32_t max_param_len = 0;
@@ -370,6 +707,7 @@ int main(int argc, char **argv) {
     {
         imports = malloc(sizeof(struct Import) * imports_len);
         if (imports == NULL) panic("out of memory");
+        if (n_c_files != 1) out = wasi_gen_h_out;
         for (uint32_t i = 0; i < imports_len; i += 1) {
             imports[i].mod = InputStream_readName(&in);
             imports[i].name = InputStream_readName(&in);
@@ -378,17 +716,32 @@ int main(int argc, char **argv) {
                     imports[i].type_idx = InputStream_readLeb128_u32(&in);
                     const struct FuncType *func_type = &types[imports[i].type_idx];
                     switch (func_type->result->len) {
-                        case 0: fputs("void", out); break;
-                        case 1: fputs(WasmValType_toC(func_type->result->types[0]), out); break;
+                        case 0:
+                            fputs("void", out);
+                            if (n_c_files != 1) fputs("void", zig1_h_out);
+                            break;
+                        case 1:
+                            fputs(WasmValType_toC(func_type->result->types[0]), out);
+                            if (n_c_files != 1) fputs(WasmValType_toC(func_type->result->types[0]), zig1_h_out);
+                            break;
                         default: panic("multiple function returns not supported");
                     }
                     fprintf(out, " %s_%s(", imports[i].mod, imports[i].name);
-                    if (func_type->param->len == 0) fputs("void", out);
+                    if (n_c_files != 1) fprintf(zig1_h_out, " %s_%s(", imports[i].mod, imports[i].name);
+                    if (func_type->param->len == 0) {
+                        fputs("void", out);
+                        if (n_c_files != 1) fputs("void", zig1_h_out);
+                    }
                     for (uint32_t param_i = 0; param_i < func_type->param->len; param_i += 1) {
-                        if (param_i > 0) fputs(", ", out);
+                        if (param_i > 0) {
+                            fputs(", ", out);
+                            if (n_c_files != 1) fputs(", ", zig1_h_out);
+                        }
                         fputs(WasmValType_toC(func_type->param->types[param_i]), out);
+                        if (n_c_files != 1) fputs(WasmValType_toC(func_type->param->types[param_i]), zig1_h_out);
                     }
                     fputs(");\n", out);
+                    if (n_c_files != 1) fputs(");\n", zig1_h_out);
                     break;
                 }
 
@@ -400,6 +753,7 @@ int main(int argc, char **argv) {
             }
         }
         fputc('\n', out);
+        if (n_c_files != 1) fputc('\n', zig1_h_out);
     }
 
     struct Func {
@@ -413,21 +767,35 @@ int main(int argc, char **argv) {
         for (uint32_t i = 0; i < len; i += 1) {
             funcs[i].type_idx = InputStream_readLeb128_u32(&in);
             const struct FuncType *func_type = &types[funcs[i].type_idx];
-            fputs("static ", out);
+            if (n_c_files == 1) fputs("static ", out);
             switch (func_type->result->len) {
-                case 0: fputs("void", out); break;
-                case 1: fputs(WasmValType_toC(func_type->result->types[0]), out); break;
+                case 0: fputs("void", out);
+                    if (n_c_files != 1) fputs("void", zig1_h_out);
+                    break;
+                case 1: fputs(WasmValType_toC(func_type->result->types[0]), out);
+                        if (n_c_files != 1) fputs(WasmValType_toC(func_type->result->types[0]), zig1_h_out);
+                        break;
                 default: panic("multiple function returns not supported");
             }
             fprintf(out, " f%" PRIu32 "(", i);
-            if (func_type->param->len == 0) fputs("void", out);
+            if (n_c_files != 1) fprintf(zig1_h_out, " f%" PRIu32 "(", i);
+            if (func_type->param->len == 0) {
+                fputs("void", out);
+                if (n_c_files != 1) fputs("void", zig1_h_out);
+            }
             for (uint32_t param_i = 0; param_i < func_type->param->len; param_i += 1) {
-                if (param_i > 0) fputs(", ", out);
+                if (param_i > 0) {
+                    fputs(", ", out);
+                    if (n_c_files != 1) fputs(", ", zig1_h_out);
+                }
                 fprintf(out, "%s", WasmValType_toC(func_type->param->types[param_i]));
+                if (n_c_files != 1) fprintf(zig1_h_out, "%s", WasmValType_toC(func_type->param->types[param_i]));
             }
             fputs(");\n", out);
+            if (n_c_files != 1) fputs(");\n", zig1_h_out);
         }
         fputc('\n', out);
+        if (n_c_files != 1) fputc('\n', zig1_h_out);
     }
 
     struct Table {
@@ -450,10 +818,18 @@ int main(int argc, char **argv) {
             tables[i].type = ref_type;
             tables[i].limits = InputStream_readLimits(&in);
             if (tables[i].limits.min != tables[i].limits.max) panic("growable table not supported");
-            fprintf(out, "static void (*t%" PRIu32 "[UINT32_C(%" PRIu32 ")])(void);\n",
-                    i, tables[i].limits.min);
+            if (n_c_files == 1){
+                fprintf(out, "static void (*t%" PRIu32 "[UINT32_C(%" PRIu32 ")])(void);\n",
+                        i, tables[i].limits.min);
+            } else {
+                fprintf(out, "void (*t%" PRIu32 "[UINT32_C(%" PRIu32 ")])(void);\n",
+                        i, tables[i].limits.min);
+                fprintf(zig1_h_out, "extern void (*t%" PRIu32 "[UINT32_C(%" PRIu32 ")])(void);\n",
+                        i, tables[i].limits.min);
+            }
         }
         fputc('\n', out);
+        if (n_c_files != 1) fputc('\n', zig1_h_out);
     }
 
     struct Mem {
@@ -466,11 +842,21 @@ int main(int argc, char **argv) {
         if (mems == NULL) panic("out of memory");
         for (uint32_t i = 0; i < mems_len; i += 1) {
             mems[i].limits = InputStream_readLimits(&in);
-            fprintf(out, "static uint8_t *m%" PRIu32 ";\n"
-                    "static uint32_t p%" PRIu32 ";\n"
-                    "static uint32_t c%" PRIu32 ";\n", i, i, i);
+            if (n_c_files == 1){
+                fprintf(out, "static uint8_t *m%" PRIu32 ";\n"
+                        "static uint32_t p%" PRIu32 ";\n"
+                        "static uint32_t c%" PRIu32 ";\n", i, i, i);
+            } else {
+                fprintf(out, "uint8_t *m%" PRIu32 ";\n"
+                        "uint32_t p%" PRIu32 ";\n"
+                        "uint32_t c%" PRIu32 ";\n", i, i, i);
+                fprintf(zig1_h_out, "extern uint8_t *m%" PRIu32 ";\n"
+                        "extern uint32_t p%" PRIu32 ";\n"
+                        "extern uint32_t c%" PRIu32 ";\n", i, i, i);
+            }
         }
         fputc('\n', out);
+        if (n_c_files != 1) fputc('\n', zig1_h_out);
     }
 
     struct Global {
@@ -485,6 +871,7 @@ int main(int argc, char **argv) {
         for (uint32_t i = 0; i < len; i += 1) {
             int64_t val_type = InputStream_readLeb128_i64(&in);
             enum WasmMut mut = InputStream_readByte(&in);
+            if (n_c_files != 1) fprintf(zig1_h_out, "extern %s%s g%" PRIu32 ";\n", WasmMut_toC(mut), WasmValType_toC(val_type), i);
             fprintf(out, "%s%s g%" PRIu32 " = ", WasmMut_toC(mut), WasmValType_toC(val_type), i);
             renderExpr(out, &in);
             fputs(";\n", out);
@@ -492,6 +879,7 @@ int main(int argc, char **argv) {
             globals[i].val_type = val_type;
         }
         fputc('\n', out);
+        if (n_c_files != 1) fputc('\n', zig1_h_out);
     }
 
     (void)InputStream_skipToSection(&in, WasmSectionId_export);
@@ -506,15 +894,44 @@ int main(int argc, char **argv) {
                     if (idx < imports_len) panic("can't export an import");
                     const struct FuncType *func_type = &types[funcs[idx - imports_len].type_idx];
                     switch (func_type->result->len) {
-                        case 0: fputs("void", out); break;
-                        case 1: fputs(WasmValType_toC(func_type->result->types[0]), out); break;
+                        case 0:
+                            fputs("void", out);
+                            if (n_c_files != 1) {
+                                fputs("void", zig1_h_out);
+                            }
+                            break;
+                        case 1:
+                            fputs(WasmValType_toC(func_type->result->types[0]), out);
+                            if (n_c_files != 1) {
+                                fputs(WasmValType_toC(func_type->result->types[0]), zig1_h_out);
+                            }
+                            break;
                         default: panic("multiple function returns not supported");
                     }
                     fprintf(out, " %s_%s(", mod, name);
-                    if (func_type->param->len == 0) fputs("void", out);
+                    if (n_c_files != 1) {
+                        fprintf(zig1_h_out, " %s_%s(", mod, name);
+                    }
+                    if (func_type->param->len == 0) {
+                        fputs("void", out);
+                        if (n_c_files != 1) {
+                            fputs("void", zig1_h_out);
+                        }
+                    }
                     for (uint32_t param_i = 0; param_i < func_type->param->len; param_i += 1) {
-                        if (param_i > 0) fputs(", ", out);
+                        if (param_i > 0) {
+                            fputs(", ", out);
+                            if (n_c_files != 1) {
+                                fputs(", ", zig1_h_out);
+                            }
+                        }
                         fprintf(out, "%s l%" PRIu32, WasmValType_toC(func_type->param->types[param_i]), param_i);
+                        if (n_c_files != 1) {
+                            fprintf(zig1_h_out, "%s l%" PRIu32, WasmValType_toC(func_type->param->types[param_i]), param_i);
+                        }
+                    }
+                    if (n_c_files != 1) {
+                        fputs(");\n", zig1_h_out);
                     }
                     fprintf(out,
                             ") {\n"
@@ -531,6 +948,9 @@ int main(int argc, char **argv) {
 
                 case 0x02:
                     fprintf(out, "uint8_t **const %s_%s = &m%" PRIu32 ";\n", mod, name, idx);
+                    if (n_c_files != 1) {
+                        fprintf(zig1_h_out, "extern uint8_t **const %s_%s;\n", mod, name);
+                    }
                     break;
 
                 default: panic("unsupported export kind");
@@ -538,12 +958,17 @@ int main(int argc, char **argv) {
             free(name);
         }
         fputc('\n', out);
+        if (n_c_files != 1) fputc('\n', zig1_h_out);
     }
 
     (void)InputStream_skipToSection(&in, WasmSectionId_elem);
     {
         uint32_t len = InputStream_readLeb128_u32(&in);
-        fputs("static void init_elem(void) {\n", out);
+        if (n_c_files == 1) {
+            fputs("static void init_elem(void) {\n", out);
+        } else {
+            fputs("void init_elem(void) {\n", out);
+        }
         for (uint32_t segment_i = 0; segment_i < len; segment_i += 1) {
             uint32_t table_idx = 0;
             uint32_t elem_type = InputStream_readLeb128_u32(&in);
@@ -572,12 +997,30 @@ int main(int argc, char **argv) {
         uint32_t *param_stash = malloc(sizeof(uint32_t) * max_param_len);
 
         uint32_t len = InputStream_readLeb128_u32(&in);
+        char tmpstrbuffer[64];
+        if (n_c_files > len)
+            n_c_files = len;
+        if (n_c_files > 1){
+            n_c_files_stream = malloc(sizeof(FILE *) * n_c_files);
+            if (n_c_files_stream == NULL)
+                panic("out of memory");
+            for (uint32_t n = 0; n < n_c_files; ++n) {
+                sprintf(tmpstrbuffer, "zig1_%" PRIu32 ".c", n);
+                n_c_files_stream[n] = fopen(tmpstrbuffer, "wb");
+                if (n_c_files_stream[n] == NULL) panic("unable to open file");
+                fputs("#include \"zig1.h\"\n\n", n_c_files_stream[n]);
+            }
+        }
         for (uint32_t func_i = 0; func_i < len; func_i += 1) {
             FuncGen_reset(&fg);
 
             InputStream_readLeb128_u32(&in);
             const struct FuncType *func_type = &types[funcs[func_i].type_idx];
-            fputs("static ", out);
+            if (n_c_files == 1) {
+                fputs("static ", out);
+            } else {
+                out = n_c_files_stream[func_i % n_c_files];
+            }
             switch (func_type->result->len) {
                 case 0: fputs("void", out); break;
                 case 1: fputs(WasmValType_toC(func_type->result->types[0]), out); break;
@@ -2253,12 +2696,20 @@ int main(int argc, char **argv) {
             }
             fputs("}\n\n", out);
         }
+    if (n_c_files != 1) out = wasi_gen_h_out; // Switch back to the wasi_gen.h stream for the output that follows.
+    for (uint32_t n = 0; n_c_files > 1 && n < n_c_files; n += 1) // Close every per-file stream so buffered output is flushed and write errors surface.
+        if (fclose(n_c_files_stream[n]) != 0) panic("unable to close output file");
+    if (n_c_files > 1) free(n_c_files_stream); // The array is only allocated when the clamped count is still above 1; freeing it otherwise would use an uninitialized pointer.
     }
 
     (void)InputStream_skipToSection(&in, WasmSectionId_data);
     {
         uint32_t len = InputStream_readLeb128_u32(&in);
-        fputs("static void init_data(void) {\n", out);
+        if (n_c_files == 1) {
+            fputs("static void init_data(void) {\n", out);
+        } else {
+            fputs("void init_data(void) {\n", out);
+        }
         for (uint32_t i = 0; i < mems_len; i += 1)
             fprintf(out, "    p%" PRIu32 " = UINT32_C(%" PRIu32 ");\n"
                     "    c%" PRIu32 " = p%" PRIu32 ";\n"