## Quant mix recipe created using Thireus' GGUF Tool Suite - https://gguf.thireus.com/
# Model name: DeepSeek-R1-0528
# Link to the original model: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528

## Model head & embeddings — qbits: 32 8
^output_norm\.weight$=f32
^token_embd\.weight$=q8_0
^output\.weight$=q8_0

## Special attention kernels — single-quant only (llama-quantize takes care of it) — qbits: 8
^blk\.([0-9]|[1-5][0-9]|60)\.attn_k_b\.weight$=q8_0

## Multi-headed attention parameters — qbits: 32 5
^blk\.([0-9]|[1-5][0-9]|60)\.attn_kv_b\.weight$=iq5_ks_r4
^blk\.([0-9]|[1-5][0-9]|60)\.attn_q_b\.weight$=iq5_ks_r4
^blk\.([0-9]|[1-5][0-9]|60)\.attn_kv_a_mqa\.weight$=iq5_ks_r4
^blk\.([0-9]|[1-5][0-9]|60)\.attn_output\.weight$=iq5_ks_r4
^blk\.([0-9]|[1-5][0-9]|60)\.attn_kv_a_norm\.weight$=f32
^blk\.([0-9]|[1-5][0-9]|60)\.attn_q_a\.weight$=iq5_ks_r4
^blk\.([0-9]|[1-5][0-9]|60)\.attn_v_b\.weight$=iq5_ks_r4
^blk\.([0-9]|[1-5][0-9]|60)\.attn_q_a_norm\.weight$=f32
^blk\.([0-9]|[1-5][0-9]|60)\.attn_norm\.weight$=f32

## Core FFN weights — qbits: 32 8
^blk\.([3-9]|[1-5][0-9]|60)\.ffn_gate_inp\.weight$=f32
^blk\.([0-9]|[1-5][0-9]|60)\.ffn_norm\.weight$=f32
^blk\.[0-2]\.ffn_down\.weight$=q8_0
^blk\.[0-2]\.ffn_gate\.weight$=q8_0
^blk\.[0-2]\.ffn_up\.weight$=q8_0

## Other tensors — qbits: 32
^blk\.([3-9]|[1-5][0-9]|60)\.exp_probs_b\.bias$=f32

## GPU-loaded ffn_*_shexp
# ffn_down_shexp (down-projection) — qbits: 8
^blk\.([3-9]|[1-5][0-9]|60)\.ffn_down_shexp\.weight$=q8_0

# ffn_up_shexp (up-projection) — qbits: 8
^blk\.([3-9]|[1-5][0-9]|60)\.ffn_up_shexp\.weight$=q8_0

# ffn_gate_shexp (gate-projection) — qbits: 8
^blk\.([3-9]|[1-5][0-9]|60)\.ffn_gate_shexp\.weight$=q8_0

## CPU-friendly ffn_*_exps
# ffn_down_exps (down-extraction) — qbits: 6 5 4
^blk\.(3[2-9]|4[0-2]|4[4-9]|51)\.ffn_down_exps\.weight$=iq6_k
^blk\.(12|2[0-9]|3[0-1]|43|50|5[2-8]|60)\.ffn_down_exps\.weight$=iq5_ks_r4
^blk\.([3-9]|1[0-1]|1[3-9]|59)\.ffn_down_exps\.weight$=iq4_ks

# ffn_up_exps (up-extraction) — qbits: 6 5 4
^blk\.(44|47|55)\.ffn_up_exps\.weight$=iq6_k
^blk\.(24|27|3[2-9]|4[0-3]|4[5-6]|4[8-9]|5[0-4]|5[6-8]|60)\.ffn_up_exps\.weight$=iq5_ks_r4
^blk\.([3-9]|1[0-9]|2[0-3]|2[5-6]|2[8-9]|3[0-1]|59)\.ffn_up_exps\.weight$=iq4_ks

# ffn_gate_exps (gate-extraction) — qbits: 6 5 4
^blk\.(44|47|55)\.ffn_gate_exps\.weight$=iq6_k
^blk\.(24|27|3[2-9]|4[0-3]|4[5-6]|4[8-9]|5[0-4]|5[6-8]|60)\.ffn_gate_exps\.weight$=iq5_ks_r4
^blk\.([3-9]|1[0-9]|2[0-3]|2[5-6]|2[8-9]|3[0-1]|59)\.ffn_gate_exps\.weight$=iq4_ks

## Summary of tensor sizes per class
# GPU Total: 13.733 GiB (100.0%) | 13.73 GiB max, if all were q8_0 | 13.73 GiB min, if all were q8_0
# CPU Total: 382.156 GiB (75.8%) | 504.33 GiB max, if all were iq6_k | 323.53 GiB min, if all were iq4_ks
# GPU+CPU Total: 395.889 GiB (87.9%)

## Summary of tensor counts and bpw per qtype
#
# GPU-loaded quants:
# QTYPE Count BPW Assigned GiB % Assigned Max GiB (all)
# +f32 361 32.0 0.40 GiB - -
# +q8_0 61 8.5 0.51 GiB - -
# q8_0 185 8.5 5.54 GiB 100.0% 5.54
# +iq5_ks_r4 366 5.25 7.29 GiB - -
#
# CPU-friendly quants:
# QTYPE Count BPW Assigned GiB % Assigned Max GiB (all)
# iq6_k 24 6.625 69.56 GiB 13.8% 504.33
# iq5_ks_r4 77 5.25 176.86 GiB 44.3% 399.66
# iq4_ks 73 4.25 135.73 GiB 42.0% 323.53
#
# -Average BPW: 5.0601
#
# -Notes:
# - '+' means user-defined pre-assigned tensors, or tensor missing from csv data or f32 tensors
# - Recipe produced on the 2025-09-05 07:38:43 UTC+0000 using Thireus' GGUF tools (https://gguf.thireus.com/)
# - Script SHA-256: 90e3c2fafeb4aa4360e9fed16bf2aa8f5be90c24da90bd656f38ca48bf77bd1d
# - Calibration dataset 'ppl_results.csv' SHA-256: b45b30a282e29922fc46ea4a030f1aa27df5be790c9d921cfac0fba27adc3faa
# - tensors.bf16.map SHA-256: f264323f789d0da78bc21ccf208cbb16709c5808c9c1f939a0c78f7c03c1ece1
# - tensors.bf16.map model name: DeepSeek-R1-0528-THIREUS-BF16-SPECIAL_TENSOR-01148-of-01148
# - tensors.iq6_k.map SHA-256: c2b301156703fd3d360a6a9406d5079bd6625c2f7557f89c7de632df78eed822
# - tensors.iq6_k.map model name: DeepSeek-R1-0528-THIREUS-IQ6_K-SPECIAL_TENSOR-01148-of-01148
# - tensors.iq5_ks_r4.map SHA-256: 5916f9ee20192160667abccfb2e1f54fb110f42150eb0c9f97d19b8a52732a56
# - tensors.iq5_ks_r4.map model name: DeepSeek-R1-0528-THIREUS-IQ5_KS_R4-SPECIAL_TENSOR-01148-of-01148
# - tensors.iq4_ks.map SHA-256: d5f6bb2f932668cbea57f0910e235759b56e7ae5f0e0e1420af7c8e40d824a51
# - tensors.iq4_ks.map model name: DeepSeek-R1-0528-THIREUS-IQ4_KS-SPECIAL_TENSOR-01148-of-01148
# - tensors.q8_0.map SHA-256: 8d064fb71d986348b38df6d0517ba527dd5bbd25cd1f45535971087f042f1b32
# - tensors.q8_0.map model name: DeepSeek-R1-0528-THIREUS-Q8_0-SPECIAL_TENSOR-01148-of-01148
# - tensors.iq1_m_r4.map SHA-256: 895e207c15f5e8a2c81f5b7061b0fe3a64a481b83c9499fb75ad7724f92d6c25
# - tensors.iq1_m_r4.map model name: DeepSeek-R1-0528-THIREUS-IQ1_M_R4-SPECIAL_TENSOR-01148-of-01148
# - GPG signatures: PASSED
# - Command used:
# ../../quant_assign.py ppl_results.csv --tolerance 0.01 --cpu-irq-k 1.5 --gpu-irq-k 1.5 --gpu-assign-qtype iq5_ks_r4 \
# --cpu-tensors-max-size 380 --gpu-tensors-max-size 100% --exponential-factor 8 --cpu-tensors \
# 'blk\.([3-9]|[1-5][0-9]|60)\.ffn_down_exps\.weight' 'blk\.([3-9]|[1-5][0-9]|60)\.ffn_up_exps\.weight' \
# 'blk\.([3-9]|[1-5][0-9]|60)\.ffn_gate_exps\.weight' --gpu-tensors '.*' --cpu-quants iq6_k iq5_ks_r4 iq4_ks \
# --gpu-quants q8_0 --gpu-assign-tensors 'blk\.([0-9]|[1-5][0-9]|60)\.attn_k_b\.weight=q8_0' --harmonize-tensors \
# '^blk\..*\.ffn_up_exps.*,blk\..*\.ffn_gate_exps.*' --harmonization-technique 3

## THE END!