/* LzmaSpec.cpp -- LZMA Reference Decoder 2015-06-14 : Igor Pavlov : Public domain */ // This code implements LZMA file decoding according to LZMA specification. // This code is not optimized for speed. #include #ifdef _MSC_VER #pragma warning(disable : 4710) // function not inlined #pragma warning(disable : 4996) // This function or variable may be unsafe #endif typedef unsigned char Byte; typedef unsigned short UInt16; #ifdef _LZMA_UINT32_IS_ULONG typedef unsigned long UInt32; #else typedef unsigned int UInt32; #endif #if defined(_MSC_VER) || defined(__BORLANDC__) typedef unsigned __int64 UInt64; #else typedef unsigned long long int UInt64; #endif struct CInputStream { FILE *File; UInt64 Processed; void Init() { Processed = 0; } Byte ReadByte() { int c = getc(File); if (c < 0) throw "Unexpected end of file"; Processed++; return (Byte)c; } }; struct COutStream { FILE *File; UInt64 Processed; void Init() { Processed = 0; } void WriteByte(Byte b) { if (putc(b, File) == EOF) throw "File writing error"; Processed++; } }; class COutWindow { Byte *Buf; UInt32 Pos; UInt32 Size; bool IsFull; public: unsigned TotalPos; COutStream OutStream; COutWindow(): Buf(NULL) {} ~COutWindow() { delete []Buf; } void Create(UInt32 dictSize) { Buf = new Byte[dictSize]; Pos = 0; Size = dictSize; IsFull = false; TotalPos = 0; } void PutByte(Byte b) { TotalPos++; Buf[Pos++] = b; if (Pos == Size) { Pos = 0; IsFull = true; } OutStream.WriteByte(b); } Byte GetByte(UInt32 dist) const { return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; } void CopyMatch(UInt32 dist, unsigned len) { for (; len > 0; len--) PutByte(GetByte(dist)); } bool CheckDistance(UInt32 dist) const { return dist <= Pos || IsFull; } bool IsEmpty() const { return Pos == 0 && !IsFull; } }; #define kNumBitModelTotalBits 11 #define kNumMoveBits 5 typedef UInt16 CProb; #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) #define INIT_PROBS(p) \ { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } class CRangeDecoder { UInt32 Range; UInt32 Code; void Normalize(); public: CInputStream *InStream; bool Corrupted; bool Init(); bool IsFinishedOK() const { return Code == 0; } UInt32 DecodeDirectBits(unsigned numBits); unsigned DecodeBit(CProb *prob); }; bool CRangeDecoder::Init() { Corrupted = false; Range = 0xFFFFFFFF; Code = 0; Byte b = InStream->ReadByte(); for (int i = 0; i < 4; i++) Code = (Code << 8) | InStream->ReadByte(); if (b != 0 || Code == Range) Corrupted = true; return b == 0; } #define kTopValue ((UInt32)1 << 24) void CRangeDecoder::Normalize() { if (Range < kTopValue) { Range <<= 8; Code = (Code << 8) | InStream->ReadByte(); } } UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) { UInt32 res = 0; do { Range >>= 1; Code -= Range; UInt32 t = 0 - ((UInt32)Code >> 31); Code += Range & t; if (Code == Range) Corrupted = true; Normalize(); res <<= 1; res += t + 1; } while (--numBits); return res; } unsigned CRangeDecoder::DecodeBit(CProb *prob) { unsigned v = *prob; UInt32 bound = (Range >> kNumBitModelTotalBits) * v; unsigned symbol; if (Code < bound) { v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; Range = bound; symbol = 0; } else { v -= v >> kNumMoveBits; Code -= bound; Range -= bound; symbol = 1; } *prob = (CProb)v; Normalize(); return symbol; } unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) { unsigned m = 1; unsigned symbol = 0; for (unsigned i = 0; i < numBits; i++) { unsigned bit = rc->DecodeBit(&probs[m]); m <<= 1; m += bit; symbol |= (bit << i); } return symbol; } template class CBitTreeDecoder { CProb Probs[(unsigned)1 << NumBits]; public: void Init() { INIT_PROBS(Probs); } unsigned Decode(CRangeDecoder *rc) { unsigned m = 1; for (unsigned i = 0; i < NumBits; i++) m = (m << 1) + rc->DecodeBit(&Probs[m]); return m - ((unsigned)1 << NumBits); } unsigned ReverseDecode(CRangeDecoder *rc) { return BitTreeReverseDecode(Probs, NumBits, rc); } }; #define kNumPosBitsMax 4 #define kNumStates 12 #define kNumLenToPosStates 4 #define kNumAlignBits 4 #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #define kMatchMinLen 2 class CLenDecoder { CProb Choice; CProb Choice2; CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; CBitTreeDecoder<8> HighCoder; public: void Init() { Choice = PROB_INIT_VAL; Choice2 = PROB_INIT_VAL; HighCoder.Init(); for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) { LowCoder[i].Init(); MidCoder[i].Init(); } } unsigned Decode(CRangeDecoder *rc, unsigned posState) { if (rc->DecodeBit(&Choice) == 0) return LowCoder[posState].Decode(rc); if (rc->DecodeBit(&Choice2) == 0) return 8 + MidCoder[posState].Decode(rc); return 16 + HighCoder.Decode(rc); } }; unsigned UpdateState_Literal(unsigned state) { if (state < 4) return 0; else if (state < 10) return state - 3; else return state - 6; } unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; } #define LZMA_DIC_MIN (1 << 12) class CLzmaDecoder { public: CRangeDecoder RangeDec; COutWindow OutWindow; bool markerIsMandatory; unsigned lc, pb, lp; UInt32 dictSize; UInt32 dictSizeInProperties; void DecodeProperties(const Byte *properties) { unsigned d = properties[0]; if (d >= (9 * 5 * 5)) throw "Incorrect LZMA properties"; lc = d % 9; d /= 9; pb = d / 5; lp = d % 5; dictSizeInProperties = 0; for (int i = 0; i < 4; i++) dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); dictSize = dictSizeInProperties; if (dictSize < LZMA_DIC_MIN) dictSize = LZMA_DIC_MIN; } CLzmaDecoder(): LitProbs(NULL) {} ~CLzmaDecoder() { delete []LitProbs; } void Create() { OutWindow.Create(dictSize); CreateLiterals(); } int Decode(bool unpackSizeDefined, UInt64 unpackSize); private: CProb *LitProbs; void CreateLiterals() { LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; } void InitLiterals() { UInt32 num = (UInt32)0x300 << (lc + lp); for (UInt32 i = 0; i < num; i++) LitProbs[i] = PROB_INIT_VAL; } void DecodeLiteral(unsigned state, UInt32 rep0) { unsigned prevByte = 0; if (!OutWindow.IsEmpty()) prevByte = OutWindow.GetByte(1); unsigned symbol = 1; unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); CProb *probs = &LitProbs[(UInt32)0x300 * litState]; if (state >= 7) { unsigned matchByte = OutWindow.GetByte(rep0 + 1); do { unsigned matchBit = (matchByte >> 7) & 1; matchByte <<= 1; unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); symbol = (symbol << 1) | bit; if (matchBit != bit) break; } while (symbol < 0x100); } while (symbol < 0x100) symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); OutWindow.PutByte((Byte)(symbol - 0x100)); } CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; CBitTreeDecoder AlignDecoder; CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; void InitDist() { for (unsigned i = 0; i < kNumLenToPosStates; i++) PosSlotDecoder[i].Init(); AlignDecoder.Init(); INIT_PROBS(PosDecoders); } unsigned DecodeDistance(unsigned len) { unsigned lenState = len; if (lenState > kNumLenToPosStates - 1) lenState = kNumLenToPosStates - 1; unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); if (posSlot < 4) return posSlot; unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); if (posSlot < kEndPosModelIndex) dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); else { dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; dist += AlignDecoder.ReverseDecode(&RangeDec); } return dist; } CProb IsMatch[kNumStates << kNumPosBitsMax]; CProb IsRep[kNumStates]; CProb IsRepG0[kNumStates]; CProb IsRepG1[kNumStates]; CProb IsRepG2[kNumStates]; CProb IsRep0Long[kNumStates << kNumPosBitsMax]; CLenDecoder LenDecoder; CLenDecoder RepLenDecoder; void Init() { InitLiterals(); InitDist(); INIT_PROBS(IsMatch); INIT_PROBS(IsRep); INIT_PROBS(IsRepG0); INIT_PROBS(IsRepG1); INIT_PROBS(IsRepG2); INIT_PROBS(IsRep0Long); LenDecoder.Init(); RepLenDecoder.Init(); } }; #define LZMA_RES_ERROR 0 #define LZMA_RES_FINISHED_WITH_MARKER 1 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize) { if (!RangeDec.Init()) return LZMA_RES_ERROR; Init(); UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; unsigned state = 0; for (;;) { if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) if (RangeDec.IsFinishedOK()) return LZMA_RES_FINISHED_WITHOUT_MARKER; unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0) { if (unpackSizeDefined && unpackSize == 0) return LZMA_RES_ERROR; DecodeLiteral(state, rep0); state = UpdateState_Literal(state); unpackSize--; continue; } unsigned len; if (RangeDec.DecodeBit(&IsRep[state]) != 0) { if (unpackSizeDefined && unpackSize == 0) return LZMA_RES_ERROR; if (OutWindow.IsEmpty()) return LZMA_RES_ERROR; if (RangeDec.DecodeBit(&IsRepG0[state]) == 0) { if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0) { state = UpdateState_ShortRep(state); OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); unpackSize--; continue; } } else { UInt32 dist; if (RangeDec.DecodeBit(&IsRepG1[state]) == 0) dist = rep1; else { if (RangeDec.DecodeBit(&IsRepG2[state]) == 0) dist = rep2; else { dist = rep3; rep3 = rep2; } rep2 = rep1; } rep1 = rep0; rep0 = dist; } len = RepLenDecoder.Decode(&RangeDec, posState); state = UpdateState_Rep(state); } else { rep3 = rep2; rep2 = rep1; rep1 = rep0; len = LenDecoder.Decode(&RangeDec, posState); state = UpdateState_Match(state); rep0 = DecodeDistance(len); if (rep0 == 0xFFFFFFFF) return RangeDec.IsFinishedOK() ? LZMA_RES_FINISHED_WITH_MARKER : LZMA_RES_ERROR; if (unpackSizeDefined && unpackSize == 0) return LZMA_RES_ERROR; if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) return LZMA_RES_ERROR; } len += kMatchMinLen; bool isError = false; if (unpackSizeDefined && unpackSize < len) { len = (unsigned)unpackSize; isError = true; } OutWindow.CopyMatch(rep0 + 1, len); unpackSize -= len; if (isError) return LZMA_RES_ERROR; } } static void Print(const char *s) { fputs(s, stdout); } static void PrintError(const char *s) { fputs(s, stderr); } #define CONVERT_INT_TO_STR(charType, tempSize) \ void ConvertUInt64ToString(UInt64 val, char *s) { char temp[32]; unsigned i = 0; while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } *s++ = (char)('0' + (unsigned)val); while (i != 0) { i--; *s++ = temp[i]; } *s = 0; } void PrintUInt64(const char *title, UInt64 v) { Print(title); Print(" : "); char s[32]; ConvertUInt64ToString(v, s); Print(s); Print(" bytes \n"); } int main2(int numArgs, const char *args[]) { Print("\nLZMA Reference Decoder 15.00 : Igor Pavlov : Public domain : 2015-04-16\n"); if (numArgs == 1) Print("\nUse: lzmaSpec a.lzma outFile"); if (numArgs != 3) throw "you must specify two parameters"; CInputStream inStream; inStream.File = fopen(args[1], "rb"); inStream.Init(); if (inStream.File == 0) throw "Can't open input file"; CLzmaDecoder lzmaDecoder; lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+"); lzmaDecoder.OutWindow.OutStream.Init(); if (inStream.File == 0) throw "Can't open output file"; Byte header[13]; int i; for (i = 0; i < 13; i++) header[i] = inStream.ReadByte(); lzmaDecoder.DecodeProperties(header); printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb); printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties); printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize); UInt64 unpackSize = 0; bool unpackSizeDefined = false; for (i = 0; i < 8; i++) { Byte b = header[5 + i]; if (b != 0xFF) unpackSizeDefined = true; unpackSize |= (UInt64)b << (8 * i); } lzmaDecoder.markerIsMandatory = !unpackSizeDefined; Print("\n"); if (unpackSizeDefined) PrintUInt64("Uncompressed Size", unpackSize); else Print("End marker is expected\n"); lzmaDecoder.RangeDec.InStream = &inStream; Print("\n"); lzmaDecoder.Create(); int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize); PrintUInt64("Read ", inStream.Processed); PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed); if (res == LZMA_RES_ERROR) throw "LZMA decoding error"; else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER) Print("Finished without end marker"); else if (res == LZMA_RES_FINISHED_WITH_MARKER) { if (unpackSizeDefined) { if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize) throw "Finished with end marker before than specified size"; Print("Warning: "); } Print("Finished with end marker"); } else throw "Internal Error"; Print("\n"); if (lzmaDecoder.RangeDec.Corrupted) { Print("\nWarning: LZMA stream is corrupted\n"); } return 0; } int #ifdef _MSC_VER __cdecl #endif main(int numArgs, const char *args[]) { try { return main2(numArgs, args); } catch (const char *s) { PrintError("\nError:\n"); PrintError(s); PrintError("\n"); return 1; } catch(...) { PrintError("\nError\n"); return 1; } }