/*************************************************************************************************** * Pollard–Kangaroo (wrap-aware, user-configurable k, live counter, loop detector, restart counter) * Coded by DooKoo2 * Load/Save DP tech by NoMachine * SSD rework (strong DP on SSD instead of RAM. Bloom – RAM, DP_table – SSD) * Added a few improvements with runtime security * Patched for higher performance (dual-hash DP table, better Bloom/DP sizing, batched I/O) * * g++ Mark1.cpp Int.cpp SECP256K1.cpp Point.cpp Random.cpp IntMod.cpp IntGroup.cpp Timer.cpp \ * -O3 -march=native -funroll-loops -ftree-vectorize -fstrict-aliasing -fno-semantic-interposition \ * -fvect-cost-model=unlimited -fno-trapping-math -fipa-ra -fipa-modref -flto -fassociative-math \ * -fopenmp -mavx2 -mbmi2 -madx -std=c++17 -pthread -o Mark1 * ***************************************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "Int.h" #include "Point.h" #include "SECP256K1.h" #include "IntGroup.h" #include "simd_block_bloom.h" // ─── Misc ───────────────────────────────────────────────────────────────────── static inline uint64_t splitmix64(uint64_t x){ x += 0x9E3779B97F4A7C15ULL; x = (x ^ (x>>30)) * 0xBF58476D1CE4E5B9ULL; x = (x ^ (x>>27)) * 0x94D049BB133111EBULL; return x ^ (x>>31); } static std::string humanBytes(size_t bytes){ constexpr const char* u[]{"B","Kb","Mb","Gb","Tb"}; double v = double(bytes); int s = 0; while(v >= 1024.0 && s < 4){ v /= 1024.0; ++s; } std::ostringstream o; o << std::fixed << std::setprecision((v<10)?2:(v<100)?1:0) << v << u[s]; return o.str(); } static inline void intCopy(Int& d,const Int& s){ d.Set(&const_cast(s)); } static inline uint64_t IntLow64(const Int& n){ return n.bits64[0]; } static inline int bitlen(const Int& n){ return const_cast(n).GetBitLength(); } static inline bool intGE(const Int& a,const Int& b){ return const_cast(a).IsGreaterOrEqual(&const_cast(b)); } // ─── Scalar256 ──────────────────────────────────────────────────────────────── struct Scalar256{ uint64_t w[4]; }; static inline void intToScalar(const Int& n, Scalar256& s){ s.w[0]=n.bits64[0]; s.w[1]=n.bits64[1]; s.w[2]=n.bits64[2]; s.w[3]=n.bits64[3]; } static inline void scalarToInt(const Scalar256& s, Int& n){ n.SetInt32(0); for(int i=3;i>=0;--i){ n.ShiftL(64); Int tmp(s.w[i]); n.Add(&tmp); } } using fp_t = uint64_t; // ─── SSD DP storage ──────────────────────────────────────────────────────────── #pragma pack(push,1) struct DPSlot{ fp_t fp; Scalar256 key; }; #pragma pack(pop) static_assert(sizeof(DPSlot)==40); struct DPStorage{ size_t cap=0, mapBytes=0; int fd=-1; DPSlot* slots=nullptr; std::unique_ptr[]> st_used, st_lock; std::atomic size{0}; std::atomic dirty{0}; std::atomic flush_counter{0}; std::atomic enable_flush{true}; void init(const std::string& path,size_t c); void flushIfNeeded(size_t slotIdx) noexcept; void fullSync() noexcept; void close(); }; static DPStorage dp; static constexpr size_t FLUSH_STEP = 1ull<<24; // ─── DPStorage impl ─────────────────────────────────────────────────────────── void DPStorage::init(const std::string& path,size_t c){ cap=c; mapBytes=cap*sizeof(DPSlot); int flags = O_RDWR | O_CREAT; #ifdef O_DIRECT flags |= O_DIRECT; #endif #ifdef O_SYNC flags |= O_SYNC; #endif fd = ::open(path.c_str(),flags,0644); if(fd<0){ perror("open(dp)"); throw std::runtime_error("open(dp)"); } if(posix_fallocate(fd,0,mapBytes)){ perror("fallocate(dp)"); throw std::runtime_error("fallocate(dp)"); } void* p = mmap(nullptr,mapBytes,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0); if(p==MAP_FAILED){ perror("mmap(dp)"); throw std::runtime_error("mmap(dp)"); } slots = static_cast(p); st_used = std::make_unique[]>(cap); st_lock = std::make_unique[]>(cap); #pragma omp parallel for schedule(static) for(size_t i=0;i(slots) + start*sizeof(DPSlot), len, MS_ASYNC); } } void DPStorage::fullSync() noexcept{ msync(slots,mapBytes,MS_SYNC); } void DPStorage::close(){ if(slots) munmap(slots,mapBytes); if(fd>=0) ::close(fd); } // ─── Curve ──────────────────────────────────────────────────────────────────── static const char *P_HEX="FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F"; static const char *N_HEX="FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141"; static Int P_PRIME, ORDER_N; static Secp256K1 secp; static inline Point addP(const Point& a,const Point& b){ if(const_cast(a.x).IsZero() && const_cast(a.y).IsZero()) return b; if(const_cast(b.x).IsZero() && const_cast(b.y).IsZero()) return a; return secp.AddDirect(const_cast(a),const_cast(b)); } static inline Point mulP(const Int& k){ Int t(k); return secp.ComputePublicKey(&t); } // ─── Bloom + DP API ─────────────────────────────────────────────────────────── static simd_bloom::SimdBlockFilterFixed<>* bloom=nullptr; static std::atomic dpDone{0}; inline bool sameScalar(const Scalar256& a,const Scalar256& b){ return std::memcmp(&a,&b,sizeof(Scalar256))==0; } // ─── Dual-hash DP ──────────────────────────────────────────────────────────── bool dp_insert_unique(fp_t fp,const Int& idx){ Int t(idx); t.Mod(&ORDER_N); Scalar256 key; intToScalar(t,key); size_t mask = dp.cap - 1; size_t h1 = fp & mask; size_t h2 = ((fp << 1) | 1) & mask; if(h2 == 0) h2 = 1; size_t h = h1; for(size_t i=0;i(&it),sizeof(it)); ++cnt; } std::cout<<"Saved "<(&it),sizeof(it))){ Int p; p.Set32Bytes(it.priv); if(dp_insert_unique(it.fp,p)) bloom->Add(uint32_t(it.fp)); if((++n & 0xFFFFF) == 0) std::cout<<"\rLoaded "< splitRange(const Int& A,const Int& total,unsigned parts){ std::vector seg(parts); Int chunk(total); Int div((uint64_t)parts); chunk.Div(&div,nullptr); Int lenLast(total); if(parts>1){ Int t(chunk); Int m((uint64_t)(parts-1)); t.Mult(&m); lenLast.Sub(&t); } for(unsigned i=0;i> (64 - k)); } explicit xoshiro256ss(uint64_t seed = 1) { for (int i = 0; i < 4; ++i) { seed = splitmix64(seed); s[i] = seed; } } inline uint64_t operator()() noexcept { return next(); } inline uint64_t next() noexcept { const uint64_t result = rotl(s[1] * 5, 7) * 9; const uint64_t t = s[1] << 17; s[2] ^= s[0]; s[3] ^= s[1]; s[1] ^= s[2]; s[0] ^= s[3]; s[2] ^= t; s[3] = rotl(s[3], 45); return result; } using result_type = uint64_t; static constexpr uint64_t min() { return 0; } static constexpr uint64_t max() { return ~0ULL; } }; // ─── batch-EC-add ───────────────────────────────────────────────────────────── template static inline void batchAdd(Point* base,Point* plus){ std::array dX; for(unsigned i=0;i jumps; static void buildJumpTable(unsigned k){ jumps.resize(k); #pragma omp parallel for schedule(static) for(unsigned i=0;i hops{0}, restarts{0}; static std::atomic solved{false}; static Int privFound; static std::atomic found_tid{0}; static std::atomic winner_wraps{0}; static std::once_flag record_flag; // ─── wrap helper ────────────────────────────────────────────────────────────── static inline void addWrapCnt(Int& v,const Int& d,const Int& len,uint64_t& wraps){ v.Add(&const_cast(d)); if(intGE(v,len)){ v.Sub(&const_cast(len)); ++wraps; } } // ─── Traps (Phase-1) ───────────────────────────────────────────────────────── static constexpr unsigned K_DP = 512; static void buildDP_segment(const RangeSeg& seg,uint64_t target, unsigned k,unsigned bits,uint64_t seed){ const uint64_t mask = (1ULL< rd; std::array dist; std::array wraps{}; std::array cur, stepPts; const size_t BATCH_SIZE = 256; std::vector> batch; batch.reserve(BATCH_SIZE); auto rndMod=[&](Int& o){ o.SetInt32(0); int parts=(bitlen(seg.length)+63)/64; for(int p=0;p(seg.length)); }; for(unsigned i=0;i(dist[i])); scalar.Add(&const_cast(seg.start)); scalar.Mod(&ORDER_N); batch.emplace_back(fp,scalar); if(batch.size() >= BATCH_SIZE || made + batch.size() >= target){ #pragma omp critical(dp_insert) { for(auto& it: batch){ if(dp_insert_unique(it.first,it.second)){ bloom->Add(uint32_t(it.first)); ++made; if(made==target) break; } } batch.clear(); } } } stepPts[i] = jumps[h]; addWrapCnt(dist[i],step,seg.length,wraps[i]); } batchAdd(cur.data(),stepPts.data()); } if(!batch.empty()){ #pragma omp critical(dp_insert) { for(auto& it: batch){ if(dp_insert_unique(it.first,it.second)){ bloom->Add(uint32_t(it.first)); ++made; } } batch.clear(); } } } // ─── Phase-2: wild kangaroos ───────────────────────────────────────────────── static constexpr unsigned K = 512; static constexpr unsigned CACHE_LIMIT = 1024; struct PendingCheck{ fp_t fp; unsigned idx; }; static void worker(uint32_t tid,const RangeSeg& seg,const Point& pub, unsigned k,unsigned bits){ struct LoopDet{ uint64_t next,cnt,sig; void reset(uint64_t s){ next=1024; cnt=0; sig=s; } }; const uint64_t mask = (1ULL< rd; std::array dist; std::array wraps{}; std::array cur, stepPts; std::array loop; auto rndMod=[&](Int& o){ o.SetInt32(0); int parts=(bitlen(seg.length)+63)/64; for(int p=0;p(seg.length)); }; for(unsigned i=0;i cache; cache.reserve(CACHE_LIMIT); while(!solved.load()){ for(unsigned i=0;i(cur.data(),stepPts.data()); if(local >= FLUSH){ hops.fetch_add(local); local = 0; } for(unsigned i=0;i= CACHE_LIMIT){ #pragma omp critical(dp_query) { for(auto& item: cache){ if(!bloom->Find(uint32_t(item.fp))) continue; Int trap; if(!dp_find(item.fp,trap)) continue; Int dw(seg.length); Int w((uint64_t)wraps[item.idx]); dw.Mult(&w); dw.Add(&const_cast(dist[item.idx])); dw.Mod(&ORDER_N); Int priv; intCopy(priv,trap); priv.Sub(&dw); priv.Mod(&ORDER_N); Point tst = mulP(priv); if(tst.x.IsEqual(&const_cast(pub.x)) && tst.y.IsEqual(&const_cast(pub.y))) { std::call_once(record_flag,[]{}); intCopy(privFound,priv); found_tid.store(tid); winner_wraps.store(wraps[item.idx]); solved.store(true); } } } cache.clear(); if(solved.load()) return; } } } if(local) hops.fetch_add(local); } // ─── main ───────────────────────────────────────────────────────────────────── int main(int argc,char** argv) { /* init curve */ P_PRIME.SetBase16((char*)P_HEX); ORDER_N.SetBase16((char*)N_HEX); secp.Init(); /* ── CLI ── */ Int A,B; uint64_t traps=0; unsigned bits=12; size_t ramGB=8; Point pub; unsigned k_user=0; bool saveDP=false, loadDP=false; std::string dpFile; for(int i=1;i=52)?(1ULL<<(Lbits/2)) : uint64_t(std::ceil(range.ToDouble()/std::sqrt(range.ToDouble()))); } unsigned k = k_user? k_user : std::max(1u,Lbits/2); /* новые параметры */ constexpr double MAX_LOAD = 0.50; constexpr double bloomFactor = 10.0; size_t cap0 = size_t(std::ceil(double(traps) / MAX_LOAD)); size_t cap = 1; while(cap < cap0) cap <<= 1; dp.init("dp_table.bin",cap); size_t bloomBytes=size_t(traps*bloomFactor); std::cout<<"\n=========== Phase-0: Data summary ==========\n"; std::cout<<"DP table (SSD): "<(bloomBytes); unsigned th=std::max(1u,std::thread::hardware_concurrency()); auto segs=splitRange(A,range,th); uint64_t per=(traps+th-1)/th; buildJumpTable(k); // ─── Phase-1 ──────────────────────────────────────────────────────────── dp.enable_flush.store(false); std::cout<<"\n========== Phase-1: Building traps =========\n"; if(loadDP){ if(!loadDPBinary(dpFile)) return 1; }else{ std::thread progress([&]{ while(dpDone.load()(nowTick-lastStat).count(); lastStat=nowTick; uint64_t now=hops.load(); uint64_t delta=now-lastHops; lastHops=now; double sp=delta/dt; const char* u=" H/s"; if(sp>1e6){ sp/=1e6; u=" MH/s";} else if(sp>1e3){ sp/=1e3; u=" kH/s";} uint64_t sec=std::chrono::duration_cast(nowTick-t0).count(); std::cout<<"\rSpeed: "<( std::chrono::steady_clock::now()-t0).count(); uint64_t h=msTot/3'600'000,m=(msTot/60'000)%60,s=(msTot/1'000)%60,ms=msTot%1'000; std::cout<<"\n\n============= Phase-3: Result ==============\n"; std::cout<<"Private key : 0x"<