// Copyright(c) 2020 - 2025 Ryan Scott White
// Licensed under the MIT License. See LICENSE.txt in the project root for details.
// Starting 2/25, ChatGPT/Claude/Copilot/Grok were used in the development of this library.
using System.ComponentModel;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics.X86;
using static BigFloatLibrary.BigIntegerTools;
namespace BigFloatLibrary;
// BigFloat.cs (this file) - contains core BigFloat struct and its core properties and methods.
// BigIntegerTools.cs - helper methods for working with BigIntegers
// optional: (contains additional methods that are not part of the core)
// BigFloatCompareTo.cs: extra string functions as well as the IComparable, IEquatable, and IFormattable interfaces.
// BigFloatExtended.cs: extra functions that do not fall into the other categories.
// BigFloatMath.cs: extra math functions like Log, Sqrt, Exp, etc.
// BigFloatParsing.cs: extra parsing functions for converting strings to BigFloat.
// BigFloatRandom.cs: functions for generating random BigFloats
// BigFloatRoundShiftTruncate.cs: extra rounding, shifting, truncating, or splitting functions.
// BigFloatStringsAndSpans.cs: extra functions related to converting BigFloat to strings/spans
/// <summary>
/// BigFloat stores a BigInteger with a floating radix point.
/// </summary>
public readonly partial struct BigFloat
// IFormattable, ISpanFormattable - see BigFloatCompareTo.cs
// IComparable, IComparable<BigFloat>, IEquatable<BigFloat> - see BigFloatCompareTo.cs
{
/// <summary>
/// The number of extra hidden guard bits in the mantissa to aid in better precision.
/// GuardBits are a fixed amount of least-significant sub-precise bits.
/// These bits help guard against some nuisances such as "7" * "9" being "60".
/// </summary>
public const int GuardBits = 32; // 0-62, must be even (for sqrt)
/// <summary>
/// The full integer's data bits, including the guard bits.
/// </summary>
private readonly BigInteger _mantissa;
/// <summary>
/// _size is the count of precision bits and equals ABS(DataBits).GetBitLength(). Using ABS handles power-of-two negatives
/// consistently for positive and negative values.
/// _size includes GuardBits (the Size property subtracts them).
/// _size does not include rounding from GuardBits; for example 11[111...111] (where [111...111] represents the guard bits)
/// is still 2 bits, so the user will see 0b100 with a size of 2.
/// _size is 0 only when 'DataBits==0'. When BigFloat is Zero, the size is zero.
/// </summary>
internal readonly int _size;
/// <summary>
/// The binary Scale (or -Accuracy) is the amount to left shift (&lt;&lt;) the DataBits (i.e. right shift the radix point) to get to the desired value.
/// When Scale is Zero, the value is equal to the DataBits with the GuardBits removed. (i.e. DataBits >> GuardBits)
/// When BigFloat is Zero, scale is the point of least accuracy.
/// note: _scale = Scale-GuardBits (or Scale = _scale + GuardBits)
/// <code>
/// 11|1.1000 Scale &lt; 0
/// 111.|1000 Scale ==0
/// 111.10|00 Scale > 0
/// </code>
/// </summary>
public readonly int Scale { get; init; }
/// <summary>
/// The precision of the value: the number of mantissa bits that remain once the
/// <see cref="GuardBits"/> are subtracted out. The result is never negative.
/// Use SizeWithGuardBits to include GuardBits.
/// </summary>
public readonly int Size
{
    get
    {
        int bitsAboveGuard = _size - GuardBits;
        return bitsAboveGuard > 0 ? bitsAboveGuard : 0;
    }
}
/// <summary>
/// Returns the base-2 exponent of the number, i.e. how far a single 1 bit must be shifted
/// to land on the position of the leading mantissa bit.
/// Examples: dataBits:11010 with BinExp: 3 -> 1101.0 -> 1.1010 x 2^ 3
///           dataBits:11    with BinExp:-1 -> 0.11   -> 1.1    x 2^-1
/// </summary>
public int BinaryExponent
{
    get
    {
        int topBitPosition = Scale + _size;
        return topBitPosition - GuardBits - 1;
    }
}
//see BigFloatZeroNotes.txt for notes
/// <summary>
/// Returns true when the value is considered zero after applying guard-bit tolerance.
/// A value is treated as zero if <c>_size == 0</c>, or if <c>_size &lt; GuardBits</c>
/// and <c>_size + Scale &lt; GuardBits</c>, meaning every significant bit sits below the
/// guard-bit boundary. This same near-zero rule governs the sign-related properties
/// (<see cref="IsPositive"/>, <see cref="IsNegative"/>, <see cref="Sign"/>).
/// </summary>
// The thresholds are expressed through GuardBits (not a literal 32) so the rule stays
// correct if the guard width is ever changed.
public bool IsZero => _size == 0 || (_size < GuardBits && _size + Scale < GuardBits);
/// <summary>
/// Returns true if there is less than 1 bit of precision, i.e. the mantissa does not reach
/// above the guard bits. A false value does not guarantee that the number is precise.
/// </summary>
public bool IsOutOfPrecision
{
    get { return GuardBits > _size; }
}
/// <summary>
/// Returns true if the stored mantissa is positive and the value is not treated as zero by
/// <see cref="IsZero"/>. GuardBits are respected through the zero-tolerance rule only;
/// no additional rounding is performed here.
/// </summary>
public bool IsPositive
{
    get { return !IsZero && _mantissa.Sign > 0; }
}
/// <summary>
/// Returns true if the stored mantissa is negative and the value is not treated as zero by
/// <see cref="IsZero"/>. GuardBits are respected through the zero-tolerance rule only;
/// no additional rounding is performed here.
/// </summary>
public bool IsNegative
{
    get { return !IsZero && _mantissa.Sign < 0; }
}
/// <summary>
/// Reports the sign of the mantissa while honoring the guard-bit-aware "near-zero"
/// tolerance enforced by <see cref="IsZero"/>.
/// Returns -1 for negative, 0 for zero (or effectively zero), and +1 for positive.
/// </summary>
public int Sign
{
    get { return IsZero ? 0 : _mantissa.Sign; }
}
/// <summary>
/// Returns the default zero: the result of <see cref="ZeroWithAccuracy(int)"/> with an accuracy of 0.
/// </summary>
[Obsolete("Use ZeroWithAccuracy(...) or the integer literal 0 instead. This member will be removed in a future major version.")]
public static BigFloat Zero
{
    get { return ZeroWithAccuracy(0); }
}
/// <summary>
/// Returns a '1' with only 1 bit of precision (1 &lt;&lt; GuardBits): the result of
/// <see cref="OneWithAccuracy(int)"/> with an accuracy of 0.
/// </summary>
[Obsolete("Use OneWithAccuracy(...) or the integer literal 1 instead. This member will be removed in a future major version.")]
public static BigFloat One
{
    get { return OneWithAccuracy(0); }
}
/// <summary>Base-2 logarithm of 10 (about 3.32 binary digits per decimal digit).</summary>
const double LOG2_OF_10 = 3.32192809488736235;
/// <summary>
/// Returns a zero BigFloat with a specific accuracy budget encoded into <see cref="Scale"/>
/// (the scale is set to the negated accuracy; the mantissa and size are zero).
/// Example: -4 treats the value as zero but reserves four fractional places (plus GuardBits) of implied accuracy.
/// </summary>
/// <param name="accuracy">
/// How many fractional binary digits of context to preserve below the radix point.
/// The original documentation gives the intended range as -GuardBits to int.MaxValue; no range check is performed here.
/// </param>
/// <returns>A zero value whose <see cref="Scale"/> is <c>-accuracy</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat ZeroWithAccuracy(int accuracy)
{
    int scale = -accuracy;
    return new BigFloat(BigInteger.Zero, scale, 0);
}
/// <summary>
/// Returns a one BigFloat with the specified least precision for maintaining accuracy context.
/// The mantissa is a single 1 bit placed <c>GuardBits + accuracy</c> positions up, and the
/// scale is <c>-accuracy</c>, so the numeric value is exactly 1.
/// </summary>
/// <param name="accuracy">The wanted accuracy, from -32 (-GuardBits) up to int.MaxValue.</param>
/// <returns>The value 1 with <c>GuardBits + 1 + accuracy</c> mantissa bits.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="accuracy"/> is less than -GuardBits. Below that bound the shift count becomes
/// negative, which would silently produce a zero mantissa with a non-positive size.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat OneWithAccuracy(int accuracy)
{
    if (accuracy < -GuardBits)
    {
        throw new ArgumentOutOfRangeException(nameof(accuracy), accuracy, $"accuracy must be at least {-GuardBits}.");
    }

    int mantissaSize = GuardBits + 1 + accuracy;
    return new BigFloat(BigInteger.One << (mantissaSize - 1), -accuracy, mantissaSize);
}
///////////////////////// CONVERSION FUNCTIONS /////////////////////////
/// <summary>
/// Constructs a BigFloat directly from its raw elemental parts. Nothing is shifted or measured here:
/// the caller is responsible for pre-up-shifting <paramref name="rawValue"/> and for supplying a
/// <paramref name="binaryScaler"/> and <paramref name="rawValueSize"/> that already account for the GuardBits.
/// </summary>
/// <param name="rawValue">The raw mantissa value as a BigInteger. It should INCLUDE the GuardBits.</param>
/// <param name="binaryScaler">The base-2 exponent stored as <see cref="Scale"/>.</param>
/// <param name="rawValueSize">The bit size of <paramref name="rawValue"/>, stored as the internal size.</param>
private BigFloat(BigInteger rawValue, int binaryScaler, int rawValueSize)
{
    Scale = binaryScaler;
    _size = rawValueSize;
    _mantissa = rawValue;

    AssertValid();
}
/// <summary>
/// Shared integer-to-BigFloat builder used by the integer constructors and factories.
/// Shifts <paramref name="value"/> so that the resulting mantissa holds the effective in-precision
/// bits plus the guard bits, and adjusts the scale so the numeric value is unchanged.
/// </summary>
/// <param name="value">The integer payload (sign preserved).</param>
/// <param name="valueSize">Bit length of the magnitude of <paramref name="value"/>; 0 means the value is zero.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied to the result.</param>
/// <param name="valueIncludesGuardBits">True when <paramref name="value"/> already contains the guard bits, so none are added.</param>
/// <param name="requestedPrecision">Requested in-precision bit count; must not be negative.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigFloat CreateFromInteger(BigInteger value, int valueSize, int binaryScaler, bool valueIncludesGuardBits, int requestedPrecision)
{
    EnsureNonNegativePrecision(requestedPrecision);

    // A zero payload has no bits to place; only the accuracy budget is encoded in the scale.
    if (valueSize == 0)
    {
        return new BigFloat(BigInteger.Zero, binaryScaler - requestedPrecision, 0);
    }

    // The in-precision width is never smaller than valueSize - GuardBits, so at most
    // GuardBits payload bits end up below the precision boundary.
    int precision = requestedPrecision;
    int precisionFloor = valueSize - GuardBits;
    if (precisionFloor > precision)
    {
        precision = precisionFloor;
    }

    int guardPadding = valueIncludesGuardBits ? 0 : GuardBits;
    int totalShift = guardPadding + (precision - valueSize); // a negative count shifts right
    BigInteger shifted = value << totalShift;
    int scale = binaryScaler - precision + valueSize;

    return new BigFloat(shifted, scale, guardPadding + precision);
}
/// <summary>
/// Picks the in-precision bit count for an integer conversion. When the caller left the precision at
/// its default and the payload is wider than that default, the precision is widened to the full
/// payload width; otherwise the caller's value is returned unchanged.
/// </summary>
/// <param name="valueSize">Bit length of the integer payload.</param>
/// <param name="binaryPrecision">Precision supplied by the caller; must not be negative.</param>
/// <param name="defaultBinaryPrecision">The default precision of the calling constructor.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int AdjustIntegerPrecisionForFullWidth(int valueSize, int binaryPrecision, int defaultBinaryPrecision)
{
    EnsureNonNegativePrecision(binaryPrecision);

    bool callerUsedDefault = binaryPrecision == defaultBinaryPrecision;
    bool payloadWiderThanDefault = valueSize > defaultBinaryPrecision;

    // Widen only for defaulted calls so the guard region remains purely additive.
    return callerUsedDefault && payloadWiderThanDefault ? valueSize : binaryPrecision;
}
/// <summary>
/// Builds a BigFloat from a signed 64-bit payload: resolves the effective precision with
/// <see cref="AdjustIntegerPrecisionForFullWidth"/> and forwards to <see cref="CreateFromInteger"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigFloat CreateFromSignedInteger(long value, int valueSize, int binaryScaler, bool valueIncludesGuardBits, int binaryPrecision, int defaultBinaryPrecision)
    => CreateFromInteger(
        value,
        valueSize,
        binaryScaler,
        valueIncludesGuardBits,
        AdjustIntegerPrecisionForFullWidth(valueSize, binaryPrecision, defaultBinaryPrecision));
/// <summary>
/// Builds a BigFloat from an unsigned 64-bit payload: resolves the effective precision with
/// <see cref="AdjustIntegerPrecisionForFullWidth"/> and forwards to <see cref="CreateFromInteger"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigFloat CreateFromUnsignedInteger(ulong value, int valueSize, int binaryScaler, bool valueIncludesGuardBits, int binaryPrecision, int defaultBinaryPrecision)
    => CreateFromInteger(
        (BigInteger)value,
        valueSize,
        binaryScaler,
        valueIncludesGuardBits,
        AdjustIntegerPrecisionForFullWidth(valueSize, binaryPrecision, defaultBinaryPrecision));
/// <summary>
/// Creates a <see cref="BigFloat"/> from a <see cref="BigInteger"/>. The requested in-precision width is
/// the bit length of <paramref name="value"/> plus <paramref name="addedBinaryPrecision"/>; guard bits are
/// appended (zero-filled) unless <paramref name="valueIncludesGuardBits"/> is true. The mantissa is
/// left-shifted and <see cref="Scale"/> compensated so the numeric value is preserved. When
/// <paramref name="value"/> is zero, <see cref="Scale"/> becomes <c>binaryScaler - addedBinaryPrecision</c>
/// and the size is zero, preserving only the desired accuracy budget.
/// </summary>
/// <param name="value">The integer payload for the mantissa. Negative values are preserved.</param>
/// <param name="binaryScaler">Additional base-2 scaling added to the resulting <see cref="Scale"/>.</param>
/// <param name="valueIncludesGuardBits">True if <paramref name="value"/> already contains the guard bits.</param>
/// <param name="addedBinaryPrecision">Extra in-precision bits to add above the magnitude of <paramref name="value"/>; must not be negative.</param>
/// <exception cref="OverflowException">The precision is negative or the precision sum overflows.</exception>
public BigFloat(BigInteger value, int binaryScaler = 0, bool valueIncludesGuardBits = false, int addedBinaryPrecision = 0)
{
    EnsureNonNegativePrecision(addedBinaryPrecision);

    int payloadBits = MantissaSize(value);
    int totalPrecision = checked(payloadBits + addedBinaryPrecision);
    EnsureNonNegativePrecision(totalPrecision);

    this = CreateFromInteger(value, payloadBits, binaryScaler, valueIncludesGuardBits, totalPrecision);
}
/// <summary>
/// Creates a <see cref="BigFloat"/> from a 32-bit signed integer. <paramref name="binaryPrecision"/>
/// sets how many in-precision bits are kept; when it is left at its default of 31 and the value needs
/// 32 bits (int.MinValue), the precision is widened to the full payload. Guard bits are appended unless
/// <paramref name="valueIncludesGuardBits"/> is true. Zero inputs leave the size at 0 and set
/// <see cref="Scale"/> to <c>binaryScaler - binaryPrecision</c> so the requested accuracy is preserved.
/// </summary>
/// <param name="value">The integer to convert.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied to the resulting exponent.</param>
/// <param name="valueIncludesGuardBits">Set to true if <paramref name="value"/> already includes guard bits.</param>
/// <param name="binaryPrecision">Requested in-precision bits (minimum 0; default preserves 31 bits).</param>
/// <exception cref="OverflowException"><paramref name="binaryPrecision"/> is negative.</exception>
public BigFloat(int value, int binaryScaler = 0, bool valueIncludesGuardBits = false, int binaryPrecision = 31)
{
    const int DefaultBinaryPrecision = 31;

    // Magnitude as unsigned so int.MinValue is representable (2^31).
    uint magnitude = value < 0
        ? unchecked((uint)(-value))
        : (uint)value;

    // 32 - lzcnt is the bit length and is already 0 for a zero magnitude.
    int bitLength = 32 - BitOperations.LeadingZeroCount(magnitude);

    this = CreateFromSignedInteger(value, bitLength, binaryScaler, valueIncludesGuardBits, binaryPrecision, DefaultBinaryPrecision);
}
/// <summary>
/// Creates a <see cref="BigFloat"/> from a 64-bit signed integer whose requested in-precision width is
/// the value's own bit length adjusted by <paramref name="adjustBinaryPrecision"/>.
/// </summary>
/// <param name="value">The integer to convert.</param>
/// <param name="valueIncludesGuardBits">True if <paramref name="value"/> already includes guard bits.</param>
/// <param name="adjustBinaryPrecision">Bits added to (or, if negative, removed from) the value's bit length to form the requested precision.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied to the result.</param>
/// <exception cref="OverflowException">The resulting precision is negative or the sum overflows.</exception>
public static BigFloat CreateWithPrecisionFromValue(long value, bool valueIncludesGuardBits = false, int adjustBinaryPrecision = 0, int binaryScaler = 0)
{
    // Magnitude as unsigned so long.MinValue is representable (2^63).
    ulong magnitude = value < 0
        ? unchecked((ulong)(-value))
        : (ulong)value;

    int bitLength = 64 - BitOperations.LeadingZeroCount(magnitude); // 0 for a zero magnitude
    int totalPrecision = checked(bitLength + adjustBinaryPrecision);
    EnsureNonNegativePrecision(totalPrecision);

    return CreateFromInteger(value, bitLength, binaryScaler, valueIncludesGuardBits, totalPrecision);
}
/// <summary>
/// Creates a <see cref="BigFloat"/> from a 64-bit unsigned integer whose requested in-precision width is
/// the value's own bit length adjusted by <paramref name="adjustBinaryPrecision"/>.
/// </summary>
/// <param name="value">The unsigned integer to convert.</param>
/// <param name="valueIncludesGuardBits">True if <paramref name="value"/> already includes guard bits.</param>
/// <param name="adjustBinaryPrecision">Bits added to (or, if negative, removed from) the value's bit length to form the requested precision.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied to the result.</param>
/// <exception cref="OverflowException">The resulting precision is negative or the sum overflows.</exception>
public static BigFloat CreateWithPrecisionFromValue(ulong value, bool valueIncludesGuardBits = false, int adjustBinaryPrecision = 0, int binaryScaler = 0)
{
    int bitLength = 64 - BitOperations.LeadingZeroCount(value); // 0 for a zero value
    int totalPrecision = checked(bitLength + adjustBinaryPrecision);
    EnsureNonNegativePrecision(totalPrecision);

    return CreateFromInteger((BigInteger)value, bitLength, binaryScaler, valueIncludesGuardBits, totalPrecision);
}
/// <summary>
/// Creates a <see cref="BigFloat"/> from a 64-bit signed integer. <paramref name="binaryPrecision"/>
/// sets how many in-precision bits are kept; when it is left at its default of 63 and the value needs
/// 64 bits (long.MinValue), the precision is widened to the full payload. Guard bits are appended unless
/// <paramref name="valueIncludesGuardBits"/> is true, and <see cref="Scale"/> is compensated so the numeric
/// value is unchanged. Zero inputs set the size to 0 and <see cref="Scale"/> to <c>binaryScaler - binaryPrecision</c>.
/// </summary>
/// <param name="value">The integer to convert.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied after constructing the mantissa.</param>
/// <param name="valueIncludesGuardBits">True when <paramref name="value"/> already holds guard bits.</param>
/// <param name="binaryPrecision">Requested in-precision bits (defaults to 63 for signed 64-bit inputs).</param>
/// <exception cref="OverflowException"><paramref name="binaryPrecision"/> is negative.</exception>
public BigFloat(long value, int binaryScaler = 0, bool valueIncludesGuardBits = false, int binaryPrecision = 63)
{
    const int DefaultBinaryPrecision = 63;

    // Magnitude as unsigned so long.MinValue is representable (2^63).
    ulong magnitude = value < 0
        ? unchecked((ulong)(-value))
        : (ulong)value;

    // 64 - lzcnt is the bit length and is already 0 for a zero magnitude.
    int bitLength = 64 - BitOperations.LeadingZeroCount(magnitude);

    this = CreateFromSignedInteger(value, bitLength, binaryScaler, valueIncludesGuardBits, binaryPrecision, DefaultBinaryPrecision);
}
/// <summary>
/// Creates a <see cref="BigFloat"/> from an unsigned 64-bit integer. <paramref name="binaryPrecision"/>
/// sets how many in-precision bits are kept. Guard bits are appended unless
/// <paramref name="valueIncludesGuardBits"/> is true, and <see cref="Scale"/> is compensated so the numeric
/// value reflects the incoming value plus the provided scaler. Zero inputs set the size to 0 and
/// <see cref="Scale"/> to <c>binaryScaler - binaryPrecision</c>.
/// </summary>
/// <param name="value">The unsigned integer to convert.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied to the resulting value.</param>
/// <param name="valueIncludesGuardBits">True if <paramref name="value"/> already includes guard bits.</param>
/// <param name="binaryPrecision">Requested in-precision bits (defaults to 64 for unsigned inputs).</param>
/// <exception cref="OverflowException"><paramref name="binaryPrecision"/> is negative.</exception>
public BigFloat(ulong value, int binaryScaler = 0, bool valueIncludesGuardBits = false, int binaryPrecision = 64)
{
    const int DefaultBinaryPrecision = 64;

    // 64 - lzcnt is the bit length and is already 0 for a zero value.
    int bitLength = 64 - BitOperations.LeadingZeroCount(value);

    this = CreateFromUnsignedInteger(value, bitLength, binaryScaler, valueIncludesGuardBits, binaryPrecision, DefaultBinaryPrecision);
}
// Design note on the default (binaryPrecision = 37) for double → BigFloat
// ---------------------------------------------------------------------
// A IEEE 754 double has a 53-bit significand (including the hidden bit).
// When converting to BigFloat we need to decide how many of those bits become
// in-precision bits (visible to Size/Precision) and how many are treated as
// guard bits (extra rounding headroom).
//
// In this constructor the default is:
// • 37 bits loaded into the in-precision region, and
// • 16 of the original double bits placed at the top of the GuardBits area,
// with the remaining guard bits implicitly zero.
//
// Earlier versions kept all 53 bits in the in-precision area and *none* in the
// guard area, which tended to expose double’s edge-case rounding behaviour
// directly in BigFloat arithmetic. At the other extreme, pushing all 32 guard
// bits under the precision boundary would leave only 21 in-precision bits
// (53 − 32), which is often too coarse for a value that started as a double.
//
// The 37 / 16 split is therefore a compromise:
// • enough in-precision bits (37) to represent typical double values cleanly,
// • a non-trivial number of “real” double bits in the guard region (16) to
// absorb rounding during subsequent operations, and
// • the option for callers to override this balance via the binaryPrecision
// parameter if a different trade-off is desired.
/// <summary>
/// Creates a <see cref="BigFloat"/> from a <see cref="double"/>. <paramref name="binaryPrecision"/>
/// selects how many of the available significand bits (53 for normal values, fewer for subnormals)
/// remain in the in-precision region; the rest (at most GuardBits of them) occupy the most significant
/// guard bits and the remaining guard bits are zero-filled. For normal values the in-precision count is
/// clamped to 21..53, giving the "37 precise + 16 guard" default split. Zero inputs set the size to 0 and
/// <see cref="Scale"/> to <c>binaryScaler + GuardBits - inPrecision</c>.
/// </summary>
/// <param name="value">The <see cref="double"/> to convert. NaN and infinities are rejected.</param>
/// <param name="binaryScaler">Additional base-2 scaling applied after conversion.</param>
/// <param name="binaryPrecision">Number of in-precision bits (clamped as described above, default 37).</param>
/// <exception cref="OverflowException">
/// <paramref name="value"/> is NaN or infinite, or <paramref name="binaryPrecision"/> is negative.
/// </exception>
public BigFloat(double value, int binaryScaler = 0, int binaryPrecision = 37)
{
    EnsureNonNegativePrecision(binaryPrecision);

    const int SignificandBits = 53;
    const int ExponentBias = 1023;

    // Split the IEEE 754 bit pattern into its fraction and biased-exponent fields.
    long raw = BitConverter.DoubleToInt64Bits(value);
    long significand = raw & 0x000F_FFFF_FFFF_FFFFL;
    int biasedExponent = (int)((raw >> 52) & 0x7ffL);

    // An all-ones exponent encodes NaN or infinity; neither is representable.
    if (biasedExponent == 2047)
    {
        ThrowInvalidInitializationException(double.IsNaN(value) ? "Value is NaN" : "Value is infinity");
    }

    bool isNormal = biasedExponent != 0;
    if (isNormal)
    {
        significand |= 0x0010_0000_0000_0000L; // restore the hidden bit
    }

    // Subnormals only carry as many meaningful bits as their fraction field holds.
    int availablePrecision;
    if (isNormal || significand == 0)
    {
        availablePrecision = SignificandBits;
    }
    else
    {
        availablePrecision = Math.Max(1, GetBitLength((ulong)significand));
    }

    int minPrecision = Math.Max(0, availablePrecision - GuardBits);
    int inPrecision = Math.Clamp(binaryPrecision, minPrecision, availablePrecision);

    if (significand == 0)
    {
        // Zero: keep only the accuracy implied by the requested precision.
        _mantissa = 0;
        _size = 0;
        Scale = binaryScaler + GuardBits - inPrecision;
        AssertValid();
        return;
    }

    // Source bits that do not fit in the in-precision region sit at the top of the guard area.
    int upShift = GuardBits - (availablePrecision - inPrecision);
    long signedSignificand = raw < 0 ? -significand : significand; // sign bit is bit 63
    int unbiasedExponent = (isNormal ? biasedExponent : 1) - ExponentBias;

    _mantissa = new BigInteger(signedSignificand) << upShift;
    _size = GuardBits + inPrecision;
    Scale = unbiasedExponent - (SignificandBits - 1) + binaryScaler - upShift + GuardBits;
    AssertValid();
}
/// <summary>
/// Creates a <see cref="BigFloat"/> from a <see cref="float"/>. <paramref name="binaryPrecision"/>
/// picks how many of the available significand bits (24 for normal values, fewer for subnormals) remain
/// in the in-precision region; the remainder move into the most significant guard bits to provide
/// rounding headroom. Zero inputs set the size to 0 and <see cref="Scale"/> to
/// <c>GuardBits - inPrecision</c> so the requested precision is preserved even when the numeric value is zero.
/// </summary>
/// <param name="value">The <see cref="float"/> to convert. NaN and infinities are rejected.</param>
/// <param name="binaryPrecision">In-precision bits to retain (clamped between 0 and 24 for normal values, default 16).</param>
/// <exception cref="OverflowException">
/// <paramref name="value"/> is NaN or infinite, or <paramref name="binaryPrecision"/> is negative.
/// </exception>
public BigFloat(float value, int binaryPrecision = 16)
{
    EnsureNonNegativePrecision(binaryPrecision);

    const int SignificandBits = 24;
    const int ExponentBias = 127;

    // Split the IEEE 754 bit pattern into its fraction and biased-exponent fields.
    int raw = BitConverter.SingleToInt32Bits(value);
    int significand = raw & 0x007F_FFFF;
    int biasedExponent = (raw >> 23) & 0xff;

    // An all-ones exponent encodes NaN or infinity; neither is representable.
    if (biasedExponent == 255)
    {
        ThrowInvalidInitializationException(float.IsNaN(value) ? "Value is NaN" : "Value is infinity");
    }

    bool isNormal = biasedExponent != 0;
    if (isNormal)
    {
        significand |= 0x0080_0000; // restore the hidden bit
    }

    // Subnormals only carry as many meaningful bits as their fraction field holds.
    int availablePrecision;
    if (isNormal || significand == 0)
    {
        availablePrecision = SignificandBits;
    }
    else
    {
        availablePrecision = Math.Max(1, GetBitLength((ulong)significand));
    }

    int minPrecision = Math.Max(0, availablePrecision - GuardBits);
    int inPrecision = Math.Clamp(binaryPrecision, minPrecision, availablePrecision);

    if (significand == 0)
    {
        // Zero: keep only the accuracy implied by the requested precision.
        _mantissa = 0;
        _size = 0;
        Scale = GuardBits - inPrecision;
        AssertValid();
        return;
    }

    // Source bits that do not fit in the in-precision region sit at the top of the guard area.
    int upShift = GuardBits - (availablePrecision - inPrecision);
    int signedSignificand = raw < 0 ? -significand : significand; // sign bit is bit 31
    int unbiasedExponent = (isNormal ? biasedExponent : 1) - ExponentBias;

    _mantissa = new BigInteger(signedSignificand) << upShift;
    _size = GuardBits + inPrecision;
    Scale = unbiasedExponent - (SignificandBits - 1) - upShift + GuardBits;
    AssertValid();
}
/// <summary>
/// Returns the number of bits needed to represent <paramref name="value"/>
/// (the position of its highest set bit, counting from 1), or 0 when the value is 0.
/// </summary>
/// <param name="value">The unsigned value to measure.</param>
/// <returns>A bit length in the range 0..64.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetBitLength(ulong value)
{
    // LeadingZeroCount returns 64 for zero, so no special case is required, and the runtime
    // already selects a hardware instruction where one exists; no manual x86 dispatch is needed.
    return 64 - BitOperations.LeadingZeroCount(value);
}
/// <summary>
/// Constructs a BigFloat using the raw elemental components, passing them unchanged to the private
/// raw constructor. The caller is responsible for pre-up-shifting <paramref name="mantissa"/> and for
/// setting <paramref name="binaryScaler"/> and <paramref name="mantissaSize"/> with respect to the GuardBits.
/// </summary>
/// <param name="mantissa">The raw integer part that includes the GuardBits.</param>
/// <param name="binaryScaler">The base-2 exponent (shift) to apply to <paramref name="mantissa"/>.</param>
/// <param name="mantissaSize">The bit size of <paramref name="mantissa"/>.</param>
public static BigFloat CreateFromRawComponents(BigInteger mantissa, int binaryScaler, int mantissaSize)
    => new BigFloat(mantissa, binaryScaler, mantissaSize);
/// <summary>
/// Guard used by the constructors and factories: rejects a negative precision by delegating to
/// <see cref="ThrowInvalidInitializationException"/>.
/// </summary>
/// <param name="precision">The precision value to validate.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void EnsureNonNegativePrecision(int precision)
{
    if (precision >= 0)
    {
        return;
    }

    ThrowInvalidInitializationException($"binaryPrecision ({precision}) cannot be negative.");
}
/// <summary>
/// Throw helper for invalid construction arguments. Always throws an
/// <see cref="OverflowException"/> whose message is prefixed with "Invalid BigFloat initialization: ".
/// </summary>
/// <param name="reason">Text appended to the message prefix.</param>
[DoesNotReturn]
private static void ThrowInvalidInitializationException(string reason)
{
    string message = $"Invalid BigFloat initialization: {reason}";
    throw new OverflowException(message);
}
///////////////////////// [END] INIT / CONVERSION FUNCTIONS [END] /////////////////////////
/// <summary>
/// Checks to see if the value is an integer: true exactly when <c>Ceiling()</c> and <c>Floor()</c>
/// produce equal results, i.e. Ceiling() would not round up and Floor() would not round down.
/// </summary>
/// <remarks>
/// How guard bits are tolerated is decided by Ceiling() and Floor(), which are defined outside this
/// section. NOTE(review): earlier notes described the rule as "scale >= GuardBits/2, or all bits between
/// the point and 16 bits into the GuardBits are uniform" - confirm that still matches Ceiling()/Floor().
/// </remarks>
public bool IsInteger => Ceiling() == Floor();
/// <summary>
/// Tests to see if the number is in the format of "10000000...". The magnitude of the mantissa is
/// shifted right by <c>GuardBits - 1</c> (dropping all but the top guard bit) and the result is
/// checked for being a power of two.
/// </summary>
public bool IsOneBitFollowedByZeroBits
{
    get
    {
        BigInteger magnitude = BigInteger.Abs(_mantissa);
        return BigInteger.IsPow2(magnitude >> (GuardBits - 1));
    }
}
/// <summary>
/// Returns the number of matching leading bits with rounding. e.g. 10.111 - 10.101 is 00.010 so returns 4
/// The Exponent(or Scale + _size) is considered. e.g. 100. and 1000. would return 0
/// If the signs do not match then 0 is returned.
/// When a rollover is near these bits are included. e.g. 11110 and 100000 returns 3
/// GuardBits are included.
/// </summary>
/// <param name="a">The first value to compare.</param>
/// <param name="b">The second value to compare.</param>
/// <param name="sign">Returns the sign of a-b. Example: If a is larger, sign is set to 1.</param>
/// <returns>
/// The larger of the two internal sizes minus the bit position of the highest set bit of the aligned
/// mantissa difference, minus one; 0 on the early-exit path.
/// </returns>
public static int NumberOfMatchingLeadingBitsWithRounding(BigFloat a, BigFloat b, out int sign)
{
int maxSize = Math.Max(a._size, b._size);
int expDiff = a.BinaryExponent - b.BinaryExponent;
// Early exit: both sizes are zero, the signs differ, or the binary exponents differ by more than one.
if (maxSize == 0 || a.Sign != b.Sign || Math.Abs(expDiff) > 1)
{
sign = (expDiff > 0) ? a.Sign : -b.Sign;
return 0;
}
int scaleDiff = a.Scale - b.Scale;
// NOTE(review): a BigInteger shift by a negative count shifts in the opposite direction, so when
// scaleDiff < 0 the expression (b._mantissa << scaleDiff) is a RIGHT shift of b by |scaleDiff|, and
// when scaleDiff > 0 a is shifted right. Since Scale is the left-shift applied to the mantissa,
// aligning two values normally shifts the coarser-scaled mantissa LEFT (or the finer one right);
// confirm the direction used here is intended for operands with different scales.
BigInteger temp = (scaleDiff < 0) ?
a._mantissa - (b._mantissa << scaleDiff)
: (a._mantissa >> scaleDiff) - b._mantissa;
sign = temp.Sign;
// NOTE(review): BigInteger.Log2(0) is 0, so a zero difference yields maxSize - 1, the same result as
// a difference of exactly 1 - confirm that is the desired answer for identical inputs.
return maxSize - (int)BigInteger.Log2(BigInteger.Abs(temp)) - 1;
}
/// <summary>
/// Returns the number of leading mantissa bits that match between two values. GuardBits are included.
/// e.g. 11010 and 11111 returns 2
/// e.g. 100000 and 111111 returns 1
/// If the signs do not match then 0 is returned.
/// The scale and precision(size) is ignored. e.g. 11101000000 and 11111 returns 3
/// </summary>
/// <remarks>
/// The wider mantissa is truncated (right-shifted) to the width of the narrower one, the two are
/// subtracted, and the position of the highest set bit of the difference determines the result.
/// </remarks>
/// <param name="a">The first value to compare.</param>
/// <param name="b">The second value to compare.</param>
/// <returns>
/// A count between 0 and the smaller of the two internal sizes. When the aligned mantissas are identical
/// the full common width is returned.
/// </returns>
public static int NumberOfMatchingLeadingMantissaBits(BigFloat a, BigFloat b)
{
    if (a.Sign != b.Sign)
    {
        return 0;
    }

    int sizeDiff = a._size - b._size;
    int commonSize = Math.Min(a._size, b._size);
    if (commonSize == 0)
    {
        return 0;
    }

    // Bring both mantissas to the narrower width before subtracting.
    BigInteger difference = sizeDiff < 0
        ? a._mantissa - (b._mantissa >> -sizeDiff)
        : (a._mantissa >> sizeDiff) - b._mantissa;

    // BigInteger.Log2(0) is 0, which would under-report an exact match by one bit.
    if (difference.IsZero)
    {
        return commonSize;
    }

    return commonSize - (int)BigInteger.Log2(BigInteger.Abs(difference)) - 1;
}
///////////////////////// Min / Max /////////////////////////
/// <summary>
/// Returns the smaller of two values, as selected by <see cref="SelectMinMax"/>.
/// </summary>
/// <param name="x">The first value.</param>
/// <param name="y">The second value.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Min(in BigFloat x, in BigFloat y)
{
    return SelectMinMax(in x, in y, pickMin: true);
}
/// <summary>
/// Returns the larger of two values, as selected by <see cref="SelectMinMax"/>.
/// </summary>
/// <param name="x">The first value.</param>
/// <param name="y">The second value.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Max(in BigFloat x, in BigFloat y)
{
    return SelectMinMax(in x, in y, pickMin: false);
}
/// <summary>
/// Shared implementation of <see cref="Min"/> and <see cref="Max"/>.
/// </summary>
/// <remarks>
/// Selection order:
/// 1. Operands with identical scale, size and mantissa: <paramref name="x"/> is returned.
/// 2. Both sizes exceed GuardBits and the mantissa signs differ: decided by sign alone.
/// 3. Otherwise decided by <c>Compare</c>.
/// 4. On a tie from <c>Compare</c>: the operand with the larger size wins, then the one with the
///    smaller scale, then <paramref name="x"/>.
/// </remarks>
/// <param name="x">The first value.</param>
/// <param name="y">The second value.</param>
/// <param name="pickMin">True to select the minimum, false to select the maximum.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigFloat SelectMinMax(in BigFloat x, in BigFloat y, bool pickMin)
{
    bool sameRepresentation = x._mantissa == y._mantissa && x._size == y._size && x.Scale == y.Scale;
    if (sameRepresentation)
    {
        return x;
    }

    // Fast path: both operands have in-precision bits, so differing signs settle the order.
    if (x._size > GuardBits && y._size > GuardBits)
    {
        int signX = x._mantissa.Sign;
        int signY = y._mantissa.Sign;
        if (signX != signY)
        {
            bool xIsLower = signX < signY;
            return xIsLower == pickMin ? x : y;
        }
    }

    int order = Compare(in x, in y);
    if (order != 0)
    {
        bool xIsLower = order < 0;
        return xIsLower == pickMin ? x : y;
    }

    // Tie: prefer the operand with the larger size, then the one with the smaller scale.
    if (x._size != y._size)
    {
        return x._size > y._size ? x : y;
    }

    if (x.Scale != y.Scale)
    {
        return x.Scale < y.Scale ? x : y;
    }

    return x;
}
///////////////////////// Operator Overloads: BigFloat <--> BigFloat /////////////////////////
/// <summary>Returns true if the left side BigFloat is equal to the right side BigFloat, as decided by <c>Equals</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator ==(BigFloat left, BigFloat right) => left.Equals(right);
/// <summary>Returns true if the left side BigFloat is not equal to the right BigFloat, as decided by <c>Equals</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator !=(BigFloat left, BigFloat right) => !left.Equals(right);
/// <summary>Returns true if <paramref name="left"/> orders before <paramref name="right"/> according to <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator <(BigFloat left, BigFloat right) => left.CompareTo(right) < 0;
/// <summary>Returns true if <paramref name="left"/> orders after <paramref name="right"/> according to <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator >(BigFloat left, BigFloat right) => left.CompareTo(right) > 0;
/// <summary>Returns true if <paramref name="left"/> orders before or equal to <paramref name="right"/> according to <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator <=(BigFloat left, BigFloat right) => left.CompareTo(right) <= 0;
/// <summary>Returns true if <paramref name="left"/> orders after or equal to <paramref name="right"/> according to <c>CompareTo</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool operator >=(BigFloat left, BigFloat right) => left.CompareTo(right) >= 0;
///////////////////////// Operator Overloads: BigFloat <--> BigInteger /////////////////////////
/// <summary>
/// Divides <paramref name="numerator"/> by <paramref name="denominator"/> using <see cref="DivideStandard"/>.
/// </summary>
/// <param name="numerator">The value being divided.</param>
/// <param name="denominator">The value to divide by.</param>
/// <exception cref="DivideByZeroException"><paramref name="denominator"/> reports <c>IsStrictZero</c>.</exception>
public static BigFloat operator /(BigFloat numerator, BigFloat denominator)
{
    if (!denominator.IsStrictZero)
    {
        return DivideStandard(numerator, denominator);
    }

    throw new DivideByZeroException("Division by zero");
}
/// <summary>
/// Standard division algorithm: computes the first argument divided by the second.
/// </summary>
/// <remarks>
/// The parameter names are historical and inverted relative to their roles: <paramref name="divisor"/>
/// is the value being divided (the numerator) and <paramref name="dividend"/> is the value divided by
/// (the denominator). They are kept unchanged for compatibility with existing callers.
/// </remarks>
/// <param name="divisor">The numerator.</param>
/// <param name="dividend">The denominator; its mantissa must be non-zero.</param>
/// <returns>The quotient, sized from the less precise of the two inputs.</returns>
private static BigFloat DivideStandard(BigFloat divisor, BigFloat dividend)
{
    // find the size of the smaller input to determine output size
    int outputSize = Math.Min(divisor.Size, dividend.Size);

    // If we right-shift the numerator to align it with the denominator and its magnitude is then not
    // larger than the denominator's, we need to decrement the output size. This is because we would
    // only have a partial bit of precision on the last bit, and it could introduce error.
    // The test is on magnitudes: comparing the signed mantissas would make the resulting precision
    // depend on the signs of the operands rather than on their sizes.
    // note: We could also left shift the denominator so it is left aligned with the numerator but that would be more expensive. (but could be more accurate)
    // note: We can maybe speed this up by just checking the top 32 or 64 bits of each.
    BigInteger numeratorMagnitude = BigInteger.Abs(divisor._mantissa);
    BigInteger denominatorMagnitude = BigInteger.Abs(dividend._mantissa);
    if (numeratorMagnitude >> (divisor.Size - dividend.Size) <= denominatorMagnitude)
    {
        outputSize--;
    }

    // We need to oversize T (using left shift) so when we divide, it is the correct size.
    int wantedSizeForT = dividend.Size + outputSize + GuardBits;
    int leftShiftTBy = wantedSizeForT - divisor.Size;
    BigInteger leftShiftedT = divisor._mantissa << leftShiftTBy;

    BigInteger resIntPart = leftShiftedT / dividend._mantissa;

    // Undo the oversizing shift in the scale so the numeric value is the true quotient.
    int resScalePart = divisor.Scale - dividend.Scale - leftShiftTBy + GuardBits;
    int sizePart = MantissaSize(resIntPart);
    return new BigFloat(resIntPart, resScalePart, sizePart);
}
/// <summary>
/// Computes the remainder of <paramref name="dividend"/> divided by <paramref name="divisor"/>.
/// For positive values, Modulus is identical to Remainder; for negatives, Modulus and Remainder differ.
/// The remainder is slightly faster.
/// </summary>
/// <param name="dividend">The value being divided.</param>
/// <param name="divisor">The value to divide by.</param>
/// <returns>The remainder, built from the raw mantissa arithmetic below.</returns>
/// <exception cref="DivideByZeroException">The mantissa of <paramref name="divisor"/> is zero.</exception>
public static BigFloat Remainder(BigFloat dividend, BigFloat divisor)
{
int scaleDiff = dividend.Scale - divisor.Scale;
if (divisor._mantissa == 0)
throw new DivideByZeroException();
// Same scale: the mantissas are directly comparable.
if (scaleDiff == 0)
return new(dividend._mantissa % divisor._mantissa, divisor.Scale, true);
if (scaleDiff < 0)
{
// The dividend has the finer scale; t is how many bits it extends below the divisor.
int t = -scaleDiff;
// ((A % (B << t)) >> t) == ((A >> t) % B) — no giant left shifts
BigInteger r = (dividend._mantissa >> t) % divisor._mantissa;
return new(r, divisor.Scale, true);
}
else
{
// scaleDiff > 0 (referred to as s below)
BigInteger m = divisor._mantissa;
// Optional fast path when divisor has ≥ s trailing zeros:
int tz = (int)BigInteger.TrailingZeroCount(BigInteger.Abs(m));
if (scaleDiff <= tz)
{
BigInteger r = dividend._mantissa % (m >> scaleDiff);
return new(r, dividend.Scale, true);
}
// General path: ((A << s) % m) >> s → ((|A| % |m|) * (2^s mod |m|) % |m|) with sign(A), then >> s
// The 2^tz factor is first stripped from the modulus, leaving u = s - tz bits to account for.
int u = scaleDiff - tz;
BigInteger mOdd = m >> tz; // strip 2^tz from modulus
BigInteger mAbs = BigInteger.Abs(mOdd);
BigInteger aRem = dividend._mantissa % mOdd; // sign matches dividend
BigInteger pow2 = BigInteger.ModPow(2, u, mAbs); // O(log s), no big temps
BigInteger tRem = (BigInteger.Abs(aRem) * pow2) % mAbs;
if (aRem.Sign < 0) tRem = -tRem;
BigInteger r2 = tRem >> u;
return new(r2, dividend.Scale, true);
}
}
/// <summary>
/// Performs a modulus operation by delegating to <see cref="Remainder"/>. For negative numbers there are
/// two approaches, a math and a programmers version; this operator uses the programmers version.
/// See <see cref="Mod"/> for the mathematical version.
/// </summary>
public static BigFloat operator %(BigFloat dividend, BigFloat divisor) => Remainder(dividend, divisor);
/// <summary>
/// Mathematical modulo operation.
/// The result has the same sign as <paramref name="divisor"/>.
/// For positive values, modulo is identical to Remainder.
/// Implemented as: r = Remainder(dividend, divisor); if (r == 0 or sign(r)==sign(divisor)) return r; else return r + divisor.
/// </summary>
/// <param name="dividend">The value being divided.</param>
/// <param name="divisor">The value to divide by.</param>
/// <exception cref="DivideByZeroException">Thrown by <see cref="Remainder"/> when the divisor's mantissa is zero.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Mod(in BigFloat dividend, in BigFloat divisor)
{
    BigFloat remainder = Remainder(dividend, divisor);

    // Raw mantissa signs are used so no comparison or zero construction is needed.
    int remainderSign = remainder._mantissa.Sign;

    // An exact multiple (zero remainder) or a remainder already on the divisor's side is final;
    // anything else is moved into range by adding one divisor.
    bool needsWrap = remainderSign != 0 && remainderSign != divisor._mantissa.Sign;
    return needsWrap ? remainder + divisor : remainder;
}
/// <summary>
/// Splits the BigFloat into integer and fractional parts. (i.e. ModF)
/// </summary>
/// <returns>
/// A tuple of the integer part and the fractional part. When there are no mantissa bits below the
/// radix point the fraction is <c>ZeroWithAccuracy(0)</c>; when every mantissa bit lies below it the
/// integer part is <c>ZeroWithAccuracy(0)</c>.
/// </returns>
public (BigFloat integer, BigFloat fraction) SplitIntegerAndFractionalParts()
{
    // Number of low mantissa bits (guard bits included) that sit below the radix point.
    int fractionalBitCount = GuardBits - Scale;

    if (fractionalBitCount <= 0)
    {
        return (this, ZeroWithAccuracy(0));
    }

    if (fractionalBitCount >= _size)
    {
        return (ZeroWithAccuracy(0), this);
    }

    // ClearLowerNBits is used for the integer part to avoid two's complement issues.
    BigInteger wholeBits = ClearLowerNBits(_mantissa, fractionalBitCount);
    BigInteger fractionBits = _mantissa - wholeBits;

    BigFloat integerPart = new BigFloat(wholeBits, Scale, _size);
    BigFloat fractionPart = fractionBits.IsZero
        ? ZeroWithAccuracy(0)
        : new BigFloat(fractionBits, Scale, MantissaSize(fractionBits));

    return (integerPart, fractionPart);
}
/// <summary>
/// Bitwise Complement Operator - Reverses each bit in the data bits. Scale is not changed.
/// The size is reduced by at least 1 bit. This is because the leading bit is flipped to a zero.
/// </summary>
/// <param name="value">The value whose mantissa bits are flipped.</param>
public static BigFloat operator ~(BigFloat value)
{
    // A mask of _size one-bits; XOR with it flips every mantissa bit.
    BigInteger allOnes = (BigInteger.One << value._size) - 1;
    BigInteger flipped = value._mantissa ^ allOnes;
    return new BigFloat(flipped, value.Scale, true);
}
/// <summary>
/// Left shifts by increasing the scale by the left shift amount.
/// The mantissa and the precision are unchanged.
/// </summary>
/// <param name="value">The value the shift should be applied to.</param>
/// <param name="shift">The number of bits to shift left.</param>
/// <returns>A new BigFloat with the same mantissa and size and a scale raised by <paramref name="shift"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator <<(BigFloat value, int shift)
    => new BigFloat(value._mantissa, value.Scale + shift, value._size);
/// <summary>
/// Right shift: lowers the scale by <paramref name="shift"/>. The mantissa and its size are reused as-is,
/// so the precision is unchanged.
/// </summary>
/// <param name="value">The value the shift should be applied to.</param>
/// <param name="shift">The number of bits to shift right.</param>
/// <returns>A new BigFloat with the same mantissa and a scale decreased by <paramref name="shift"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator >>(BigFloat value, int shift)
    => new(value._mantissa, value.Scale - shift, value._size);
#region Named operator alternatives (CA2225)
// Each method below simply forwards to the matching operator or conversion so that
// languages without operator-overload support can reach the same functionality.

/// <summary>Named alternative for <c>operator /</c> on two BigFloats (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Divide(BigFloat numerator, BigFloat denominator)
{
    return numerator / denominator;
}

/// <summary>Named alternative for <c>operator /</c> with an <see cref="int"/> divisor (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Divide(BigFloat value, int divisor)
{
    return value / divisor;
}

/// <summary>Named alternative for <c>operator /</c> with an <see cref="int"/> dividend (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Divide(int value, BigFloat divisor)
{
    return value / divisor;
}

/// <summary>Named alternative for <c>operator ~</c> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat OnesComplement(BigFloat value)
{
    return ~value;
}

/// <summary>Named alternative for the left-shift operator (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat LeftShift(BigFloat value, int shift)
{
    return value << shift;
}

/// <summary>Named alternative for the right-shift operator (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat RightShift(BigFloat value, int shift)
{
    return value >> shift;
}

/// <summary>Named alternative for <c>operator ++</c> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Increment(BigFloat value)
{
    return ++value;
}

/// <summary>Named alternative for <c>operator --</c> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Decrement(BigFloat value)
{
    return --value;
}

/// <summary>Named alternative for unary <c>operator +</c> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Plus(BigFloat value)
{
    return +value;
}

/// <summary>Named alternative for unary <c>operator -</c> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Negate(BigFloat value)
{
    return -value;
}

/// <summary>Named alternative for binary <c>operator +</c> on two BigFloats (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Add(BigFloat left, BigFloat right)
{
    return left + right;
}

/// <summary>Named alternative for binary <c>operator +</c> with an <see cref="int"/> addend (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Add(BigFloat left, int right)
{
    return left + right;
}

/// <summary>Named alternative for binary <c>operator -</c> on two BigFloats (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Subtract(BigFloat left, BigFloat right)
{
    return left - right;
}

/// <summary>Named alternative for binary <c>operator -</c> with an <see cref="int"/> subtrahend (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Subtract(BigFloat left, int right)
{
    return left - right;
}

/// <summary>Named alternative for the explicit conversion from <see cref="BigInteger"/> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat FromBigInteger(BigInteger value)
{
    return (BigFloat)value;
}

/// <summary>Named alternative for the explicit conversion from <see cref="double"/> (CA2225).</summary>
[EditorBrowsable(EditorBrowsableState.Advanced)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat FromDouble(double value)
{
    return (BigFloat)value;
}
#endregion
/// <summary>
/// Increments the integer part of a BigFloat by one.
/// </summary>
/// <param name="r">The value to increment.</param>
/// <returns>
/// <paramref name="r"/> unchanged when the ones place is not above the mantissa's lowest bit
/// (<c>GuardBits - Scale &lt; 1</c>); otherwise a new value with one added at the ones place.
/// </returns>
public static BigFloat operator ++(BigFloat r)
{
    // assuming GuardBits is 4:
    // 1111|1111__. => 1111|1111<< 6 +1 => 1111|1111__.
    // 1111|1111_. => 1111|1111<< 5 +1 => 10000|0000#.
    // 1111|1111. => 1111|1111<< 4 +1 => 10000|0000.
    // 1111|1.111 => 1111|1111<< 1 +1 => 10000|0.111
    // 1111.|1111 => 1111|1111<< 0 +1 => 10000.|1111
    // 111.1|1111 => 1111|1111<< -1 +1 => 1000.1|1111
    // .1111|1111 => 1111|1111<< -4 +1 => 1.1111|1111
    //.01111|1111 => 1111|1111<< -5 +1 => 1.01111|1111

    // Bit position of the ones place inside the raw mantissa (guard bits included).
    int onesPlace = GuardBits - r.Scale;
    if (onesPlace < 1)
    {
        // The ones place is at or below the lowest stored bit; adding one would not register.
        return r;
    }

    // NOTE(review): an earlier "onesPlace == -1" half-bit special case lived here, but it could never
    // run because of the guard above. It was removed as dead code; behaviour is unchanged.
    BigInteger intVal = r._mantissa + (BigInteger.One << onesPlace);
    return new BigFloat(intVal, r.Scale, MantissaSize(intVal));
}
/// <summary>
/// Decrements the integer part of a BigFloat by one.
/// </summary>
/// <param name="r">The value to decrement.</param>
/// <returns>
/// <paramref name="r"/> unchanged when the ones place is not above the mantissa's lowest bit
/// (<c>GuardBits - Scale &lt; 1</c>); otherwise a new value with one subtracted at the ones place.
/// </returns>
public static BigFloat operator --(BigFloat r)
{
    // Bit position of the ones place inside the raw mantissa (guard bits included).
    int onesPlace = GuardBits - r.Scale;
    if (onesPlace < 1)
    {
        // The ones place is at or below the lowest stored bit; subtracting one would not register.
        return r;
    }

    // NOTE(review): an earlier "onesPlace == -1" half-bit special case lived here, but it could never
    // run because of the guard above. It was removed as dead code; behaviour is unchanged.
    BigInteger intVal = r._mantissa - (BigInteger.One << onesPlace);
    return new BigFloat(intVal, r.Scale, MantissaSize(intVal));
}
/// <summary>
/// Unary plus: returns the operand unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator +(BigFloat r) => r;
/// <summary>
/// Negates a BigFloat value (i.e. changes its sign). Scale and size are carried over unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator -(BigFloat r)
{
    BigInteger negatedMantissa = -r._mantissa;
    return new(negatedMantissa, r.Scale, r._size);
}
/// <summary>
/// Adds two BigFloats. The result uses the larger of the two scales; the operand with the
/// smaller scale is first reduced with a rounding right shift so both mantissas line up.
/// </summary>
public static BigFloat operator +(BigFloat r1, BigFloat r2)
{
    // Shortcut: when one operand lies entirely below the other's lowest stored bit it cannot
    // contribute, so the larger-scaled operand is returned untouched.
    //
    // Example: "12345678"9ABCDEF0________. (Size: 29, _size: 61, Scale: 64)
    //        +                "12"34560.789A (Size: 5, _size: 37, Scale: 20)
    //        =  12345678"9ABCDEF0________.
    int scaleGap = r1.Scale - r2.Scale;
    if (scaleGap > r2._size)
    {
        return r1;
    }
    if (-scaleGap > r1._size)
    {
        return r2;
    }

    // Precision below the larger scale is dropped: the finer operand is rounded down to the
    // coarser operand's scale before the mantissas are summed.
    BigInteger sum;
    int resultScale;
    if (r1.Scale < r2.Scale)
    {
        sum = RoundingRightShift(r1._mantissa, -scaleGap) + r2._mantissa;
        resultScale = r2.Scale;
    }
    else
    {
        sum = r1._mantissa + RoundingRightShift(r2._mantissa, scaleGap);
        resultScale = r1.Scale;
    }

    return new BigFloat(sum, resultScale, MantissaSize(sum));
}
/// <summary>
/// Adds an <see cref="int"/> to a BigFloat. The integer is aligned to the BigFloat's scale
/// (shifted by <c>GuardBits - Scale</c>) and added to the raw mantissa.
/// </summary>
/// <param name="r1">The BigFloat operand; its scale is kept for the result.</param>
/// <param name="r2">The integer to add. All values, including <see cref="int.MinValue"/>, are accepted.</param>
/// <returns><paramref name="r1"/> unchanged when <paramref name="r2"/> lies entirely below the stored bits; otherwise the sum.</returns>
public static BigFloat operator +(BigFloat r1, int r2)
{
    // Unsigned magnitude of r2. int.MinValue has no positive int counterpart, so it is mapped
    // explicitly instead of going through Abs (which would throw OverflowException).
    uint magnitude = r2 == int.MinValue ? 0x80000000u : (uint)Math.Abs(r2);

    // Position of the integer's ones place inside the raw mantissa.
    int alignShift = GuardBits - r1.Scale;

    // If even the integer's top bit lands below bit 0 of the mantissa, it cannot change the value.
    if (BitOperations.Log2(magnitude) + 1 + alignShift <= 0)
    {
        return r1;
    }

    BigInteger addVal = (BigInteger)r2 << alignShift;
    addVal += r1._mantissa;
    return new BigFloat(addVal, r1.Scale, MantissaSize(addVal));
}
///////////////////////// Rounding, Shifting, Truncate /////////////////////////
/// <summary>
/// Checks whether a raw mantissa would round away from zero if its <c>GuardBits</c> low bits were removed.
/// </summary>
/// <param name="bi">The BigInteger (guard bits included) to test.</param>
/// <returns>True if removing the guard bits would round <paramref name="bi"/> away from zero.</returns>
public static bool WouldRoundUp(BigInteger bi) => WouldRoundUp(bi, GuardBits);
/// <summary>
/// Checks whether this value's mantissa would round away from zero if its <c>GuardBits</c> low bits were removed.
/// e.g. 11010101 with 3 bits removed would be 11011.
/// </summary>
/// <returns>True if removing the guard bits would round the mantissa away from zero.</returns>
public bool WouldRoundUp() => WouldRoundUp(_mantissa, GuardBits);
/// <summary>
/// Checks whether this value's mantissa would round away from zero if a given number of low bits were removed.
/// e.g. 11010101 with bottomBitsRemoved=3 would be 11011.
/// </summary>
/// <param name="bottomBitsRemoved">The number of least-significant bits that would be removed; rounding takes place at that position.</param>
/// <returns>True if removing that many bits would round the mantissa away from zero.</returns>
public bool WouldRoundUp(int bottomBitsRemoved) => WouldRoundUp(_mantissa, bottomBitsRemoved);
/// <summary>
/// Checks whether <paramref name="val"/> would round up (away from zero) if its lowest
/// <paramref name="bottomBitsRemoved"/> bits were removed.
/// e.g. 11010101 with 3 bits removed would be 11011.
/// </summary>
/// <param name="val">The integer to inspect.</param>
/// <param name="bottomBitsRemoved">How many least-significant bits would be dropped.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool WouldRoundUp(BigInteger val, int bottomBitsRemoved)
{
    // The deciding bit is the highest one among the bits being removed.
    int halfBitIndex = bottomBitsRemoved - 1;

    if (val.Sign >= 0)
    {
        // Non-negative: round up exactly when the deciding bit is set.
        return !(val >>> halfBitIndex).IsEven;
    }

    // Negative: test (val - 1), and the sense of the parity check is inverted.
    return ((val - 1) >>> halfBitIndex).IsEven;
}
/// <summary>
/// Computes the rounded mantissa with the guard bits removed, by applying
/// <c>RoundingRightShift</c> with a shift of <c>GuardBits</c>.
/// </summary>
/// <param name="x">The input mantissa including guard bits.</param>
/// <returns>The rounded and shifted mantissa.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigInteger GetRoundedMantissa(BigInteger x) => RoundingRightShift(x, GuardBits);
/// <summary>
/// Gets the mantissa rounded with GuardBits removed.
/// Prefer the <see cref="RoundedMantissa"/> property, which this method forwards to.
/// </summary>
[Obsolete("Use RoundedMantissa instead. This method will be removed in a future major version.")]
[EditorBrowsable(EditorBrowsableState.Never)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigInteger GetRoundedMantissa()
{
    return RoundedMantissa;
}
/// <summary>
/// Computes the rounded mantissa with the guard bits removed, passing <paramref name="size"/> by
/// reference to <c>RoundingRightShift</c> so the size can be updated alongside the shift
/// (for example when rounding carries into a new top bit).
/// </summary>
/// <param name="x">The input mantissa including guard bits.</param>
/// <param name="size">The current size in bits; updated by the underlying shift helper.</param>
/// <returns>The rounded and shifted mantissa.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigInteger GetRoundedMantissa(BigInteger x, ref int size)
    => RoundingRightShift(x, GuardBits, ref size);
/// <summary>
/// Gets the raw mantissa exactly as stored, including the guard bits.
/// </summary>
public readonly BigInteger RawMantissa
{
    get { return _mantissa; }
}
/// <summary>
/// Gets the mantissa with the guard bits rounded off and removed. No scaling is applied.
/// </summary>
public readonly BigInteger RoundedMantissa
{
    get { return RoundingRightShift(_mantissa, GuardBits); }
}
/// <summary>
/// Converts a BigFloat to its integral value: the scale is applied, the guard bits are rounded off,
/// and any fractional bits that remain (negative scale) are truncated toward zero.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigInteger GetIntegralValue(BigFloat value)
{
    int scale = value.Scale;

    if (scale < 0)
    {
        // Round away the guard bits first, then drop the remaining fractional bits.
        BigInteger rounded = RoundingRightShift(value._mantissa, GuardBits);
        int fractionalBits = -scale;

        if (rounded.Sign < 0)
        {
            // Shift the magnitude so negatives truncate toward zero rather than toward -infinity.
            return -((-rounded) >> fractionalBits);
        }

        return rounded >> fractionalBits;
    }

    // Non-negative scale: scale up, then round off the guard bits.
    return RoundingRightShift(value._mantissa << scale, GuardBits);
}
/// <summary>
/// Gets the integral value of this BigFloat with the scale applied. Guard bits are rounded and
/// removed, and any remaining fractional bits are truncated toward zero.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigInteger GetIntegralValue()
{
    return GetIntegralValue(this);
}
/// <summary>
/// Removes a specified number of least-significant bits, increasing the scale and reducing the precision.
/// If the most significant of the removed bits is set, the new least significant bit is incremented away from zero.
/// Caution: a round-up may carry all the way to the most significant bit.
/// Example: 11.11 with 1 bit removed would result in 100.0 (the same size).
/// Also see: ReducePrecision.
/// </summary>
/// <param name="x">The value to reduce.</param>
/// <param name="targetBitsToRemove">Specifies the target number of least-significant bits to remove.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="targetBitsToRemove"/> is negative.</exception>
public static BigFloat TruncateByAndRound(BigFloat x, int targetBitsToRemove)
{
    if (targetBitsToRemove < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(targetBitsToRemove), $"Param {nameof(targetBitsToRemove)} must be 0 or greater.");
    }

    // The size is passed by reference so the shift helper can keep it in step with the rounded mantissa.
    int resultSize = x._size;
    BigInteger reduced = RoundingRightShift(x._mantissa, targetBitsToRemove, ref resultSize);

    // Every removed bit raises the scale by one so the value stays in place.
    return new(reduced, x.Scale + targetBitsToRemove, resultSize);
}
/// <summary>
/// Instance form of <see cref="TruncateByAndRound(BigFloat, int)"/>: removes
/// <paramref name="targetBitsToRemove"/> low bits from this value with rounding.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat TruncateByAndRound(int targetBitsToRemove)
{
    return TruncateByAndRound(this, targetBitsToRemove);
}
/// <summary>
/// Rounds to the nearest integer, preserving accuracy.
/// </summary>
/// <param name="x">The value to round.</param>
/// <returns>
/// <paramref name="x"/> itself when it has no bits below the ones place; zero when its magnitude is
/// below one half; plus or minus one (matching the sign of <paramref name="x"/>) when its magnitude
/// is in [0.5, 1); otherwise the mantissa rounded at the ones place.
/// </returns>
public static BigFloat Round(BigFloat x)
{
    // Number of mantissa bits (guard bits included) below the ones place.
    int bitsToClear = GuardBits - x.Scale;
    if (bitsToClear <= 0)
    {
        return x;
    }

    // The top stored bit is below the one-half position: magnitude < 0.5, rounds to zero.
    if (bitsToClear > x._size)
    {
        return ZeroWithAccuracy(x.Accuracy);
    }

    // The top stored bit IS the one-half position: 0.5 <= |x| < 1, rounds to one in magnitude.
    // The sign must follow x; returning +1 for a negative input would flip the sign of the result.
    if (bitsToClear == x._size)
    {
        BigFloat one = OneWithAccuracy(x.Accuracy);
        return x._mantissa.Sign < 0 ? -one : one;
    }

    // Round at the ones place, then shift back so the size stays the same. If the rounding
    // carried into a new top bit, the scale is bumped instead of growing the mantissa.
    (BigInteger result, bool carry) = RoundingRightShiftWithCarry(x._mantissa, bitsToClear);
    return new BigFloat(result << bitsToClear, x.Scale + (carry ? 1 : 0), x._size);
}
/// <summary>
/// Rounds this value to the nearest integer, preserving accuracy. Forwards to <see cref="Round(BigFloat)"/>.
/// </summary>
public BigFloat Round()
{
    return Round(this);
}
/// <summary>
/// Obsolete alias that forwards to <see cref="Round(BigFloat)"/> (rounds to the nearest integer).
/// </summary>
[Obsolete("Use BigFloat.Round(BigFloat x) myBigFloat.Round()")]
[EditorBrowsable(EditorBrowsableState.Never)]
public static BigFloat RoundToInteger(BigFloat x)
{
    return Round(x);
}
/// <summary>
/// Truncates towards zero, preserving accuracy: the bits below the ones place are cleared in place
/// while the scale and size stay the same.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat TruncateToIntegerKeepingAccuracy()
{
    int fractionalBitCount = GuardBits - Scale;

    if (fractionalBitCount <= 0)
    {
        // No bits below the ones place; already an integer.
        return this;
    }

    if (fractionalBitCount >= _size)
    {
        // Every stored bit is fractional; the integer part is zero.
        return ZeroWithAccuracy(Accuracy);
    }

    BigInteger clearedMantissa = ClearLowerNBits(_mantissa, fractionalBitCount);
    return new BigFloat(clearedMantissa, Scale, _size);
}
/// <summary>
/// Truncates towards zero, preserving accuracy. Static form that forwards to the instance method.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat TruncateToIntegerKeepingAccuracy(BigFloat x)
{
    return x.TruncateToIntegerKeepingAccuracy();
}
/// <summary>
/// Truncates towards zero. All bits below the ones place (guard bits included) are removed and the
/// result is rebuilt with fresh zero guard bits; its scale expression reduces to zero.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat Truncate()
{
    int bitsToClear = GuardBits - Scale;

    if (bitsToClear <= 0)
    {
        // Nothing below the ones place.
        return this;
    }

    if (bitsToClear >= _size)
    {
        // Magnitude is below one.
        return 0;
    }

    // Shift the magnitude so negative values truncate toward zero instead of toward -infinity.
    BigInteger wholeBits;
    if (_mantissa.Sign >= 0)
    {
        wholeBits = _mantissa >> bitsToClear;
    }
    else
    {
        wholeBits = -((-_mantissa) >> bitsToClear);
    }

    // Re-attach GuardBits zero bits beneath the integer and adjust scale and size to match.
    return new BigFloat(wholeBits << GuardBits, Scale + bitsToClear - GuardBits, _size - bitsToClear + GuardBits);
}
/// <summary>
/// Truncates towards zero. Static form that forwards to the instance <c>Truncate()</c> method.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat Truncate(BigFloat x)
{
    return x.Truncate();
}
/// <summary>
/// Adjusts the scale of a value by <paramref name="changeScaleAmount"/>, leaving the mantissa and size untouched.
/// </summary>
/// <exception cref="OverflowException">The new scale does not fit in an <see cref="int"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat AdjustScale(BigFloat x, int changeScaleAmount)
{
    int adjustedScale = checked(x.Scale + changeScaleAmount);
    return new(x._mantissa, adjustedScale, x._size);
}
/// <summary>
/// Adjusts the scale of this value by <paramref name="changeScaleAmount"/>. Forwards to the static overload.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat AdjustScale(int changeScaleAmount)
{
    return AdjustScale(this, changeScaleAmount);
}
/// <summary>
/// Adjusts accuracy by <paramref name="deltaBits"/>; delegates directly to <c>AdjustPrecision</c>.
/// A positive delta appends zero bits (value-preserving); a negative delta removes low bits with rounding.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat AdjustAccuracy(BigFloat x, int deltaBits)
{
    return AdjustPrecision(x, deltaBits);
}
/// <summary>
/// Internal variant of AdjustAccuracy that lets the caller choose whether shrinking rounds
/// (<paramref name="roundWhenShrinking"/> = true) or uses a plain right shift.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static BigFloat AdjustAccuracy(BigFloat x, int deltaBits, bool roundWhenShrinking)
{
    return AdjustPrecisionCore(x, deltaBits, roundWhenShrinking);
}
/// <summary>
/// Adjusts the accuracy of this value by <paramref name="deltaBits"/>. Forwards to the static overload.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat AdjustAccuracy(int deltaBits)
{
    return AdjustAccuracy(this, deltaBits);
}
/// <summary>
/// Sets accuracy to <paramref name="newAccuracyBits"/> (in bits).
/// Computes the change as <c>newAccuracyBits + x.Scale</c> and delegates to <c>AdjustPrecision</c>;
/// a zero change returns <paramref name="x"/> unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat SetAccuracy(BigFloat x, int newAccuracyBits)
{
    int deltaBits = newAccuracyBits + x.Scale;
    if (deltaBits == 0)
    {
        return x;
    }
    return AdjustPrecision(x, deltaBits);
}
/// <summary>
/// Sets the accuracy of this value to <paramref name="newAccuracyBits"/> (in bits).
/// Forwards to the static <c>SetAccuracy</c> overload.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat SetAccuracy(int newAccuracyBits)
{
    return SetAccuracy(this, newAccuracyBits);
}
/// <summary>
/// Sets the precision (and accuracy) of a number by appending 0 bits if too small or cropping bits if too large.
/// This legacy API does not round when reducing size; prefer <c>AdjustPrecision</c> or
/// <c>SetPrecisionWithRound</c> for modern behavior.
/// </summary>
/// <param name="x">The value to resize.</param>
/// <param name="newSize">The requested size in bits, compared against <c>x.Size</c>.</param>
[Obsolete("Use AdjustPrecision or SetPrecisionWithRound instead. This member will be removed in a future major version.")]
[EditorBrowsable(EditorBrowsableState.Never)]
public static BigFloat SetPrecision(BigFloat x, int newSize)
    => AdjustPrecisionCore(x, newSize - x.Size, roundWhenShrinking: false);
/// <summary>
/// Sets the precision (and accuracy) of this number by appending 0 bits if too small or cropping bits if too large.
/// This legacy API does not round when reducing size; prefer <c>AdjustPrecision</c> or
/// <c>SetPrecisionWithRound</c> for modern behavior.
/// </summary>
[Obsolete("Use AdjustPrecision or SetPrecisionWithRound instead. This member will be removed in a future major version.")]
[EditorBrowsable(EditorBrowsableState.Never)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat SetPrecision(int newSize)
{
    return SetPrecision(this, newSize);
}
/// <summary>
/// Changes the precision to the specified size. When reducing, the bits are not simply cut off:
/// the new least significant bit is rounded up if the most significant removed bit is set.
/// When growing, zero bits are appended via <c>AdjustPrecision</c>.
/// Caution: round-ups may carry to the most significant bit.
/// Also see: SetPrecision, TruncateByAndRound.
/// </summary>
/// <param name="x">The value to resize.</param>
/// <param name="newSize">The requested size in bits, compared against <c>x.Size</c>.</param>
public static BigFloat SetPrecisionWithRound(BigFloat x, int newSize)
{
    int excessBits = x.Size - newSize;

    if (excessBits == 0)
    {
        return x;
    }

    return excessBits > 0
        ? TruncateByAndRound(x, x.Size - newSize)   // too large: round off the surplus low bits
        : AdjustPrecision(x, newSize - x.Size);     // too small: append zero bits
}
/// <summary>
/// Changes the precision of this value to <paramref name="newSize"/> bits, rounding when reducing.
/// Forwards to the static overload.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat SetPrecisionWithRound(int newSize)
{
    return SetPrecisionWithRound(this, newSize);
}
/// <summary>
/// Adjusts precision by shifting the mantissa and compensating the scale.
/// A positive <paramref name="deltaBits"/> appends zero bits (extends precision).
/// A negative <paramref name="deltaBits"/> drops low bits using a rounding right shift (reduces precision).
/// Also see: AdjustAccuracy, SetPrecision, SetPrecisionWithRound.
/// </summary>
/// <remarks>
/// This overload always requests rounding when shrinking (<c>roundWhenShrinking: true</c>).
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat AdjustPrecision(BigFloat x, int deltaBits)
{
    return AdjustPrecisionCore(x, deltaBits, roundWhenShrinking: true);
}
/// <summary>
/// Adjusts the precision of this value by <paramref name="deltaBits"/>. Forwards to the static overload.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public BigFloat AdjustPrecision(int deltaBits)
{
    return AdjustPrecision(this, deltaBits);
}
/// <summary>
/// Shared implementation for the precision/accuracy adjusters. Grows the mantissa by appending zero
/// bits, or shrinks it by removing low bits, while moving the scale the opposite way.
/// </summary>
/// <param name="x">The value to resize.</param>
/// <param name="deltaBits">Bits to add (positive) or remove (negative); zero returns <paramref name="x"/>.</param>
/// <param name="roundWhenShrinking">True to use a rounding right shift when removing bits; false for a plain right shift.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static BigFloat AdjustPrecisionCore(BigFloat x, int deltaBits, bool roundWhenShrinking)
{
    switch (deltaBits)
    {
        case 0:
            return x;

        case > 0:
            // Growing: append zero bits and lower the scale by the same amount.
            return new BigFloat(
                x._mantissa << deltaBits,
                x.Scale - deltaBits,
                checked(x._size + deltaBits));
    }

    int shrinkBy = -deltaBits;

    // Removing at least as many bits as are stored leaves nothing: a zero mantissa of size 0.
    if (shrinkBy >= x._size)
    {
        return new BigFloat(BigInteger.Zero, x.Scale + shrinkBy, 0);
    }

    BigInteger resizedMantissa;
    if (roundWhenShrinking)
    {
        resizedMantissa = RoundingRightShift(x._mantissa, shrinkBy);
    }
    else
    {
        resizedMantissa = x._mantissa >> shrinkBy;
    }

    return new BigFloat(
        resizedMantissa,
        x.Scale + shrinkBy,
        checked(x._size - shrinkBy));
}
/// <summary>
/// [Obsolete] Extends the precision and accuracy of a number by forwarding to
/// <c>AdjustPrecision(x, bitsToAdd)</c>. Prefer calling AdjustPrecision with a positive delta.
/// </summary>
[Obsolete("Use AdjustPrecision(x, +bitsToAdd). This method will be removed in the next major version.")]
[EditorBrowsable(EditorBrowsableState.Never)]
public static BigFloat ExtendPrecision(BigFloat x, int bitsToAdd)
{
    return AdjustPrecision(x, bitsToAdd);
}
/// <summary>
/// [Obsolete] Reduces the precision by forwarding to <c>AdjustPrecision(x, -reduceBy)</c>, which
/// removes low bits using a rounding right shift. Prefer calling AdjustPrecision with a negative delta.
/// </summary>
[Obsolete("Use AdjustPrecision(x, -reduceBy). This method will be removed in the next major version.")]
[EditorBrowsable(EditorBrowsableState.Never)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat ReducePrecision(BigFloat x, int reduceBy)
{
    return AdjustPrecision(x, -reduceBy);
}
/// <summary>
/// Subtracts <paramref name="r2"/> from <paramref name="r1"/>. The result uses the larger of the
/// two scales; the operand with the smaller scale is right-shifted to line up before subtracting.
/// </summary>
public static BigFloat operator -(BigFloat r1, BigFloat r2)
{
// Early exit for zero operands.
if (r2.IsStrictZero) return r1; // Future: review if this is needed and is accuracy preserved.
if (r1.IsStrictZero) return -r2;
// Align both mantissas to the larger scale. This is a plain (non-rounding) right shift,
// which for a negative BigInteger rounds toward negative infinity.
BigInteger r1Bits = (r1.Scale < r2.Scale) ? (r1._mantissa >> (r2.Scale - r1.Scale)) : r1._mantissa;
BigInteger r2Bits = (r1.Scale > r2.Scale) ? (r2._mantissa >> (r1.Scale - r2.Scale)) : r2._mantissa;
BigInteger diff = r1Bits - r2Bits;
// One-unit downward correction: applied when r1 has the smaller scale and is negative, or
// otherwise (including equal scales, where nothing was shifted) when r2's mantissa is negative.
// NOTE(review): the intent appears to be compensating for the floor shift above, but the
// equal-scale case also takes this path — confirm against the test suite before changing.
if (r1.Scale < r2.Scale ? r1.Sign < 0 : r2._mantissa.Sign < 0)
{
diff--;
}
// Clamp defensively so a negative size is never passed to the constructor.
int size = Math.Max(0, MantissaSize(diff));
return new BigFloat(diff, r1.Scale < r2.Scale ? r2.Scale : r1.Scale, size);
}
/// <summary>
/// Subtracts an <see cref="int"/> from a BigFloat.
/// </summary>
/// <param name="r1">The BigFloat operand; its scale is kept for the result.</param>
/// <param name="r2">The integer to subtract. All values, including <see cref="int.MinValue"/>, are accepted.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator -(BigFloat r1, int r2)
{
    if (r2 != int.MinValue)
    {
        // The negation is representable, so subtraction is just addition of the opposite.
        return r1 + (-r2);
    }

    // -int.MinValue (2^31) does not fit in an int: negating it would wrap back to int.MinValue
    // (or overflow). Add 2^31 directly, mirroring the steps of the int addition operator.
    int alignShift = GuardBits - r1.Scale;

    // 2^31 occupies 32 bits; if all of them land below bit 0 of the mantissa nothing changes.
    if (32 + alignShift <= 0)
    {
        return r1;
    }

    BigInteger addVal = BigInteger.Negate(r2) << alignShift;
    addVal += r1._mantissa;
    return new BigFloat(addVal, r1.Scale, MantissaSize(addVal));
}
/// <summary>
/// Squares a BigFloat. The mantissa is multiplied by itself and the product is then reduced with a
/// rounding right shift by the number of bits it grew beyond the input's size.
/// </summary>
/// <param name="val">The value to square.</param>
public static BigFloat PowerOf2(BigFloat val)
{
    BigInteger squared = val._mantissa * val._mantissa;
    int squaredSize = MantissaSize(squared);

    // Bits gained by the multiplication that must be removed again.
    int excessBits = squaredSize - val._size;
    squared = RoundingRightShift(squared, excessBits, ref squaredSize);

    // Both factors carried guard bits, so one set of GuardBits is subtracted from the combined scale.
    int resultScale = (2 * val.Scale) + excessBits - GuardBits;

    BigFloat result = new(squared, resultScale, squaredSize);
    AssertValid(result);
    return result;
}
/// <summary>
/// Squares a BigFloat with a maximum required output precision.
/// This function can save on compute cycles by not calculating bits that are not needed.
/// </summary>
/// <param name="val">The base.</param>
/// <param name="maxOutputPrecisionInBits">The maximum number of bits needed in the output.</param>
/// <returns>A BigFloat holding val squared; when the input is oversized it is pre-shrunk before multiplying.</returns>
public static BigFloat PowerOf2(BigFloat val, int maxOutputPrecisionInBits)
{
/* valSz resSize skipIf
* 3 5-6 maxOutputPrecisionInBits >= valSz*2
* 4 7-8
* 5 9-10 */
// How many bits the full-precision square would exceed the requested output by.
int overSized = (val._size * 2) - maxOutputPrecisionInBits - (2 * GuardBits);
// We can just use PowerOf2() since the output will never be larger than maxOutputPrecisionInBits
// (or larger by at most one bit, handled below).
if (overSized <= 1)
{
BigFloat p2 = PowerOf2(val);
// If the size difference is 1 BUT the output size is still correct, just return it.
if (overSized <= 0 || p2._size == maxOutputPrecisionInBits)
{
return p2;
}
// Output is oversized by 1.
// NOTE(review): this lowers the scale while reusing the same mantissa and size, which also
// changes the represented value — confirm this is intended.
return new BigFloat(p2._mantissa, p2.Scale - 1, p2._size);
}
// Pre-shrink the input by half the overshoot (rounded up), since squaring doubles the bit count.
// This is a plain right shift (no rounding).
int inputShink = (overSized + 1) / 2;
BigInteger valWithLessPrec = val._mantissa >> inputShink;
BigInteger prod = valWithLessPrec * valWithLessPrec;
int resBitLen = MantissaSize(prod);
int shrinkBy = resBitLen - val._size - (2 * GuardBits);
// NOTE(review): sizePart is computed before the rounding shift and the shift is not given the size
// by reference, so a rounding carry would not be reflected in sizePart — TODO confirm.
int sizePart = resBitLen - shrinkBy;
prod = RoundingRightShift(prod, shrinkBy);
// NOTE(review): inputShink is not added back into the scale here — verify against callers/tests.
int resScalePart = (2 * val.Scale) + shrinkBy - GuardBits;
return new(prod, resScalePart, sizePart);
}
/// <summary>
/// Multiplies two BigFloats. A strict-zero operand short-circuits to a zero carrying the smaller
/// of the two accuracies; everything else is handled by <c>Multiply(a, b)</c>.
/// </summary>
public static BigFloat operator *(BigFloat a, BigFloat b)
{
    bool eitherIsZero = a.IsStrictZero || b.IsStrictZero;
    return eitherIsZero
        ? ZeroWithAccuracy(Math.Min(a.Accuracy, b.Accuracy))
        : Multiply(a, b);
}
/// <summary>
/// Multiplies this value by <paramref name="other"/>. Forwards to the static <c>Multiply(a, b)</c>.
/// </summary>
public BigFloat Multiply(BigFloat other) => Multiply(this, other);
/// <summary>
/// Multiplies two BigFloats. When the operands differ greatly in size, the larger one is first
/// shortened (keeping a few extra bits) so the multiplication does not compute bits that would be
/// discarded. The product is then reduced to the size of the smaller operand with rounding.
/// </summary>
public static BigFloat Multiply(BigFloat a, BigFloat b)
{
    // Below this size difference no pre-shift is done (saves the shift and keeps LSB precision).
    const int SkipShiftBelowSizeDiff = 32;
    // When pre-shifting, the larger operand keeps this many bits more than the smaller one.
    const int ExtraPrecisionBits = 16;

    int sizeDiff = a._size - b._size;
    int absSizeDiff = Math.Abs(sizeDiff);

    int preShift;
    int targetSize;
    BigInteger product;

    if (absSizeDiff < SkipShiftBelowSizeDiff)
    {
        // Sizes are close: multiply as-is.
        preShift = 0;
        product = b._mantissa * a._mantissa;
        targetSize = Math.Min(a._size, b._size);
    }
    else
    {
        preShift = Math.Max(0, absSizeDiff - ExtraPrecisionBits);
        if (sizeDiff > 0)
        {
            // a is the larger operand: shorten it first.
            product = (a._mantissa >> preShift) * b._mantissa;
            targetSize = b._size;
        }
        else
        {
            // b is the larger operand: shorten it first.
            product = (b._mantissa >> preShift) * a._mantissa;
            targetSize = a._size;
        }
    }

    // Reduce the product to the target size; the size is kept in step with any rounding carry.
    int productSize = MantissaSize(product);
    int excessBits = productSize - targetSize;
    product = RoundingRightShift(product, excessBits, ref productSize);

    // Scales add; removed bits (both shifts) raise the scale; one set of guard bits is dropped.
    int resultScale = a.Scale + b.Scale + excessBits + preShift - GuardBits;
    return new BigFloat(product, resultScale, productSize);
}
/// <summary>
/// Multiplies a BigFloat by an <see cref="int"/>. Zero, one and minus one are special-cased,
/// powers of two only move the scale, three uses shift-and-add, and everything else uses a
/// BigInteger multiply followed by a rounding reduction back to the input's size.
/// </summary>
public static BigFloat operator *(BigFloat a, int b)
{
    // Zero and sign-only special cases.
    switch (b)
    {
        case 0:
            return ZeroWithAccuracy(a.Accuracy);
        case 1:
            return a;
        case -1:
            return -a;
    }

    bool isNegative = b < 0;
    // int.MinValue has no positive int counterpart, so its magnitude is supplied directly.
    uint magnitude = b == int.MinValue ? 0x80000000u : (uint)Math.Abs(b);

    // Power-of-two multipliers (2, 4, 8, ...): only the scale moves, the mantissa keeps its bits.
    if ((magnitude & (magnitude - 1)) == 0)
    {
        int exponent = BitOperations.TrailingZeroCount(magnitude);
        BigInteger signedMantissa = isNegative ? BigInteger.Negate(a._mantissa) : a._mantissa;
        return new BigFloat(signedMantissa, a.Scale + exponent, a._size);
    }

    BigInteger product;
    if (magnitude == 3)
    {
        // Shift-and-add avoids a full BigInteger multiply.
        product = (a._mantissa << 1) + a._mantissa;
        if (isNegative)
        {
            product = BigInteger.Negate(product);
        }
    }
    else
    {
        product = BigInteger.Multiply(a._mantissa, b);
    }

    // Bring the product back to the input's size, rounding off any bits it gained.
    int productSize = MantissaSize(product);
    int excessBits = productSize - a._size;
    if (excessBits > 0)
    {
        product = RoundingRightShift(product, excessBits, ref productSize);
    }

    return new BigFloat(product, a.Scale + excessBits, productSize);
}
/// <summary>
/// Multiplies an <see cref="int"/> by a BigFloat by forwarding to the (BigFloat, int) operator with the operands swapped.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator *(int a, BigFloat b) => b * a;
/// <summary>
/// Divides a BigFloat by an <see cref="int"/>.
/// </summary>
/// <remarks>
/// The parameter names are historical and read backwards: <paramref name="divisor"/> is the value
/// being divided and <paramref name="dividend"/> is the integer it is divided by.
/// </remarks>
/// <param name="divisor">The BigFloat being divided (the numerator).</param>
/// <param name="dividend">The integer to divide by (the denominator).</param>
/// <exception cref="DivideByZeroException"><paramref name="dividend"/> is zero.</exception>
public static BigFloat operator /(BigFloat divisor, int dividend)
{
    if (dividend == 0) { throw new DivideByZeroException(); }
    if (divisor.IsZero) { return ZeroWithAccuracy(-divisor.Size); }
    if (dividend == 1) { return divisor; }
    if (dividend == -1) { return -divisor; }

    // int.MinValue has no positive int counterpart, so its magnitude is supplied directly.
    uint absDividend = dividend == int.MinValue ? 0x80000000u : (uint)Math.Abs(dividend);

    // Dividing by a power of two only moves the scale; the mantissa keeps all of its bits.
    if ((absDividend & (absDividend - 1)) == 0)
    {
        int sign = Math.Sign(dividend) * divisor._mantissa.Sign;
        int k = BitOperations.TrailingZeroCount(absDividend);
        return new BigFloat(
            BigInteger.Abs(divisor._mantissa) * sign,
            divisor.Scale - k,
            divisor._size
        );
    }

    // Every other integer goes through full BigFloat division. (A former "small divisor" branch
    // did exactly the same thing and was removed as redundant.)
    return divisor / new BigFloat(new BigInteger(dividend));
}
/// <summary>
/// Divides an <see cref="int"/> by a BigFloat by first converting the integer to a BigFloat.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static BigFloat operator /(int a, BigFloat b)
{
    BigFloat numerator = new BigFloat(a);
    return numerator / b;
}
///////////////////////// Explicit CASTS /////////////////////////
/// Defines an explicit conversion of a System.Decimal object to a BigFloat.
// future: public static explicit operator BigFloat(decimal input) => new BigFloat(input);
/// <summary>
/// Defines an explicit conversion of a BigFloat to an unsigned byte.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="byte"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator byte(BigFloat value)
    => checked((byte)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to a signed byte.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in an <see cref="sbyte"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator sbyte(BigFloat value)
    => checked((sbyte)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to an unsigned 16-bit integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="ushort"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator ushort(BigFloat value)
    => checked((ushort)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to a 16-bit signed integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="short"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator short(BigFloat value)
    => checked((short)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to a 32-bit signed integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in an <see cref="int"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator int(BigFloat value)
    => checked((int)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to an unsigned 32-bit integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="uint"/>.</exception>
public static explicit operator uint(BigFloat value)
    => checked((uint)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to an unsigned 64-bit integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="ulong"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator ulong(BigFloat value)
    => checked((ulong)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to a 64-bit signed integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="long"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator long(BigFloat value)
    => checked((long)GetIntegralValue(value));
/// <summary>
/// Defines an explicit conversion of a BigFloat to an unsigned 128-bit integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero,
/// exactly as the other integer casts do.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in a <see cref="UInt128"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator UInt128(BigFloat value)
{
    // Go through GetIntegralValue like every other integer cast. The previous form shifted the
    // mantissa by Scale before rounding, so with a negative Scale the fraction was rounded
    // (1.5 became 2) instead of truncated (1.5 becomes 1).
    return (UInt128)GetIntegralValue(value);
}
/// <summary>
/// Defines an explicit conversion of a BigFloat to a signed 128-bit integer.
/// Guard bits are rounded off and any remaining fractional bits are truncated toward zero,
/// exactly as the other integer casts do.
/// </summary>
/// <exception cref="OverflowException">The integral value does not fit in an <see cref="Int128"/>.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator Int128(BigFloat value)
{
    // Go through GetIntegralValue like every other integer cast. The previous form shifted the
    // mantissa by Scale before rounding, so with a negative Scale the fraction was rounded
    // (1.5 became 2, and negatives were floored first) instead of truncated toward zero.
    return (Int128)GetIntegralValue(value);
}
/// <summary>
/// Casts a BigFloat to a BigInteger. Guard bits are rounded off and any remaining fractional
/// bits are truncated toward zero.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator BigInteger(BigFloat value) => GetIntegralValue(value);
/// <summary>
/// Converts this BigFloat to a <see cref="BigInteger"/>. Guard bits are rounded off and any
/// remaining fractional bits are truncated toward zero.
/// </summary>
public BigInteger ToBigInteger() => GetIntegralValue(this);
/// <summary>
/// Casts a BigInteger to a BigFloat by calling the BigFloat(BigInteger) constructor.
/// The GuardBits are set to zero.
/// Example: a BigInteger of 1 would translate to "1+GuardBits" bits of precision.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator BigFloat(BigInteger value) => new BigFloat(value);
/// <summary>
/// Defines an explicit conversion of a System.Double to a BigFloat by calling the BigFloat(double) constructor.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static explicit operator BigFloat(double value) => new BigFloat(value);
/// <summary>
/// Defines an explicit conversion of a BigFloat to a Double with IEEE-754 semantics
/// (round to nearest, ties to even). Handles normal, subnormal, overflow, and underflow.
/// Precision is limited to 53 bits by IEEE-754.
/// </summary>
/// <param name="value">The BigFloat to convert.</param>
/// <returns>
/// The nearest double. Magnitudes too large for a double become positive or negative infinity;
/// magnitudes that round to nothing become a zero carrying the sign of <paramref name="value"/>.
/// A zero mantissa yields +0.0.
/// </returns>
public static explicit operator double(BigFloat value)
{
    // Local helper: right-shift a non-negative magnitude with round-to-nearest, ties-to-even.
    static BigInteger ShiftRightRoundEven(BigInteger mag, int shift)
    {
        if (shift <= 0) return mag;
        // If the shift exceeds the bit length then mag < 2^(shift-1), i.e. strictly below the
        // half-way point, so the rounded result is 0. Bailing out here avoids building the
        // 2^(shift-1) constant, which for extremely small values would be an enormous
        // allocation (or throw) just to produce zero.
        if (shift > mag.GetBitLength()) return BigInteger.Zero;
        BigInteger q = mag >> shift;
        BigInteger r = mag - (q << shift);
        BigInteger half = BigInteger.One << (shift - 1);
        bool tie = r == half;
        bool up = r > half || (tie && !q.IsEven);
        return up ? q + 1 : q;
    }

    if (value._mantissa.IsZero) return 0.0;

    bool neg = value._mantissa.Sign < 0;
    BigInteger absMant = BigInteger.Abs(value._mantissa);
    int L = value._size;              // total bits in mantissa (incl. guard)
    long E = value.BinaryExponent;    // unbiased exponent; widened so adding the bias cannot wrap
    long e = E + 1023;                // biased exponent

    // Overflow -> +/-Infinity
    if (e > 2046) return neg ? double.NegativeInfinity : double.PositiveInfinity;

    // Normal range: biased exponent 1..2046
    if (e >= 1)
    {
        // Provide exactly 53 significant bits (incl. hidden 1)
        BigInteger sig53 = L > 53
            ? ShiftRightRoundEven(absMant, L - 53)
            : (absMant << (53 - L));

        // Rounding may carry 1.111.. up to 10.000..; renormalize and bump the exponent.
        if (sig53 == (BigInteger.One << 53))
        {
            sig53 >>= 1;
            e++;
            if (e > 2046) return neg ? double.NegativeInfinity : double.PositiveInfinity;
        }

        ulong mantField = (ulong)(sig53 & ((BigInteger.One << 52) - 1)); // drop leading 1
        long bits = (e << 52) | (long)mantField;
        if (neg) bits |= (1L << 63);
        return BitConverter.Int64BitsToDouble(bits);
    }

    // Subnormals (e <= 0). Compute n = round(|x| * 2^1074).
    int shift = value.Scale - GuardBits + 1074;
    BigInteger n = shift >= 0
        ? (absMant << shift)
        : ShiftRightRoundEven(absMant, -shift);

    if (n.IsZero) return neg ? -0.0 : 0.0; // underflow to signed zero

    // Rounding can push into the smallest normal (exponent field = 1, fraction = 0)
    if (n >= (BigInteger.One << 52))
    {
        long bits = 1L << 52;
        if (neg) bits |= (1L << 63);
        return BitConverter.Int64BitsToDouble(bits);
    }

    long bitsSub = (long)n; // fits in 52 bits; exponent field = 0
    if (neg) bitsSub |= (1L << 63);
    return BitConverter.Int64BitsToDouble(bitsSub);
}
/// <summary>
/// Defines an explicit conversion of a BigFloat to a Single (float) with IEEE-754 semantics
/// (round to nearest, ties to even). Precision is limited to 24 bits (incl. hidden 1).
/// </summary>
/// <param name="value">The BigFloat to convert.</param>
/// <returns>
/// The nearest float. Magnitudes too large for a float become positive or negative infinity;
/// magnitudes that round to nothing become a zero carrying the sign of <paramref name="value"/>.
/// A zero mantissa yields +0.0f.
/// </returns>
public static explicit operator float(BigFloat value)
{
    // Local helper: right-shift a non-negative magnitude with round-to-nearest, ties-to-even.
    static BigInteger ShiftRightRoundEven(BigInteger mag, int shift)
    {
        if (shift <= 0) return mag;
        // If the shift exceeds the bit length then mag < 2^(shift-1), i.e. strictly below the
        // half-way point, so the rounded result is 0. Bailing out here avoids building the
        // 2^(shift-1) constant, which for extremely small values would be an enormous
        // allocation (or throw) just to produce zero.
        if (shift > mag.GetBitLength()) return BigInteger.Zero;
        BigInteger q = mag >> shift;
        BigInteger r = mag - (q << shift);
        BigInteger half = BigInteger.One << (shift - 1);
        bool tie = r == half;
        bool up = r > half || (tie && !q.IsEven);
        return up ? q + 1 : q;
    }

    if (value._mantissa.IsZero) return 0.0f;

    bool neg = value._mantissa.Sign < 0;
    BigInteger absMant = BigInteger.Abs(value._mantissa);
    int L = value._size;
    // Widen before adding the bias: with 32-bit arithmetic an exponent close to int.MaxValue
    // would wrap negative and be misrouted to the subnormal path instead of overflowing to
    // infinity. This matches the 64-bit arithmetic used by the double conversion.
    long e = (long)value.BinaryExponent + 127;

    // Overflow -> +/-Infinity
    if (e > 254) return neg ? float.NegativeInfinity : float.PositiveInfinity;

    // Normal range: biased exponent 1..254
    if (e >= 1)
    {
        // Provide exactly 24 significant bits (incl. hidden 1)
        BigInteger sig24 = L > 24
            ? ShiftRightRoundEven(absMant, L - 24)
            : (absMant << (24 - L));

        // Rounding may carry 1.111.. up to 10.000..; renormalize and bump the exponent.
        if (sig24 == (BigInteger.One << 24))
        {
            sig24 >>= 1;
            e++;
            if (e > 254) return neg ? float.NegativeInfinity : float.PositiveInfinity;
        }

        uint mantField = (uint)(sig24 & ((BigInteger.One << 23) - 1)); // drop leading 1
        int bits = ((int)e << 23) | (int)mantField;                    // e is within 1..254 here
        if (neg) bits |= 1 << 31;
        return BitConverter.Int32BitsToSingle(bits);
    }

    // Subnormals: n = round(|x| * 2^149)
    int shift = value.Scale - GuardBits + 149;
    BigInteger n = shift >= 0
        ? (absMant << shift)
        : ShiftRightRoundEven(absMant, -shift);

    if (n.IsZero) return neg ? -0.0f : 0.0f; // underflow to signed zero

    // Rounding can push into the smallest normal (exponent field = 1, fraction = 0)
    if (n >= (BigInteger.One << 23))
    {
        int bits = 1 << 23;
        if (neg) bits |= 1 << 31;
        return BitConverter.Int32BitsToSingle(bits);
    }

    int sub = (int)n; // fits in 23 bits; exponent field = 0
    if (neg) sub |= 1 << 31;
    return BitConverter.Int32BitsToSingle(sub);
}
/// <summary>
/// Rounds to the nearest integer at the radix point using only the first fractional bit
/// (guard bits are dropped without rounding first). There is no round-to-even: whenever
/// the first fractional bit is set, the magnitude is bumped by one, so ties go away from zero.
/// </summary>
/// <param name="x">The value to round.</param>
/// <returns>The rounded value as an <see cref="int"/>.</returns>
/// <exception cref="OverflowException">The rounded value does not fit in an <see cref="int"/>.</exception>
public static int ToNearestInt(BigFloat x)
{
    if (x.IsZero) return 0;

    // Work on the magnitude and re-apply the sign at the end; shifting the absolute value
    // right is what gives truncation toward zero for negative inputs.
    bool isNegative = x._mantissa.Sign < 0;
    BigInteger magnitude = BigInteger.Abs(x._mantissa) >> GuardBits; // guard bits dropped, no rounding
    int scale = x.Scale;

    BigInteger rounded;
    if (scale >= 0)
    {
        // No working fractional field: scaling up already yields an integer.
        rounded = magnitude << scale;
    }
    else
    {
        int fracBits = -scale; // number of working fractional bits
        rounded = magnitude >> fracBits;

        // Inspect only the bit immediately right of the radix point.
        if (fracBits > 0 && !(magnitude >> (fracBits - 1)).IsEven)
        {
            rounded += BigInteger.One;
        }
    }

    return checked((int)(isNegative ? -rounded : rounded));
}
/// <summary>
/// Returns the bit length of the magnitude of <paramref name="value"/>. Measuring the
/// absolute value keeps negative powers of two the same size as their positive counterparts.
/// </summary>
/// <param name="value">The mantissa to measure.</param>
/// <returns>The number of bits in |value|; 0 when <paramref name="value"/> is zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MantissaSize(BigInteger value)
{
    BigInteger magnitude = value.Sign < 0 ? BigInteger.Negate(value) : value;
    return (int)magnitude.GetBitLength();
}
/// <summary>
/// Checks that the cached <c>_size</c> agrees with the bit length actually measured
/// from <c>_mantissa</c>. In DEBUG builds a mismatch also trips a <see cref="Debug.Assert(bool, string)"/>.
/// </summary>
/// <returns><c>true</c> when the cached size matches; otherwise <c>false</c>.</returns>
public bool Validate()
{
    int actualBitLength = MantissaSize(_mantissa);
    if (_size == actualBitLength)
    {
        return true;
    }

    Debug.Assert(false,
        $"Invalid BigFloat: _size({_size}) does not match actual bit length ({actualBitLength}).");
    return false;
}
/// <summary>
/// Runs <see cref="Validate"/> on this instance and ignores the result.
/// Calls are compiled only when DEBUG is defined.
/// </summary>
[Conditional("DEBUG")]
private void AssertValid() => _ = Validate();
/// <summary>
/// Static convenience form: runs the instance validity assertion on <paramref name="val"/>.
/// Calls are compiled only when DEBUG is defined.
/// </summary>
/// <param name="val">The BigFloat to check.</param>
[Conditional("DEBUG")]
private static void AssertValid(BigFloat val) => val.AssertValid();
}