mirror of
https://gitee.com/Lamdonn/varch.git
synced 2025-12-06 16:56:42 +08:00
Add floatl_to_d() function
This commit is contained in:
parent
c97a43dd6f
commit
74119f6c8a
@ -6,7 +6,7 @@
|
||||
* \unit floatl
|
||||
* \brief This is a simple large floating-point number calculation module for the C language
|
||||
* \author Lamdonn
|
||||
* \version v1.1.0
|
||||
* \version v1.1.1
|
||||
* \license GPL-2.0
|
||||
* \copyright Copyright (C) 2023 Lamdonn.
|
||||
********************************************************************************************************/
|
||||
@ -1731,6 +1731,172 @@ floatl floatl_from_d(double value)
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Converts a custom floating-point type (floatl) to a double precision floating-point number.
|
||||
* \param a: The input value of the custom floating-point type (floatl) that needs to be converted.
|
||||
* \return A double precision floating-point number which is the converted result of the input floatl value.
|
||||
*
|
||||
* This function is mainly responsible for converting a value of the custom floating-point type 'floatl'
|
||||
* to a standard double precision floating-point number. It first checks if the 'floatl' type uses 64 bits
|
||||
* for storage by checking the preprocessor macro 'FLOATL_USE_64BITS'. If it does, it simply copies the memory
|
||||
* of the 'floatl' variable to a 'double_u' structure and returns the corresponding floating-point value.
|
||||
* Otherwise, it proceeds with a series of operations for the conversion.
|
||||
*/
|
||||
double floatl_to_d(floatl a)
|
||||
{
|
||||
double_u du;
|
||||
|
||||
// If the 'floatl' type uses 64 bits (same as the size of 'double'), directly copy the memory
|
||||
// and return the stored value as a 'double'.
|
||||
#if defined(FLOATL_USE_64BITS)
|
||||
memcpy(&du, &a, sizeof(double));
|
||||
return du.float_;
|
||||
#endif
|
||||
|
||||
// Extract the exponent part from the input 'floatl' value. Subtract the middle value of the exponent
|
||||
// range defined for 'floatl' (__FLOATL_EXP_MID_VALUE__) to get a relative exponent value.
|
||||
int32_t exp = a.exponent - __FLOATL_EXP_MID_VALUE__;
|
||||
// Create a new 'floatl' variable'mant' initialized with the input 'a', and then set its sign and exponent
|
||||
// to 0 to isolate just the mantissa part of the original 'floatl' value.
|
||||
floatl mant = a; mant.sign = 0; mant.exponent = 0U;
|
||||
// Set the sign of the result to be the same as the sign of the input 'floatl' value.
|
||||
bool sign = a.sign;
|
||||
|
||||
// Handle special cases such as NaN (Not a Number) and infinity. If the exponent of the input 'floatl'
|
||||
// is equal to the whole value of the exponent range defined for 'floatl' (__FLOATL_EXP_WHL_VALUE__),
|
||||
// it indicates a special value.
|
||||
if (a.exponent == __FLOATL_EXP_WHL_VALUE__)
|
||||
{
|
||||
// If the mantissa has a sign (checked by 'floatl_int_sign' function), set the high and low parts
|
||||
// of the 'int_' structure within 'double_u' to all 1s (representing a special NaN or infinity case).
|
||||
if (floatl_int_sign(mant))
|
||||
{
|
||||
du.int_.high = 0xFFFFFFFF;
|
||||
du.int_.low = 0xFFFFFFFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise, set the high and low parts to 0.
|
||||
du.int_.high = 0;
|
||||
du.int_.low = 0;
|
||||
}
|
||||
|
||||
// Set the sign and exponent of the 'double_u' structure according to the rules for special values.
|
||||
du.sign = sign;
|
||||
du.exponent = 2047;
|
||||
|
||||
// Return the resulting 'double' value representing the special value.
|
||||
return du.float_;
|
||||
}
|
||||
|
||||
// Adjust the extracted exponent to fit within the exponent range of a standard double precision
|
||||
// floating-point number (-1022 to 1023). Add 1023 to convert it to the biased exponent form
|
||||
// used by doubles.
|
||||
exp += 1023;
|
||||
|
||||
// Check for overflow. If the adjusted exponent is greater than 2047 (the maximum exponent value
|
||||
// for a double), it means an overflow has occurred, and we return positive or negative infinity
|
||||
// depending on the sign.
|
||||
if (exp > 2047)
|
||||
{
|
||||
du.int_.high = 0;
|
||||
du.int_.low = 0;
|
||||
du.sign = sign;
|
||||
du.exponent = 2047;
|
||||
|
||||
return du.float_;
|
||||
}
|
||||
// Check for underflow. If the adjusted exponent is less than 0, it means an underflow has occurred,
|
||||
// and we return 0.
|
||||
else if (exp < 0)
|
||||
{
|
||||
du.int_.high = 0;
|
||||
du.int_.low = 0;
|
||||
du.sign = sign;
|
||||
du.exponent = 0;
|
||||
|
||||
return du.float_;
|
||||
}
|
||||
|
||||
// Add the hidden bit (which is 1 in the normalized representation of floating-point numbers)
|
||||
// to the mantissa. Use the 'floatl_int_or' function to perform a bitwise OR operation to set the
|
||||
// hidden bit.
|
||||
mant = floatl_int_or(mant, __FLOATL_HIDE_MANT_BIT__);
|
||||
|
||||
// Calculate the number of bits that need to be discarded from the mantissa to fit it into the
|
||||
// 52-bit mantissa of a double precision floating-point number. Subtract 52 from the total number
|
||||
// of mantissa bits defined for 'floatl' (__FLOATL_MANT_BITS__).
|
||||
int32_t discard_bits = __FLOATL_MANT_BITS__ - 52;
|
||||
|
||||
// Keep only the highest 52 bits of the mantissa. Use the 'floatl_int_shr' function to shift
|
||||
// the mantissa right by the number of bits to be discarded.
|
||||
floatl double_mant = floatl_int_shr(mant, discard_bits);
|
||||
|
||||
// Create a 'floatl' variable 'int1' representing the integer value 1. Initialize its first 32-bit
|
||||
// part (u32[0]) to 1.
|
||||
floatl int1 = __FLOATL_CONST_0__; int1.u32[0] = 1;
|
||||
|
||||
// Get the highest bit among the bits to be discarded. Use the 'floatl_int_shl' function to shift
|
||||
// 'int1' left by the number of bits equal to the number of bits to be discarded minus 1.
|
||||
// This bit will be used for rounding purposes.
|
||||
floatl high_bit = floatl_int_shl(int1, discard_bits - 1);
|
||||
|
||||
// Get the bits that will be discarded from the mantissa. Use the 'floatl_int_and' function to perform
|
||||
// a bitwise AND operation between the original mantissa and a value that represents the bits to be
|
||||
// discarded (formed by first shifting 'int1' left by the number of bits to be discarded and then
|
||||
// decrementing it using 'floatl_int_dec').
|
||||
floatl round_bits = floatl_int_and(mant, floatl_int_dec(floatl_int_shl(int1, discard_bits)));
|
||||
|
||||
// Determine whether rounding is needed. If the bits to be discarded ('round_bits') are greater than
|
||||
// the highest bit among them ('high_bit'), or if they are equal and the lowest bit of the remaining
|
||||
// mantissa ('double_mant') is 1 (checked by 'floatl_int_sign' function), then rounding is required.
|
||||
if (floatl_int_cmp(round_bits, high_bit) > 0 ||
|
||||
(floatl_int_cmp(round_bits, high_bit) == 0 && floatl_int_sign(floatl_int_and(double_mant, int1))))
|
||||
{
|
||||
// Perform rounding by incrementing the remaining mantissa. Use the 'floatl_int_inc' function.
|
||||
floatl_int_inc(double_mant);
|
||||
|
||||
// Check if the increment of the mantissa causes a carry to the exponent part. If the sign bit
|
||||
// of the result of a bitwise AND operation between the remaining mantissa and a value representing
|
||||
// the bit position 2 bits higher than the hidden bit position (shifted 'int1' left by 52 + 2)
|
||||
// is 1, it means there is a carry to the exponent.
|
||||
if (floatl_int_sign(floatl_int_and(double_mant, floatl_int_shl(int1, 52 + 2))))
|
||||
{
|
||||
// If there is a carry to the exponent, shift the remaining mantissa right by 1 bit
|
||||
// to make room for the carry in the exponent. Use the 'floatl_int_shr' function.
|
||||
double_mant = floatl_int_shr(double_mant, 1);
|
||||
// Increment the exponent by 1.
|
||||
exp += 1;
|
||||
|
||||
// Check for a secondary overflow of the exponent. If the exponent after the increment is
|
||||
// greater than 2047, it means another overflow has occurred, and we return positive or
|
||||
// negative infinity depending on the sign.
|
||||
if (exp > 2047)
|
||||
{
|
||||
du.int_.high = 0;
|
||||
du.int_.low = 0;
|
||||
du.sign = sign;
|
||||
du.exponent = 2047;
|
||||
return du.float_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Combine the processed mantissa, sign, and exponent to form the final 'double' value.
|
||||
// Set the high and low parts of the 'int_' structure within 'double_u' to the corresponding parts
|
||||
// of the remaining mantissa.
|
||||
du.int_.high = double_mant.u32[1];
|
||||
du.int_.low = double_mant.u32[0];
|
||||
// Set the sign of the 'double_u' structure.
|
||||
du.sign = sign;
|
||||
// Set the exponent of the 'double_u' structure.
|
||||
du.exponent = exp;
|
||||
|
||||
// Return the final converted double precision floating-point number.
|
||||
return du.float_;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get the sign of a 'floatl' number.
|
||||
* \param[in] a: The 'floatl' number whose sign is to be determined.
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
* \unit floatl
|
||||
* \brief This is a simple large floating-point number calculation module for the C language
|
||||
* \author Lamdonn
|
||||
* \version v1.1.0
|
||||
* \version v1.1.1
|
||||
* \license GPL-2.0
|
||||
* \copyright Copyright (C) 2023 Lamdonn.
|
||||
********************************************************************************************************/
|
||||
@ -25,7 +25,7 @@
|
||||
|
||||
#define FLOATL_V_MAJOR 1
|
||||
#define FLOATL_V_MINOR 1
|
||||
#define FLOATL_V_PATCH 0
|
||||
#define FLOATL_V_PATCH 1
|
||||
|
||||
/**
|
||||
* \brief Common constant definitions
|
||||
@ -151,9 +151,11 @@ floatl floatl_round(floatl a);
|
||||
// Conversion functions
|
||||
// floatl_from: Converts a string to a floatl number
|
||||
// floatl_from_d: Converts a double-precision floating-point number (double) to a floatl number
|
||||
// floatl_to_d: Converts a floatl number to a double-precision floating-point number (double)
|
||||
|
||||
floatl floatl_from(const char *str);
|
||||
floatl floatl_from_d(double value);
|
||||
double floatl_to_d(floatl a);
|
||||
|
||||
// Output function
|
||||
// Converts a floatl number to a string according to the specified format and stores it in the buffer
|
||||
|
||||
@ -243,6 +243,57 @@ static double double_from_uint64(uint64_t value)
|
||||
return v.double_;
|
||||
}
|
||||
|
||||
/**
 *  \brief Converts a double to a float by operating on the IEEE 754 bit patterns directly.
 *  \param[in] d: The double precision value to convert.
 *  \return The nearest float (round-half-to-even). NaN stays NaN, infinities and values too large
 *          for a float become signed infinity, values too small become float subnormals or signed zero.
 *
 *  Software reference implementation of the narrowing conversion; it mirrors what `(float)d`
 *  produces in the default rounding mode.
 */
float double_to_float(double d)
{
    // Unions give access to the raw bit patterns.
    union { double d; uint64_t u; } du = { .d = d };
    union { float f; uint32_t u; } fu;

    // Split the double into sign (already moved to float bit 31), biased exponent and 52-bit mantissa.
    uint32_t sign = (uint32_t)(du.u >> 63) << 31;
    int32_t biased = (int32_t)((du.u >> 52) & 0x7FF);
    uint64_t mant = du.u & 0x000FFFFFFFFFFFFFULL;

    // Special values: all-ones exponent is infinity (mantissa 0) or NaN (mantissa non-zero).
    if (biased == 0x7FF) {
        fu.u = sign | 0x7F800000u | (mant ? 0x007FFFFFu : 0u);
        return fu.f;
    }

    // Zero and double subnormals are far below the smallest float subnormal: signed zero.
    if (biased == 0) {
        fu.u = sign;
        return fu.f;
    }

    // Re-bias from double (1023) to float (127).
    biased += 127 - 1023;

    // Biased exponent 255 is reserved for infinity/NaN, so reaching it means overflow.
    if (biased >= 255) {
        fu.u = sign | 0x7F800000u;
        return fu.f;
    }

    // 53-bit significand with the implicit leading 1 restored (bit 52).
    uint64_t significand = mant | 0x0010000000000000ULL;

    // A normal float keeps 24 significant bits, so 29 low bits are dropped. A subnormal result
    // has a fixed exponent, so the significand is shifted further right instead.
    int shift = 29;
    if (biased <= 0) {
        shift += 1 - biased;
        biased = 0;
        if (shift >= 54) {              // less than half of the smallest subnormal
            fu.u = sign;
            return fu.f;
        }
    }

    uint32_t float_mant = (uint32_t)(significand >> shift);

    // Round half to even on the discarded bits.
    uint64_t half = 1ULL << (shift - 1);
    uint64_t round_bits = significand & ((half << 1) - 1);
    if (round_bits > half ||
        (round_bits == half && (float_mant & 0x1u))) {
        float_mant += 1;
        // A carry out of the 24-bit significand appears in bit 24 (bit 23 is the hidden bit).
        if (float_mant & 0x01000000u) {
            float_mant >>= 1;
            biased += 1;
            if (biased >= 255) {        // rounding pushed the value past FLT_MAX
                fu.u = sign | 0x7F800000u;
                return fu.f;
            }
        }
    }

    // A subnormal that rounded up into bit 23 has become the smallest normal float.
    if (biased == 0 && (float_mant & 0x00800000u)) {
        biased = 1;
    }

    // Assemble the result; masking drops the hidden bit.
    fu.u = sign | ((uint32_t)biased << 23) | (float_mant & 0x007FFFFFu);
    return fu.f;
}
|
||||
|
||||
// Convert the binary representation of a double to uint64_t
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef long long int64_t;
|
||||
@ -1562,6 +1613,8 @@ static void test_base(void)
|
||||
printf("floatl_ceil %s\r\n", floatl_show(floatl_ceil(floatl(v)), buffer, sizeof(buffer), "%f"));
|
||||
printf("floatl_floor %s\r\n", floatl_show(floatl_floor(floatl(v)), buffer, sizeof(buffer), "%f"));
|
||||
printf("floatl_round %s\r\n", floatl_show(floatl_round(floatl(v)), buffer, sizeof(buffer), "%f"));
|
||||
|
||||
printf("to_d %f\r\n", floatl_to_d(FLOATL_PI));
|
||||
}
|
||||
|
||||
/************************************************************************************/
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user