Add floatl_to_d() function

This commit is contained in:
Lamdonn 2025-03-18 21:53:10 +08:00
parent c97a43dd6f
commit 74119f6c8a
3 changed files with 224 additions and 3 deletions

View File

@ -6,7 +6,7 @@
* \unit floatl * \unit floatl
* \brief This is a simple large float number calculate module for C language * \brief This is a simple large float number calculate module for C language
* \author Lamdonn * \author Lamdonn
* \version v1.1.0 * \version v1.1.1
* \license GPL-2.0 * \license GPL-2.0
* \copyright Copyright (C) 2023 Lamdonn. * \copyright Copyright (C) 2023 Lamdonn.
********************************************************************************************************/ ********************************************************************************************************/
@ -1731,6 +1731,172 @@ floatl floatl_from_d(double value)
return result; return result;
} }
/**
* \brief Converts a custom floating-point type (floatl) to a double precision floating-point number.
* \param a: The input value of the custom floating-point type (floatl) that needs to be converted.
* \return A double precision floating-point number which is the converted result of the input floatl value.
*
* This function is mainly responsible for converting a value of the custom floating-point type 'floatl'
* to a standard double precision floating-point number. It first checks if the 'floatl' type uses 64 bits
* for storage by checking the preprocessor macro 'FLOATL_USE_64BITS'. If it does, it simply copies the memory
* of the 'floatl' variable to a 'double_u' structure and returns the corresponding floating-point value.
* Otherwise, it proceeds with a series of operations for the conversion.
*/
double floatl_to_d(floatl a)
{
double_u du;
// If the 'floatl' type uses 64 bits (same as the size of 'double'), directly copy the memory
// and return the stored value as a 'double'.
#if defined(FLOATL_USE_64BITS)
memcpy(&du, &a, sizeof(double));
return du.float_;
#endif
// Extract the exponent part from the input 'floatl' value. Subtract the middle value of the exponent
// range defined for 'floatl' (__FLOATL_EXP_MID_VALUE__) to get a relative exponent value.
int32_t exp = a.exponent - __FLOATL_EXP_MID_VALUE__;
// Create a new 'floatl' variable'mant' initialized with the input 'a', and then set its sign and exponent
// to 0 to isolate just the mantissa part of the original 'floatl' value.
floatl mant = a; mant.sign = 0; mant.exponent = 0U;
// Set the sign of the result to be the same as the sign of the input 'floatl' value.
bool sign = a.sign;
// Handle special cases such as NaN (Not a Number) and infinity. If the exponent of the input 'floatl'
// is equal to the whole value of the exponent range defined for 'floatl' (__FLOATL_EXP_WHL_VALUE__),
// it indicates a special value.
if (a.exponent == __FLOATL_EXP_WHL_VALUE__)
{
// If the mantissa has a sign (checked by 'floatl_int_sign' function), set the high and low parts
// of the 'int_' structure within 'double_u' to all 1s (representing a special NaN or infinity case).
if (floatl_int_sign(mant))
{
du.int_.high = 0xFFFFFFFF;
du.int_.low = 0xFFFFFFFF;
}
else
{
// Otherwise, set the high and low parts to 0.
du.int_.high = 0;
du.int_.low = 0;
}
// Set the sign and exponent of the 'double_u' structure according to the rules for special values.
du.sign = sign;
du.exponent = 2047;
// Return the resulting 'double' value representing the special value.
return du.float_;
}
// Adjust the extracted exponent to fit within the exponent range of a standard double precision
// floating-point number (-1022 to 1023). Add 1023 to convert it to the biased exponent form
// used by doubles.
exp += 1023;
// Check for overflow. If the adjusted exponent is greater than 2047 (the maximum exponent value
// for a double), it means an overflow has occurred, and we return positive or negative infinity
// depending on the sign.
if (exp > 2047)
{
du.int_.high = 0;
du.int_.low = 0;
du.sign = sign;
du.exponent = 2047;
return du.float_;
}
// Check for underflow. If the adjusted exponent is less than 0, it means an underflow has occurred,
// and we return 0.
else if (exp < 0)
{
du.int_.high = 0;
du.int_.low = 0;
du.sign = sign;
du.exponent = 0;
return du.float_;
}
// Add the hidden bit (which is 1 in the normalized representation of floating-point numbers)
// to the mantissa. Use the 'floatl_int_or' function to perform a bitwise OR operation to set the
// hidden bit.
mant = floatl_int_or(mant, __FLOATL_HIDE_MANT_BIT__);
// Calculate the number of bits that need to be discarded from the mantissa to fit it into the
// 52-bit mantissa of a double precision floating-point number. Subtract 52 from the total number
// of mantissa bits defined for 'floatl' (__FLOATL_MANT_BITS__).
int32_t discard_bits = __FLOATL_MANT_BITS__ - 52;
// Keep only the highest 52 bits of the mantissa. Use the 'floatl_int_shr' function to shift
// the mantissa right by the number of bits to be discarded.
floatl double_mant = floatl_int_shr(mant, discard_bits);
// Create a 'floatl' variable 'int1' representing the integer value 1. Initialize its first 32-bit
// part (u32[0]) to 1.
floatl int1 = __FLOATL_CONST_0__; int1.u32[0] = 1;
// Get the highest bit among the bits to be discarded. Use the 'floatl_int_shl' function to shift
// 'int1' left by the number of bits equal to the number of bits to be discarded minus 1.
// This bit will be used for rounding purposes.
floatl high_bit = floatl_int_shl(int1, discard_bits - 1);
// Get the bits that will be discarded from the mantissa. Use the 'floatl_int_and' function to perform
// a bitwise AND operation between the original mantissa and a value that represents the bits to be
// discarded (formed by first shifting 'int1' left by the number of bits to be discarded and then
// decrementing it using 'floatl_int_dec').
floatl round_bits = floatl_int_and(mant, floatl_int_dec(floatl_int_shl(int1, discard_bits)));
// Determine whether rounding is needed. If the bits to be discarded ('round_bits') are greater than
// the highest bit among them ('high_bit'), or if they are equal and the lowest bit of the remaining
// mantissa ('double_mant') is 1 (checked by 'floatl_int_sign' function), then rounding is required.
if (floatl_int_cmp(round_bits, high_bit) > 0 ||
(floatl_int_cmp(round_bits, high_bit) == 0 && floatl_int_sign(floatl_int_and(double_mant, int1))))
{
// Perform rounding by incrementing the remaining mantissa. Use the 'floatl_int_inc' function.
floatl_int_inc(double_mant);
// Check if the increment of the mantissa causes a carry to the exponent part. If the sign bit
// of the result of a bitwise AND operation between the remaining mantissa and a value representing
// the bit position 2 bits higher than the hidden bit position (shifted 'int1' left by 52 + 2)
// is 1, it means there is a carry to the exponent.
if (floatl_int_sign(floatl_int_and(double_mant, floatl_int_shl(int1, 52 + 2))))
{
// If there is a carry to the exponent, shift the remaining mantissa right by 1 bit
// to make room for the carry in the exponent. Use the 'floatl_int_shr' function.
double_mant = floatl_int_shr(double_mant, 1);
// Increment the exponent by 1.
exp += 1;
// Check for a secondary overflow of the exponent. If the exponent after the increment is
// greater than 2047, it means another overflow has occurred, and we return positive or
// negative infinity depending on the sign.
if (exp > 2047)
{
du.int_.high = 0;
du.int_.low = 0;
du.sign = sign;
du.exponent = 2047;
return du.float_;
}
}
}
// Combine the processed mantissa, sign, and exponent to form the final 'double' value.
// Set the high and low parts of the 'int_' structure within 'double_u' to the corresponding parts
// of the remaining mantissa.
du.int_.high = double_mant.u32[1];
du.int_.low = double_mant.u32[0];
// Set the sign of the 'double_u' structure.
du.sign = sign;
// Set the exponent of the 'double_u' structure.
du.exponent = exp;
// Return the final converted double precision floating-point number.
return du.float_;
}
/** /**
* \brief Get the sign of a 'floatl' number. * \brief Get the sign of a 'floatl' number.
* \param[in] a: The 'floatl' number whose sign is to be determined. * \param[in] a: The 'floatl' number whose sign is to be determined.

View File

@ -7,7 +7,7 @@
* \unit floatl * \unit floatl
* \brief This is a simple large float number calculate module for C language * \brief This is a simple large float number calculate module for C language
* \author Lamdonn * \author Lamdonn
* \version v1.1.0 * \version v1.1.1
* \license GPL-2.0 * \license GPL-2.0
* \copyright Copyright (C) 2023 Lamdonn. * \copyright Copyright (C) 2023 Lamdonn.
********************************************************************************************************/ ********************************************************************************************************/
@ -25,7 +25,7 @@
#define FLOATL_V_MAJOR 1 #define FLOATL_V_MAJOR 1
#define FLOATL_V_MINOR 1 #define FLOATL_V_MINOR 1
#define FLOATL_V_PATCH 0 #define FLOATL_V_PATCH 1
/** /**
* \brief Common constant definitions * \brief Common constant definitions
@ -151,9 +151,11 @@ floatl floatl_round(floatl a);
// Conversion functions // Conversion functions
// floatl_from: Converts a string to a floatl number // floatl_from: Converts a string to a floatl number
// floatl_from_d: Converts a double-precision floating-point number (double) to a floatl number // floatl_from_d: Converts a double-precision floating-point number (double) to a floatl number
// floatl_to_d: Converts a floatl number to a double-precision floating-point number (double)
floatl floatl_from(const char *str); floatl floatl_from(const char *str);
floatl floatl_from_d(double value); floatl floatl_from_d(double value);
double floatl_to_d(floatl a);
// Output function // Output function
// Converts a floatl number to a string according to the specified format and stores it in the buffer // Converts a floatl number to a string according to the specified format and stores it in the buffer

View File

@ -243,6 +243,57 @@ static double double_from_uint64(uint64_t value)
return v.double_; return v.double_;
} }
/**
 *  \brief Converts a double to a float using integer bit manipulation only.
 *  \param[in] d: The double-precision value to convert.
 *  \return The float nearest to 'd' (round-to-nearest, ties-to-even).
 *          - NaN maps to a NaN with all fraction bits set, infinity to infinity.
 *          - Magnitudes too large for a float map to signed infinity.
 *          - Small magnitudes map to float subnormals or signed zero.
 */
float double_to_float(double d)
{
    // Unions give direct access to the raw bit patterns.
    union { double d; uint64_t u; } du = { .d = d };
    union { float f; uint32_t u; } fu;

    // Split the double into sign (already moved to float's bit 31),
    // raw biased exponent and 52-bit fraction.
    const uint32_t sign_bit = (uint32_t)(du.u >> 63) << 31;
    const int32_t raw_exp = (int32_t)((du.u >> 52) & 0x7FF);
    const uint64_t frac52 = du.u & 0x000FFFFFFFFFFFFFULL;

    // Exponent all ones: NaN (non-zero fraction) or infinity.
    if (raw_exp == 0x7FF) {
        fu.u = sign_bit | 0x7F800000U | (frac52 ? 0x007FFFFFU : 0U);
        return fu.f;
    }

    // Re-bias from double (1023) to float (127). Normal floats use 1..254.
    const int32_t exp = raw_exp - 1023 + 127;

    // Overflow: 255 is the infinity/NaN encoding, so it must not be used for finite values.
    if (exp >= 255) {
        fu.u = sign_bit | 0x7F800000U;
        return fu.f;
    }

    // Below half of the smallest float subnormal (2^-149) everything rounds to zero.
    // Double zeros and double subnormals (raw_exp == 0) also end up here.
    if (exp < -23) {
        fu.u = sign_bit;
        return fu.f;
    }

    // 53-bit significand with the implicit leading 1 restored.
    const uint64_t sig = frac52 | 0x0010000000000000ULL;

    // Normal results keep the top 24 bits (drop 29). Subnormal results are
    // additionally shifted right so that the exponent becomes the fixed 2^-126.
    // Because exp >= -23 the shift is at most 53, well below the 64-bit width.
    const unsigned shift = (exp > 0) ? 29U : (unsigned)(30 - exp);

    uint32_t kept = (uint32_t)(sig >> shift);
    const uint64_t half = 1ULL << (shift - 1);
    const uint64_t dropped = sig & ((1ULL << shift) - 1);

    // Round to nearest; on an exact tie round towards the even kept value.
    if (dropped > half || (dropped == half && (kept & 1U))) {
        kept += 1;
    }

    // For normal results remove the hidden bit and place the exponent.
    uint32_t exp_field = 0;
    if (exp > 0) {
        exp_field = (uint32_t)exp << 23;
        kept -= 0x00800000U;
    }

    // Adding (not OR-ing) lets a rounding carry out of the fraction bump the exponent:
    // the largest subnormal becomes the smallest normal and the largest finite value
    // becomes infinity, both with a zero fraction.
    fu.u = sign_bit | (exp_field + kept);
    return fu.f;
}
// 将 double 的二进制表示转换为 uint64_t // 将 double 的二进制表示转换为 uint64_t
typedef unsigned long long uint64_t; typedef unsigned long long uint64_t;
typedef long long int64_t; typedef long long int64_t;
@ -1562,6 +1613,8 @@ static void test_base(void)
printf("floatl_ceil %s\r\n", floatl_show(floatl_ceil(floatl(v)), buffer, sizeof(buffer), "%f")); printf("floatl_ceil %s\r\n", floatl_show(floatl_ceil(floatl(v)), buffer, sizeof(buffer), "%f"));
printf("floatl_floor %s\r\n", floatl_show(floatl_floor(floatl(v)), buffer, sizeof(buffer), "%f")); printf("floatl_floor %s\r\n", floatl_show(floatl_floor(floatl(v)), buffer, sizeof(buffer), "%f"));
printf("floatl_round %s\r\n", floatl_show(floatl_round(floatl(v)), buffer, sizeof(buffer), "%f")); printf("floatl_round %s\r\n", floatl_show(floatl_round(floatl(v)), buffer, sizeof(buffer), "%f"));
printf("to_d %f\r\n", floatl_to_d(FLOATL_PI));
} }
/************************************************************************************/ /************************************************************************************/