varch/source/07_math/floatl.c

3902 lines
147 KiB
C

/*********************************************************************************************************
* ------------------------------------------------------------------------------------------------------
* file description
* ------------------------------------------------------------------------------------------------------
* \file floatl.c
* \unit floatl
* \brief A simple large floating-point number calculation module for the C language
* \author Lamdonn
* \version v1.1.0
* \license GPL-2.0
* \copyright Copyright (C) 2023 Lamdonn.
********************************************************************************************************/
#include "floatl.h"
/**
 * \brief Overlay of a single-precision `float` with its raw bits and its bit-fields.
 *
 * All members share the same storage, so a value written through one member can be
 * inspected through another:
 * 1. `float_`  - the value as a standard `float`.
 * 2. `int_`    - the bit pattern as an unsigned 32-bit integer.
 * 3. anonymous struct - `mantissa` (23 bits), `exponent` (8 bits), `sign` (1 bit).
 *
 * NOTE(review): the field widths match the IEEE 754 binary32 format, but the fields
 * only land on the right bits when the compiler allocates bit-fields starting from the
 * least significant bit. Bit-field allocation order is implementation-defined -
 * confirm for every supported toolchain/target.
 */
typedef union {
// The value as a standard single-precision floating-point number
float float_;
// The same 32 bits viewed as an unsigned integer
uint32_t int_;
// The same 32 bits split into sign / exponent / mantissa bit-fields
struct
{
// Fraction (mantissa) field, 23 bits, declared first
uint32_t mantissa : 23;
// Exponent field, 8 bits
uint32_t exponent : 8;
// Sign field, 1 bit (0 for positive, 1 for negative)
uint32_t sign : 1;
};
} float_u;
/**
 * \brief Overlay of a double-precision `double` with its raw words and its bit-fields.
 *
 * All members share the same storage:
 * 1. `float_`  - the value as a standard `double`.
 * 2. `int_`    - two unsigned 32-bit words, `low` declared before `high`.
 * 3. anonymous struct - `mantissa_l` (32 bits), `mantissa_h` (20 bits),
 *    `exponent` (11 bits), `sign` (1 bit).
 *
 * NOTE(review): `low`/`mantissa_l` occupy the first four bytes of the object, so they
 * only hold the least significant half of the double on a little-endian target, and
 * the bit-field order additionally depends on implementation-defined bit-field
 * allocation - confirm for every supported toolchain/target.
 */
typedef union {
// The value as a standard double-precision floating-point number
double float_;
// The same 64 bits viewed as two unsigned 32-bit words
struct
{
// First 32-bit word in memory (named as the lower half)
uint32_t low;
// Second 32-bit word in memory (named as the upper half)
uint32_t high;
} int_;
// The same 64 bits split into sign / exponent / mantissa fields
struct
{
// Lower 32 bits of the mantissa
uint32_t mantissa_l;
// Upper 20 bits of the mantissa
uint32_t mantissa_h : 20;
// Exponent field, 11 bits
uint32_t exponent : 11;
// Sign field, 1 bit (0 for positive, 1 for negative)
uint32_t sign : 1;
};
} double_u;
/**
 * \brief Double-width companion of `floatl`.
 *
 * The structure wraps one anonymous union that exposes the same storage in three ways:
 * as an array of 16-bit words, as an array of 32-bit words, or as two `floatl` values
 * named `low` and `high`. `low` is declared first and therefore overlays the first
 * elements of `u16` / `u32`.
 *
 * NOTE(review): the array sizes come from `__FLOATL2_U16_PARTS__` and
 * `__FLOATL2_U32_PARTS__`, which are defined outside this file section; presumably
 * they make both arrays exactly as large as two `floatl` objects - confirm in floatl.h.
 */
typedef struct {
/**
 * \brief Three overlapping views of the same storage.
 */
union {
/**
 * \brief View as 16-bit words; element count is `__FLOATL2_U16_PARTS__`.
 */
uint16_t u16[__FLOATL2_U16_PARTS__];
/**
 * \brief View as 32-bit words; element count is `__FLOATL2_U32_PARTS__`.
 */
uint32_t u32[__FLOATL2_U32_PARTS__];
/**
 * \brief View as two `floatl` halves.
 */
struct {
/**
 * \brief The half named `low`; it starts at offset 0 of the union.
 */
floatl low;
/**
 * \brief The half named `high`; it follows `low` in memory.
 */
floatl high;
};
};
} floatl2;
/**
 * \brief Bookkeeping for a character stack stored in a caller-provided ring buffer.
 *
 * The helpers in this file (`fl_stack_push`, `fl_stack_pop`, `fl_stack_push_h`) index
 * `base` modulo `capacity`, so the storage is used circularly. The structure itself
 * does not allocate or own `base`; whoever initializes it supplies the buffer.
 */
typedef struct
{
/**
 * \brief Start of the character buffer that holds the elements.
 */
char* base;
/**
 * \brief Number of slots in `base`; used as the modulus for all index arithmetic.
 */
int capacity;
/**
 * \brief Number of elements currently stored (never raised above `capacity`).
 */
int size;
/**
 * \brief Index of the slot the next tail push writes to.
 *
 * A pop steps this index back by one (with wrap-around) and reads that slot.
 */
int tail;
} FL_STACK;
/**
 * \brief Build a `floatl2` value from a single `floatl`.
 * \param x: Value stored in the `low` member.
 * \return A `floatl2` compound literal with `low` set to `x` and `high` set to
 *         `__FLOATL_INT_ZERO__`.
 *
 * The expansion is a C99 compound literal with designated initializers, so it can be
 * used wherever an expression of type `floatl2` is accepted.
 * `__FLOATL_INT_ZERO__` is defined outside this file section (presumably the all-zero
 * `floatl` value - confirm in floatl.h).
 */
#define FLOATL2(x) (floatl2){.low=(x),.high=__FLOATL_INT_ZERO__}
/**
 * \brief Formatting flag (bit 0, 0x01): pad with zeros.
 *
 * When set, output narrower than the requested width is padded with '0' instead of
 * spaces. Every FLAGS_* value below uses a distinct bit, so they can be OR-ed together.
 */
#define FLAGS_ZEROPAD (0x01)
/**
 * \brief Formatting flag (bit 1, 0x02): left-align.
 *
 * When set, the output is left-aligned within the requested width.
 */
#define FLAGS_LEFT (0x02)
/**
 * \brief Formatting flag (bit 2, 0x04): force a sign.
 *
 * When set, positive numbers are prefixed with '+'.
 */
#define FLAGS_PLUS (0x04)
/**
 * \brief Formatting flag (bit 3, 0x08): space in place of a sign.
 *
 * When set, positive numbers are prefixed with a space.
 */
#define FLAGS_SPACE (0x08)
/**
 * \brief Formatting flag (bit 4, 0x10): alternate form.
 *
 * The exact effect depends on the format specifier being processed.
 */
#define FLAGS_HASH (0x10)
/**
 * \brief Formatting flag (bit 5, 0x20): uppercase output.
 *
 * When set, specifiers that have two letter cases (e.g. 'a' vs 'A') use the uppercase
 * form.
 */
#define FLAGS_CASE (0x20)
/**
 * \brief Append one character to the output buffer of the enclosing function.
 * \param c: The character to append.
 *
 * The macro is not self-contained: it reads and writes the identifiers `buffer`,
 * `length` and `max`, which must be in scope at the point of use. While
 * `length < max` the character is stored at `buffer[length]` and `length` is
 * incremented. Otherwise the macro executes `return -2;` in the ENCLOSING function,
 * so it may only be used inside functions whose return type accepts -2.
 */
#define PRINT_CHAR(c) do { if (length < max) { buffer[length++] = (c); } else return -2; } while (0)
/**
 * \brief Lookup table of the powers of ten 10^0 .. 10^15 as `floatl` values.
 *
 * The index is the decimal exponent: flpow10[n] is initialized from the constant
 * FLOATL_CONST_1e<n>. Those constants are defined outside this file section
 * (presumably in floatl.h).
 */
static floatl flpow10[16] = {
FLOATL_CONST_1e0 , ///< 10^0
FLOATL_CONST_1e1 , ///< 10^1
FLOATL_CONST_1e2 , ///< 10^2
FLOATL_CONST_1e3 , ///< 10^3
FLOATL_CONST_1e4 , ///< 10^4
FLOATL_CONST_1e5 , ///< 10^5
FLOATL_CONST_1e6 , ///< 10^6
FLOATL_CONST_1e7 , ///< 10^7
FLOATL_CONST_1e8 , ///< 10^8
FLOATL_CONST_1e9 , ///< 10^9
FLOATL_CONST_1e10, ///< 10^10
FLOATL_CONST_1e11, ///< 10^11
FLOATL_CONST_1e12, ///< 10^12
FLOATL_CONST_1e13, ///< 10^13
FLOATL_CONST_1e14, ///< 10^14
FLOATL_CONST_1e15, ///< 10^15
};
/*
 * Internal static function declarations.
 *
 * Forward declarations let the helpers below call each other regardless of the order
 * in which they are defined (e.g. floatl_int_mul uses floatl_int_neg, which is defined
 * later in the file).
 */
/* Miscellaneous helpers */
static int count_u32_leading_zero(uint32_t x);
static floatl floatl_pow10(int32_t n);
static char hexI(char c);
static int pad_mant(floatl *number, int *part, int *bitoffset, int *bitnum, char c);
/* Unsigned integer arithmetic on the floatl bit pattern */
static int floatl_int_ucmp(floatl a, floatl b);
static floatl floatl_int_umul(floatl a, floatl b);
static floatl floatl_int_udiv(floatl a, floatl b, floatl *mod);
static floatl floatl_int_umod(floatl a, floatl b);
/* Integer arithmetic on the floatl bit pattern (two's complement where signed) */
static floatl floatl_int_add(floatl a, floatl b);
static floatl floatl_int_sub(floatl a, floatl b);
static floatl floatl_int_inc(floatl a);
static floatl floatl_int_dec(floatl a);
static floatl floatl_int_mul(floatl a, floatl b);
static floatl floatl_int_div(floatl a, floatl b);
static floatl floatl_int_mod(floatl a, floatl b);
/* Shifts and bitwise operations */
static floatl floatl_int_shl(floatl a, uint32_t amount);
static floatl floatl_int_shr(floatl a, uint32_t amount);
static floatl floatl_int_and(floatl a, floatl b);
static floatl floatl_int_or(floatl a, floatl b);
static floatl floatl_int_xor(floatl a, floatl b);
static floatl floatl_int_not(floatl a);
/* Sign handling and signed comparison */
static floatl floatl_int_abs(floatl a);
static int floatl_int_sign(floatl a);
static int floatl_int_cmp(floatl a, floatl b);
static floatl floatl_int_neg(floatl a);
/* Double-width (floatl2) integer helpers */
static int floatl2_int_ucmp(floatl2 a, floatl2 b);
static floatl2 floatl2_int_shr(floatl2 a, uint32_t amount);
static floatl2 floatl2_int_shl(floatl2 a, uint32_t amount);
static int floatl2_int_sign(floatl2 a);
static floatl2 floatl2_int_add(floatl2 a, floatl2 b);
static floatl2 floatl2_int_sub(floatl2 a, floatl2 b);
static floatl2 floatl2_int_umul(floatl2 a, floatl2 b);
static floatl2 floatl2_int_udiv(floatl2 a, floatl2 b, floatl2 *mod);
/**
 * \brief Append a character at the tail of the ring-buffer stack.
 * \param[in,out] st: Stack to modify.
 * \param[in] c: Character to store.
 * \return Always 1.
 *
 * The character is written to the slot addressed by `tail`, and `tail` then advances
 * by one, wrapping to 0 when it reaches `capacity`. `size` grows only while it is
 * below `capacity`; once the stack is full a push reuses an occupied slot and the
 * element count stays the same.
 */
static int fl_stack_push(FL_STACK *st, char c)
{
    const int slot = st->tail;

    st->base[slot] = c;
    // Advance the write position around the ring
    st->tail = (slot + 1) % st->capacity;
    // The element count saturates at the capacity
    if (st->size < st->capacity)
    {
        st->size += 1;
    }
    return 1;
}
/**
 * \brief Remove the most recently pushed character from the tail of the stack.
 * \param[in,out] st: Stack to modify.
 * \param[out] c: Receives the removed character; untouched when the stack is empty.
 * \return 1 when a character was removed, 0 when the stack is empty.
 *
 * The tail index steps back by one slot (wrapping below 0 to `capacity - 1`), the
 * character in that slot is copied out, and the element count drops by one.
 */
static int fl_stack_pop(FL_STACK *st, char *c)
{
    if (st->size != 0)
    {
        // Adding capacity before the modulo keeps the dividend non-negative
        const int last = (st->tail + st->capacity - 1) % st->capacity;

        st->tail = last;
        *c = st->base[last];
        st->size -= 1;
        return 1;
    }
    return 0;
}
/**
 * \brief Insert a character at the head (oldest end) of the stack.
 * \param[in,out] st: Stack to modify.
 * \param[in] c: Character to store.
 * \return 1 when the character was stored, 0 when the stack is already full.
 *
 * The target slot is the one just before the current oldest element, i.e.
 * `tail - size - 1` taken modulo `capacity`. The tail index is not changed; only the
 * element count grows.
 */
static int fl_stack_push_h(FL_STACK *st, char c)
{
    int head;

    if (st->size == st->capacity)
    {
        return 0;
    }
    // Adding capacity before the modulo keeps the dividend non-negative
    head = (st->tail - st->size + st->capacity - 1) % st->capacity;
    st->base[head] = c;
    st->size += 1;
    return 1;
}
/**
 * \brief Add two floatl bit patterns as multi-word integers.
 *
 * The operands are added 16 bits at a time, starting at `u16[0]`, with the carry of
 * each position fed into the next one. A carry out of the last position is discarded,
 * so the result wraps modulo the integer width (`__FLOATL_U16_PARTS__` 16-bit words).
 *
 * \param[in] a: The first operand.
 * \param[in] b: The second operand.
 * \return The sum `a + b`, truncated to the width of floatl.
 */
static floatl floatl_int_add(floatl a, floatl b)
{
    floatl sum;
    uint32_t acc = 0; // Running column sum; after the shift it holds only the carry

    for (int part = 0; part < __FLOATL_U16_PARTS__; ++part)
    {
        acc += (uint32_t)a.u16[part] + (uint32_t)b.u16[part];
        sum.u16[part] = (uint16_t)(acc & 0xFFFF);
        acc >>= 16;
    }
    return sum;
}
/**
 * \brief Subtract one floatl bit pattern from another as multi-word integers.
 *
 * The operands are subtracted 16 bits at a time, starting at `u16[0]`. Whenever a
 * position underflows, a borrow of one is taken from the next higher position. A
 * borrow out of the last position is discarded, so the result wraps modulo the
 * integer width (`__FLOATL_U16_PARTS__` 16-bit words).
 *
 * \param[in] a: The minuend.
 * \param[in] b: The subtrahend.
 * \return The difference `a - b`, truncated to the width of floatl.
 */
static floatl floatl_int_sub(floatl a, floatl b)
{
    floatl difference;
    uint32_t borrow = 0; // 1 when the previous position underflowed

    for (int part = 0; part < __FLOATL_U16_PARTS__; ++part)
    {
        // In 32-bit unsigned arithmetic an underflow shows up as bit 16 being set
        const uint32_t column = (uint32_t)a.u16[part] - (uint32_t)b.u16[part] - borrow;

        difference.u16[part] = (uint16_t)(column & 0xFFFF);
        borrow = (column >> 16) & 1u;
    }
    return difference;
}
/**
 * \brief Add one to a floatl bit pattern treated as a multi-word integer.
 *
 * Starting at `u32[0]`, each 32-bit word is incremented; the walk continues to the
 * next word only while the word just incremented wrapped around to zero. If every
 * word wraps, the result is zero.
 *
 * \param[in] a: The value to increment.
 * \return `a + 1`, truncated to the width of floatl.
 */
static floatl floatl_int_inc(floatl a)
{
    int part = 0;

    // A word that becomes 0 after the increment produced a carry into the next word
    while (part < __FLOATL_U32_PARTS__ && ++a.u32[part] == 0)
    {
        part++;
    }
    return a;
}
/**
 * \brief Subtract one from a floatl bit pattern treated as a multi-word integer.
 *
 * Starting at `u32[0]`, the first non-zero 32-bit word is decremented and the walk
 * stops. Every zero word passed on the way becomes 0xFFFFFFFF, which is the borrow
 * rippling upward. If every word is zero, the result is all ones.
 *
 * \param[in] a: The value to decrement.
 * \return `a - 1`, truncated to the width of floatl.
 */
static floatl floatl_int_dec(floatl a)
{
    int part = 0;

    // The post-decrement turns a zero word into 0xFFFFFFFF and asks for a borrow;
    // a non-zero word is simply reduced by one and ends the loop.
    while (part < __FLOATL_U32_PARTS__ && a.u32[part]-- == 0)
    {
        part++;
    }
    return a;
}
/**
 * \brief Multiply two floatl bit patterns as unsigned multi-word integers.
 *
 * Schoolbook multiplication on 16-bit words: every word of `a` is multiplied by every
 * word of `b` whose product position still fits into the result, and the partial
 * products are accumulated in place together with the running carry. Anything that
 * would land beyond the last word is dropped, so the product wraps modulo the integer
 * width (`__FLOATL_U16_PARTS__` 16-bit words).
 *
 * \param[in] a: The first factor.
 * \param[in] b: The second factor.
 * \return The low-order part of `a * b` that fits into a floatl.
 */
static floatl floatl_int_umul(floatl a, floatl b)
{
    floatl product = {0};

    for (int row = 0; row < __FLOATL_U16_PARTS__; row++)
    {
        uint32_t carry = 0;

        for (int col = 0; row + col < __FLOATL_U16_PARTS__; col++)
        {
            // 0xFFFF * 0xFFFF + 0xFFFF + 0xFFFF == 0xFFFFFFFF, so this cannot overflow
            const uint32_t column = (uint32_t)a.u16[row] * (uint32_t)b.u16[col]
                                  + (uint32_t)product.u16[row + col]
                                  + carry;

            product.u16[row + col] = (uint16_t)(column & 0xFFFF);
            carry = column >> 16;
        }
    }
    return product;
}
/**
 * \brief Multiply two floatl bit patterns as signed (two's complement) integers.
 *
 * An operand whose top bit (bit 31 of the last 32-bit word) is set is treated as
 * negative and replaced by its negation via `floatl_int_neg`. The magnitudes are
 * multiplied with `floatl_int_umul`, and the product is negated when exactly one of
 * the operands was negative.
 *
 * \param[in] a: The first factor.
 * \param[in] b: The second factor.
 * \return The product `a * b`, truncated to the width of floatl.
 */
static floatl floatl_int_mul(floatl a, floatl b)
{
    const int a_negative = (a.u32[__FLOATL_U32_PARTS__ - 1] & 0x80000000) != 0;
    const int b_negative = (b.u32[__FLOATL_U32_PARTS__ - 1] & 0x80000000) != 0;
    floatl product;

    // Work on magnitudes
    if (a_negative)
    {
        a = floatl_int_neg(a);
    }
    if (b_negative)
    {
        b = floatl_int_neg(b);
    }
    product = floatl_int_umul(a, b);
    // Signs differ -> negative product
    if (a_negative != b_negative)
    {
        product = floatl_int_neg(product);
    }
    return product;
}
/**
 * \brief Divide one floatl bit pattern by another as unsigned multi-word integers.
 *
 * Bit-by-bit restoring division: the dividend is consumed from its most significant
 * bit downward. On each step the running remainder is shifted left by one, the next
 * dividend bit is appended, and if the remainder is then at least `b`, `b` is
 * subtracted and the matching quotient bit is set.
 *
 * Division by zero is not an error return: the function prints "Division by zero!"
 * with printf, returns an all-zero quotient and leaves `*mod` untouched.
 *
 * \param[in] a: The dividend.
 * \param[in] b: The divisor.
 * \param[out] mod: Optional; when not NULL and `b` is non-zero it receives the
 *                  remainder of the division.
 * \return The quotient `a / b`, or zero when `b` is zero.
 */
static floatl floatl_int_udiv(floatl a, floatl b, floatl *mod)
{
    // floatl_int_sign() returns 0 only when every word of b is zero
    if (floatl_int_sign(b) == 0)
    {
        printf("Division by zero!\n");
        return (floatl){0};
    }

    floatl result = {0};
    floatl remainder = {0};

    // __FLOATL_BIT_PARTS__ is presumably the total bit width of floatl - confirm in floatl.h
    for (int i = __FLOATL_BIT_PARTS__ - 1; i >= 0; i--)
    {
        const int word = i / 32;
        const int bit = i % 32;

        // Bring the next dividend bit down into the remainder
        remainder = floatl_int_shl(remainder, 1);
        remainder.u32[0] |= (a.u32[word] >> bit) & 1u;

        if (floatl_int_ucmp(remainder, b) >= 0)
        {
            remainder = floatl_int_sub(remainder, b);
            // The mask must be unsigned: `1 << 31` on a signed int is undefined behavior
            result.u32[word] |= (uint32_t)1 << bit;
        }
    }

    if (mod)
    {
        *mod = remainder;
    }
    return result;
}
/**
 * \brief Divide two floatl bit patterns as signed (two's complement) integers.
 *
 * An operand whose top bit (bit 31 of the last 32-bit word) is set is treated as
 * negative and replaced by its negation via `floatl_int_neg`. The magnitudes are
 * divided with `floatl_int_udiv` (remainder discarded), and the quotient is negated
 * when exactly one of the operands was negative.
 *
 * \param[in] a: The dividend.
 * \param[in] b: The divisor.
 * \return The quotient `a / b` as produced by `floatl_int_udiv`, with the sign applied.
 */
static floatl floatl_int_div(floatl a, floatl b)
{
    const int a_negative = (a.u32[__FLOATL_U32_PARTS__ - 1] & 0x80000000) != 0;
    const int b_negative = (b.u32[__FLOATL_U32_PARTS__ - 1] & 0x80000000) != 0;
    floatl quotient;

    // Work on magnitudes
    if (a_negative)
    {
        a = floatl_int_neg(a);
    }
    if (b_negative)
    {
        b = floatl_int_neg(b);
    }
    quotient = floatl_int_udiv(a, b, NULL);
    // Signs differ -> negative quotient
    if (a_negative != b_negative)
    {
        quotient = floatl_int_neg(quotient);
    }
    return quotient;
}
/**
 * \brief Remainder of the unsigned division of two floatl bit patterns.
 *
 * Thin wrapper around `floatl_int_udiv`: the quotient is discarded and the remainder
 * it reports is returned. The local remainder starts out as zero, so zero is returned
 * when `floatl_int_udiv` does not write a remainder (division by zero).
 *
 * \param[in] a: The dividend.
 * \param[in] b: The divisor.
 * \return The remainder `a % b`, or zero when `b` is zero.
 */
static floatl floatl_int_umod(floatl a, floatl b)
{
    floatl remainder = {0};

    (void)floatl_int_udiv(a, b, &remainder);
    return remainder;
}
/**
 * \brief Remainder of the signed division of two floatl bit patterns.
 *
 * The remainder is computed on magnitudes: a dividend whose top bit (bit 31 of the
 * last 32-bit word) is set is negated first, and the divisor is passed through
 * `floatl_int_abs`. The result of `floatl_int_umod` is then negated when the dividend
 * was negative, so the remainder takes the sign of the dividend; the sign of the
 * divisor does not influence it.
 *
 * \param[in] a: The dividend.
 * \param[in] b: The divisor.
 * \return The remainder `a % b` with the sign of `a`.
 */
static floatl floatl_int_mod(floatl a, floatl b)
{
    const int negative = (a.u32[__FLOATL_U32_PARTS__ - 1] & 0x80000000) != 0;
    floatl remainder;

    if (negative)
    {
        a = floatl_int_neg(a);
    }
    b = floatl_int_abs(b);
    remainder = floatl_int_umod(a, b);
    if (negative)
    {
        remainder = floatl_int_neg(remainder);
    }
    return remainder;
}
/**
 * \brief Shift a floatl bit pattern left by an arbitrary number of bits.
 *
 * The shift is split into a whole-word part (`amount / 32`) and a remaining bit part
 * (`amount % 32`). Each destination word takes the shifted source word plus, when the
 * bit part is non-zero, the bits that spill over from the next lower source word.
 * Vacated low words are zero, and a shift by the full width or more yields zero.
 *
 * \param[in] a: The value to shift.
 * \param[in] amount: The number of bit positions to shift.
 * \return `a` shifted left by `amount` bits, truncated to the width of floatl.
 */
static floatl floatl_int_shl(floatl a, uint32_t amount)
{
    floatl shifted = {0};
    const int words = (int)(amount / 32);
    const int bits = (int)(amount % 32);

    // Destination words below `words` keep their zero initialization
    for (int dst = words; dst < __FLOATL_U32_PARTS__; dst++)
    {
        const int src = dst - words;
        uint32_t value = a.u32[src] << bits;

        // Guarding on bits > 0 also avoids an undefined shift by 32
        if (bits > 0 && src > 0)
        {
            value |= a.u32[src - 1] >> (32 - bits);
        }
        shifted.u32[dst] = value;
    }
    return shifted;
}
/**
 * \brief Arithmetic right shift of a floatl bit pattern by an arbitrary number of bits.
 *
 * The shift is split into a whole-word part (`amount / 32`) and a remaining bit part
 * (`amount % 32`). Each destination word is assembled from the source word at the
 * shifted position and, when the bit part is non-zero, the low bits of the next
 * higher source word. Positions above the last source word are filled with copies of
 * the sign bit (bit 31 of the last 32-bit word): zeros for a non-negative value, ones
 * for a negative one.
 *
 * A shift by the full width or more is well defined: it yields zero for a
 * non-negative value and all ones for a negative value. No source word outside the
 * array is ever read.
 *
 * \param[in] a: The value to shift.
 * \param[in] amount: The number of bit positions to shift.
 * \return `a` shifted right by `amount` bits with the sign bit replicated.
 */
static floatl floatl_int_shr(floatl a, uint32_t amount)
{
    floatl result = {0};
    const uint32_t parts = (uint32_t)__FLOATL_U32_PARTS__;
    const uint32_t words = amount / 32;
    const uint32_t bits = amount % 32;
    // Word that stands in for everything above the most significant source word
    const uint32_t fill = (a.u32[__FLOATL_U32_PARTS__ - 1] & 0x80000000u) ? 0xFFFFFFFFu : 0u;

    for (uint32_t i = 0; i < parts; i++)
    {
        const uint32_t src = i + words;
        const uint32_t lo = (src < parts) ? a.u32[src] : fill;
        const uint32_t hi = (src + 1 < parts) ? a.u32[src + 1] : fill;

        // bits == 0 is handled separately: `hi << 32` would be undefined behavior
        result.u32[i] = (bits != 0) ? ((lo >> bits) | (hi << (32 - bits))) : lo;
    }
    return result;
}
/**
 * \brief Bitwise AND of two floatl bit patterns.
 *
 * Each 32-bit word of the result is the AND of the corresponding words of the
 * operands, so a result bit is set only where both operands have it set.
 *
 * \param[in] a: The first operand.
 * \param[in] b: The second operand.
 * \return `a & b`.
 */
static floatl floatl_int_and(floatl a, floatl b)
{
    int part = __FLOATL_U32_PARTS__;

    // `a` is a by-value copy, so it doubles as the result buffer
    while (part-- > 0)
    {
        a.u32[part] &= b.u32[part];
    }
    return a;
}
/**
 * \brief Bitwise OR of two floatl bit patterns.
 *
 * Each 32-bit word of the result is the OR of the corresponding words of the
 * operands, so a result bit is set where at least one operand has it set.
 *
 * \param[in] a: The first operand.
 * \param[in] b: The second operand.
 * \return `a | b`.
 */
static floatl floatl_int_or(floatl a, floatl b)
{
    int part = __FLOATL_U32_PARTS__;

    // `a` is a by-value copy, so it doubles as the result buffer
    while (part-- > 0)
    {
        a.u32[part] |= b.u32[part];
    }
    return a;
}
/**
 * \brief Bitwise XOR of two floatl bit patterns.
 *
 * Each 32-bit word of the result is the XOR of the corresponding words of the
 * operands, so a result bit is set where exactly one operand has it set.
 *
 * \param[in] a: The first operand.
 * \param[in] b: The second operand.
 * \return `a ^ b`.
 */
static floatl floatl_int_xor(floatl a, floatl b)
{
    int part = __FLOATL_U32_PARTS__;

    // `a` is a by-value copy, so it doubles as the result buffer
    while (part-- > 0)
    {
        a.u32[part] ^= b.u32[part];
    }
    return a;
}
/**
 * \brief Bitwise complement of a floatl bit pattern.
 *
 * Every bit of every 32-bit word is inverted.
 *
 * \param[in] a: The value to invert.
 * \return `~a`.
 */
static floatl floatl_int_not(floatl a)
{
    int part = __FLOATL_U32_PARTS__;

    // `a` is a by-value copy, so it doubles as the result buffer
    while (part-- > 0)
    {
        a.u32[part] = ~a.u32[part];
    }
    return a;
}
/**
 * \brief Absolute value of a floatl bit pattern treated as a two's complement integer.
 *
 * The value counts as negative when bit 31 of its last 32-bit word is set. A negative
 * value is passed through `floatl_int_neg`; any other value is returned unchanged.
 *
 * \param[in] a: The value to take the absolute value of.
 * \return `floatl_int_neg(a)` when the sign bit of `a` is set, otherwise `a`.
 */
static floatl floatl_int_abs(floatl a)
{
    const uint32_t top = a.u32[__FLOATL_U32_PARTS__ - 1];

    return (top & 0x80000000) ? floatl_int_neg(a) : a;
}
/**
* \brief Determines the sign of an floatl number.
*
* This function checks the sign of the given 128-bit unsigned integer
* (floatl) based on its representation. It first examines the most significant
* bit of the highest 32-bit segment to determine if the number is negative.
* If this bit is set, the function returns -1, indicating a negative value.
* If all segments are zero, it returns 0, indicating that the number is zero.
* If the number is positive, it returns 1.
*
* \param[in] a: The floatl number to evaluate for its sign.
* \return -1 if the number is negative, 0 if the number is zero, and
* 1 if the number is positive.
*/
static int floatl_int_sign(floatl a)
{
    const int top = __FLOATL_U32_PARTS__ - 1;
    // A set sign bit in the top word means the value is negative
    if ((a.u32[top] & 0x80000000) != 0)
    {
        return -1;
    }
    // Otherwise look for the first non-zero word; none means the value is zero
    int idx = 0;
    while (idx <= top && a.u32[idx] == 0)
    {
        idx++;
    }
    return (idx <= top) ? 1 : 0;
}
/**
* \brief Compares two floatl unsigned numbers.
*
* This function compares two 128-bit unsigned integers (floatl)
* by examining each 32-bit segment from the most significant to
* the least significant. It returns 1 if the first number is
* greater than the second, -1 if it is less, and 0 if they are
* equal. The comparison is done in a way that respects the
* unsigned nature of the integers.
*
* \param[in] a: The first number to compare.
* \param[in] b: The second number to compare.
* \return 1 if a > b, -1 if a < b, and 0 if a == b.
*/
static int floatl_int_ucmp(floatl a, floatl b)
{
    int idx = __FLOATL_U32_PARTS__;
    // The first differing word, scanning from the most significant end, decides the order
    while (idx-- > 0)
    {
        if (a.u32[idx] != b.u32[idx])
        {
            return (a.u32[idx] > b.u32[idx]) ? 1 : -1;
        }
    }
    return 0; // all words identical
}
/**
* \brief Compares two floatl numbers.
*
* This function compares two 128-bit integers (floatl) and
* determines their relative order. The comparison is performed
* starting from the most significant part (highest order) to
* the least significant part (lowest order).
*
* \param[in] a: The first number to compare.
* \param[in] b: The second number to compare.
*
* \return 1 if a > b, -1 if a < b, 0 if a == b.
*/
static int floatl_int_cmp(floatl a, floatl b)
{
    /*
     * Signed (two's-complement) comparison of two multi-word integers.
     * Returns 1 if a > b, -1 if a < b, 0 if equal.
     *
     * Only the most significant word carries the sign, so it is the only
     * word compared as int32_t. The remaining words are plain magnitude
     * digits and must be compared unsigned; comparing them as signed would
     * mis-order any pair whose lower words differ in bit 31
     * (e.g. 0x0000000080000000 would compare less than 0x0000000000000001).
     */
    const int top = __FLOATL_U32_PARTS__ - 1;
    if ((int32_t)a.u32[top] > (int32_t)b.u32[top]) return 1;  // a is greater
    if ((int32_t)a.u32[top] < (int32_t)b.u32[top]) return -1; // a is less
    // Top words are equal: the lower words decide, compared as unsigned digits
    for (int i = top - 1; i >= 0; i--)
    {
        if (a.u32[i] > b.u32[i]) return 1;  // a is greater
        if (a.u32[i] < b.u32[i]) return -1; // a is less
    }
    return 0; // a is equal to b
}
/**
* \brief Computes the two's complement (negation) of an floatl number.
*
* This function calculates the negative representation of a given 128-bit
* unsigned integer (floatl) using two's complement. It first inverts all bits
* of the input number and then adds one to the result. This effectively
* represents the negative value of the original number in a signed
* integer format.
*
* \param[in] a: The floatl number to negate.
* \return The negated floatl number (two's complement of a).
*/
static floatl floatl_int_neg(floatl a)
{
    floatl inverted = {0};
    int word = __FLOATL_U32_PARTS__;
    // Step 1 of two's complement: flip every bit of every word
    while (word-- > 0)
    {
        inverted.u32[word] = ~a.u32[word];
    }
    // Step 2: add one to the inverted value
    return floatl_int_inc(inverted);
}
/**
* \brief Compares two floatl2 unsigned numbers.
*
* This function compares two double-width unsigned integers (floatl2)
* by examining each 32-bit segment from the most significant to
* the least significant. It returns 1 if the first number is
* greater than the second, -1 if it is less, and 0 if they are
* equal. The comparison is done in a way that respects the
* unsigned nature of the integers.
*
* \param[in] a: The first number to compare.
* \param[in] b: The second number to compare.
* \return 1 if a > b, -1 if a < b, and 0 if a == b.
*/
static int floatl2_int_ucmp(floatl2 a, floatl2 b)
{
    int idx = __FLOATL2_U32_PARTS__;
    // The first differing word, scanning from the most significant end, decides the order
    while (idx-- > 0)
    {
        if (a.u32[idx] != b.u32[idx])
        {
            return (a.u32[idx] > b.u32[idx]) ? 1 : -1;
        }
    }
    return 0; // all words identical
}
/**
* \brief Right shifts an floatl number by a specified number of bits.
*
* This function performs a right bitwise shift on a 128-bit integer
* (floatl). The shift amount can be greater than 32 bits, in which case
* the function calculates how many whole 32-bit parts to shift and
* how many bits to shift within the remaining part. It constructs
* the result based on the input number after applying the shift.
* The sign bit is preserved for signed shifts.
*
* \param[in] a: The floatl number to shift.
* \param[in] amount: The number of bits to shift to the right.
* \return The right-shifted floatl number.
*/
static floatl2 floatl2_int_shr(floatl2 a, uint32_t amount)
{
    /*
     * Right-shifts a floatl2 integer by 'amount' bits and returns the result.
     * The shift is split into a whole-word part and a sub-word bit part.
     *
     * Words whose source index would fall past the top of 'a' are set to 0.
     * This includes the case amount >= 32 * __FLOATL2_U32_PARTS__, where the
     * whole result is 0 (previously that case indexed a.u32[] out of bounds).
     */
    floatl2 result = {0}; // Initialize the result to zero
    int u32bias = amount / 32; // Number of whole 32-bit parts to shift
    int bitsbias = amount % 32; // Remaining bits to shift
    // Perform the shift for each 32-bit part of the floatl2 number
    for (int i = 0; i < __FLOATL2_U32_PARTS__; i++)
    {
        // Source word a.u32[i + u32bias] does not exist: nothing is shifted in
        if (i + u32bias >= __FLOATL2_U32_PARTS__)
        {
            result.u32[i] = 0; // Set shifted-out parts to zero
        }
        else
        {
            // Low bits come from the source word; the high bits come from the
            // next higher word when there is one and the bit shift is non-zero.
            // NOTE(review): in the other case (no higher word, or bitsbias == 0)
            // a full 0xFFFFFFFF is OR-ed in whenever the top bit of 'a' is set.
            // That looks like an attempt at sign extension, but it also fires
            // for every word when bitsbias == 0 - confirm the intended semantics
            // against the callers before changing it.
            result.u32[i] = (a.u32[i + u32bias] >> bitsbias) |
                            (((i + u32bias + 1) < __FLOATL2_U32_PARTS__ && bitsbias > 0) ?
                             (a.u32[i + u32bias + 1] << (32 - bitsbias)) :
                             ((a.u32[__FLOATL2_U32_PARTS__ - 1] & 0x80000000) ? 0xFFFFFFFF : 0));
        }
    }
    return result; // Return the right-shifted result
}
/**
* \brief Left shifts an floatl number by a specified number of bits.
*
* This function performs a left bitwise shift on a 128-bit integer
* (floatl). The shift amount can be greater than 32 bits, in which case
* the function calculates how many whole 32-bit parts to shift and
* how many bits to shift within the remaining part. It constructs
* the result based on the input number after applying the shift.
*
* \param[in] a: The floatl number to shift.
* \param[in] amount: The number of bits to shift to the left.
* \return The left-shifted floatl number.
*/
static floatl2 floatl2_int_shl(floatl2 a, uint32_t amount)
{
    floatl2 shifted = {0}; // words below the whole-word shift stay zero
    const int word_shift = amount / 32; // whole 32-bit words to move up
    const int bit_shift = amount % 32;  // leftover bits within a word
    // Only destination words at or above word_shift receive data
    for (int dst = word_shift; dst < __FLOATL2_U32_PARTS__; dst++)
    {
        const int src = dst - word_shift;
        // Bulk of the destination word comes from the matching source word
        shifted.u32[dst] = a.u32[src] << bit_shift;
        // The bits pushed out of the next lower source word fill the gap at the bottom
        if (src >= 1 && bit_shift > 0)
        {
            shifted.u32[dst] |= a.u32[src - 1] >> (32 - bit_shift);
        }
    }
    return shifted;
}
/**
* \brief Determines the sign of an floatl number.
*
* This function checks the sign of the given 128-bit unsigned integer
* (floatl) based on its representation. It first examines the most significant
* bit of the highest 32-bit segment to determine if the number is negative.
* If this bit is set, the function returns -1, indicating a negative value.
* If all segments are zero, it returns 0, indicating that the number is zero.
* If the number is positive, it returns 1.
*
* \param[in] a: The floatl number to evaluate for its sign.
* \return -1 if the number is negative, 0 if the number is zero, and
* 1 if the number is positive.
*/
static int floatl2_int_sign(floatl2 a)
{
    const int top = __FLOATL2_U32_PARTS__ - 1;
    // A set sign bit in the top word means the value is negative
    if ((a.u32[top] & 0x80000000) != 0)
    {
        return -1;
    }
    // Otherwise look for the first non-zero word; none means the value is zero
    int idx = 0;
    while (idx <= top && a.u32[idx] == 0)
    {
        idx++;
    }
    return (idx <= top) ? 1 : 0;
}
/**
* \brief Adds two floatl numbers.
*
* This function computes the sum of two 128-bit integers
* (floatl). It processes each 16-bit part of the input integers,
* handling carry bits as necessary. The result is stored in a
* new floatl number. This function ensures that overflow is
* correctly managed across all parts.
*
* \param[in] a: The first operand (floatl number).
* \param[in] b: The second operand (floatl number).
* \return The sum of a and b as an floatl (128-bit integer).
*/
static floatl2 floatl2_int_add(floatl2 a, floatl2 b)
{
    floatl2 total; // every 16-bit limb is assigned by the loop below
    uint16_t carry_in = 0;
    int limb = 0;
    // Ripple-carry addition over 16-bit limbs, least significant first
    while (limb < __FLOATL2_U16_PARTS__)
    {
        // 32-bit accumulator: two 16-bit limbs plus a carry cannot overflow it
        const uint32_t wide = (uint32_t)a.u16[limb] + (uint32_t)b.u16[limb] + carry_in;
        total.u16[limb] = (uint16_t)(wide & 0xFFFF); /** Lower 16 bits */
        carry_in = (uint16_t)(wide >> 16);           /** Upper 16 bits as carry */
        limb++;
    }
    // The carry out of the top limb is dropped (wrap-around)
    return total;
}
/**
* \brief Subtracts one floatl number from another.
*
* This function computes the difference of two 128-bit integers
* (floatl). It processes each 16-bit part of the minuend and
* subtrahend, handling borrow bits as necessary. The result is
* stored in a new floatl number. This function ensures that
* borrowing is correctly managed across all parts.
*
* \param[in] a: The minuend (the number from which another is to be subtracted).
* \param[in] b: The subtrahend (the number to be subtracted).
* \return The result of a - b as an floatl (128-bit integer).
*/
static floatl2 floatl2_int_sub(floatl2 a, floatl2 b)
{
    floatl2 difference; // every 16-bit limb is assigned by the loop below
    uint32_t borrow = 0; // 1 when the previous limb had to borrow
    // Ripple-borrow subtraction over 16-bit limbs, least significant first
    for (int limb = 0; limb < __FLOATL2_U16_PARTS__; limb++)
    {
        // Unsigned 32-bit arithmetic wraps, so a needed borrow shows up in the upper half
        const uint32_t wide = (uint32_t)a.u16[limb] - (uint32_t)b.u16[limb] - borrow;
        difference.u16[limb] = (uint16_t)(wide & 0xFFFF);
        borrow = (wide & 0xFFFF0000) ? 1 : 0;
    }
    // A borrow out of the top limb is dropped (wrap-around)
    return difference;
}
/**
* \brief Multiplies two floatl unsigned numbers.
*
* This function performs multiplication of two 128-bit unsigned integers
* (floatl) by using a method similar to the schoolbook algorithm. The
* multiplication is carried out by breaking the numbers into their
* 16-bit components and accumulating the results. The function handles
* carry-over during the multiplication and addition stages to ensure
* the final product is accurately represented.
*
* \param[in] a: The first operand to multiply.
* \param[in] b: The second operand to multiply.
* \return The product of a and b as an floatl (128-bit unsigned integer).
*/
static floatl2 floatl2_int_umul(floatl2 a, floatl2 b)
{
    floatl2 product = {0};
    // One partial-product row per limb of 'a', already shifted into position
    floatl2 rows[__FLOATL_U16_PARTS__*2] = {{0}};

    // Phase 1: build the rows. Limbs that would land beyond the top of the
    // result are simply not produced (the product is truncated to floatl2).
    for (int row = 0; row < __FLOATL2_U16_PARTS__; row++)
    {
        uint16_t row_carry = 0;
        for (int col = 0; row + col < __FLOATL2_U16_PARTS__; col++)
        {
            const uint32_t partial = (uint32_t)a.u16[row] * (uint32_t)b.u16[col] + row_carry;
            rows[row].u16[row + col] = (partial & 0xFFFF);    // low half stays in this column
            row_carry = ((partial >> 16) & 0xFFFF);           // high half moves to the next column
        }
    }

    // Phase 2: add the rows column by column, least significant first
    uint16_t column_carry = 0;
    for (int col = 0; col < __FLOATL2_U16_PARTS__; col++)
    {
        uint32_t column = column_carry; // start from the carry of the previous column
        for (int row = 0; row < __FLOATL2_U16_PARTS__; row++)
        {
            column += rows[row].u16[col];
        }
        product.u16[col] = (column & 0xFFFF);
        column_carry = ((column >> 16) & 0xFFFF);
    }
    return product;
}
/**
* \brief Divides one floatl2 unsigned number by another.
*
* This function performs division of one double-width unsigned integer (floatl2)
* by another. It calculates the quotient using a bitwise approach (binary long
* division): the result is built bit by bit from the most significant bit to
* the least significant bit. If the divisor is zero, it prints an error
* message and returns zero.
*
* \param[in] a: The dividend (number to be divided).
* \param[in] b: The divisor (number to divide by).
* \param[out] mod: If not NULL, receives the remainder of a divided by b. It is not written when the divisor is zero.
* \return The quotient of a divided by b as a floatl2 (double-width unsigned integer).
*/
static floatl2 floatl2_int_udiv(floatl2 a, floatl2 b, floatl2 *mod)
{
    /*
     * Unsigned binary long division of two floatl2 integers.
     * Returns the quotient a / b; when 'mod' is non-NULL it receives the
     * remainder. On division by zero a message is printed, the zero constant
     * is returned and '*mod' is left untouched.
     */
    int FLOATL2_BIT_PARTS = __FLOATL_BIT_PARTS__ * 2; // total number of bits processed
    // Check for division by zero
    if (floatl2_int_sign(b) == 0)
    {
        printf("Division by zero!\n");
        return FLOATL2(__FLOATL_INT_ZERO__); /** Handle division by zero */
    }
    floatl2 result = {0};    // Quotient, built one bit at a time
    floatl2 remainder = {0}; // Running remainder
    /** Calculate bit by bit from the highest bit */
    for (int i = FLOATL2_BIT_PARTS - 1; i >= 0; i--)
    {
        /** Left shift remainder and bring down bit i of the dividend */
        remainder = floatl2_int_shl(remainder, 1);
        remainder.u32[0] |= (a.u32[i / 32] >> (i % 32)) & 1;
        /** If remainder is greater than or equal to b, subtract b */
        if (floatl2_int_ucmp(remainder, b) >= 0)
        {
            remainder = floatl2_int_sub(remainder, b);
            // The mask must be built in an unsigned type: '1 << 31' on a
            // signed int shifts into the sign bit, which is undefined behaviour
            result.u32[i / 32] |= ((uint32_t)1 << (i % 32));
        }
    }
    if (mod) *mod = remainder;
    return result; // Return the final quotient
}
/**
* \brief Calculate 10 raised to the power of the given integer exponent.
* \param[in] n: The exponent to which 10 is raised. It can be a positive or negative 32 - bit integer.
* \return A 'floatl' value representing 10^n.
*
* This function calculates 10^n by dividing the exponent into segments of 15.
* For positive exponents, it multiplies the result by pre - computed powers of 10.
* For negative exponents, it divides the result by pre - computed powers of 10.
*/
static floatl floatl_pow10(int32_t n)
{
    // Start from 10^0 and scale it in steps taken from the flpow10[] table
    floatl power = FLOATL_CONST_1;
    if (n > 0)
    {
        // Peel off full steps of 10^15 while more than 15 remain ...
        for (; n > 15; n -= 15)
        {
            power = floatl_mul(power, flpow10[15]);
        }
        // ... then finish with the remaining 1..15
        power = floatl_mul(power, flpow10[n]);
    }
    else if (n < 0)
    {
        // Same scheme for negative exponents, dividing instead of multiplying
        for (; n < -15; n += 15)
        {
            power = floatl_div(power, flpow10[15]);
        }
        // n is now in -15..-1, so -n indexes the table directly
        power = floatl_div(power, flpow10[-n]);
    }
    return power;
}
/**
* \brief Convert a hexadecimal character to its corresponding numerical value.
* \param[in] c: The hexadecimal character to be converted. It can be a digit from '0' to '9',
* a lowercase letter from 'a' to 'f', or an uppercase letter from 'A' to 'F'.
* \return The numerical value corresponding to the hexadecimal character. Returns -1 if the input
* character is not a valid hexadecimal character.
*
* This function takes a single hexadecimal character and converts it to its numerical equivalent.
* For digits '0' - '9', it returns the digit value. For letters 'a' - 'f' or 'A' - 'F', it returns
* the corresponding value from 10 to 15.
*/
static char hexI(char c)
{
    // Default answer: not a hexadecimal digit
    char value = -1;
    if (c >= '0' && c <= '9')
    {
        value = c - '0';        // decimal digits map to 0..9
    }
    else if (c >= 'a' && c <= 'f')
    {
        value = c - 'a' + 10;   // lowercase letters map to 10..15
    }
    else if (c >= 'A' && c <= 'F')
    {
        value = c - 'A' + 10;   // uppercase letters map to 10..15
    }
    return value;
}
/**
* \brief Fill the mantissa area of a 'floatl' number with a character value.
* \param[in,out] number: Pointer to the 'floatl' structure where the mantissa will be updated.
* \param[in,out] part: Pointer to an integer representing the current u32 part index in the mantissa area.
* \param[in,out] bitoffset: Pointer to an integer representing the current bit offset within the u32 part.
* \param[in,out] bitnum: Pointer to an integer representing the number of bits to fill.
* \param[in] c: The character value to be filled into the mantissa area.
* \return 1 if the filling operation is successful, 0 if an error occurs (e.g., part index goes out of bounds).
*
* This function takes a character value and fills it into the mantissa area of a 'floatl' number.
* It handles the bit offset and part index to ensure the value is placed correctly in the mantissa.
*/
static int pad_mant(floatl *number, int *part, int *bitoffset, int *bitnum, char c)
{
// Purpose: OR the value 'c' (occupying *bitnum bits) into the mantissa of
// 'number', just below the current write position. The write position is the
// pair (*part, *bitoffset) and moves downwards; both are updated for the next call.
// A *part value equal to __FLOATL_MANT_PARTS__ addresses the 'mantissa' field,
// smaller values address mantissas[*part].
//
// Move the write position down by the width of the value being inserted
(*bitoffset) -= (*bitnum);
// A negative offset means the value straddles into the next lower u32 part
if ((*bitoffset) < 0)
{
(*part)--;
// Stepped below part 0: the mantissa storage is exhausted
if ((*part) < 0)
{
// (*bitoffset) + (*bitnum) is the offset this call started with; fail if it was 32
if ((*bitoffset) + (*bitnum) == 32) return 0;
else
{
// Clamp the part index back to 0
(*part) = 0;
// NOTE(review): *bitoffset is negative here, so (32 - *bitoffset) exceeds 32 and
// *bitnum becomes negative for the 1..4 bit widths passed by floatl_from(), which
// then adds it to its exponent. Confirm whether "bits that still fit" was intended.
(*bitnum) -= (32 - (*bitoffset));
// Reset the bit offset to 0 (the += 32 below then makes it 32)
(*bitoffset) = 0;
}
}
// Re-base the offset into the (new) current part
(*bitoffset) += 32;
}
// The current part is one of the mantissas[] words
if ((*part) < __FLOATL_MANT_PARTS__)
{
// OR the low portion of 'c' into the current word.
// NOTE(review): 'c' is promoted to int before shifting. With a 32-bit int, a shift
// that pushes set bits into or past bit 31, or a shift by 32 (reachable after the
// clamp branch above), is undefined behaviour - consider shifting a uint32_t.
number->mantissas[(*part)] |= (c << (*bitoffset));
// The value reaches the top of this word: the bits above bit 31
// belong at the bottom of the next higher part
if ((*bitoffset) + (*bitnum) >= 32)
{
// Next higher part is still a mantissas[] word
if ((*part) + 1 < __FLOATL_MANT_PARTS__) number->mantissas[(*part) + 1] |= (c >> (32 - (*bitoffset)));
// Otherwise the spill-over goes into the high-order 'mantissa' field
else number->mantissa |= (c >> (32 - (*bitoffset)));
}
}
// The current part is the high-order 'mantissa' field
else
{
// OR 'c' into the high-order mantissa at the current offset
number->mantissa |= (c << (*bitoffset));
}
// Value placed (1 = keep going, 0 is only returned by the early exit above)
return 1;
}
/**
* \brief Convert a string representation of a floating - point number to a 'floatl' type.
* \param[in] str: Pointer to the null - terminated string representing the floating - point number.
* \return A 'floatl' value representing the converted floating - point number. Returns FLOATL_NAN if the input string is invalid.
*
* This function parses a string to create a 'floatl' number. It supports both hexadecimal (starting with "0x")
* and decimal formats. It handles signs, integer parts, decimal parts, and exponents appropriately.
*/
floatl floatl_from(const char *str)
{
    /*
     * Parses 'str' into a floatl.
     *
     * Accepted forms (after optional leading blanks and an optional '-'):
     *   - hexadecimal:  0x<hex digits>[.<hex digits>][p[+|-]<decimal digits>]
     *   - decimal:      <digits>[.<digits>][e[+|-]<decimal digits>]
     * Trailing characters after the number are ignored.
     *
     * Returns FLOATL_NAN for a NULL pointer, for a '.' not followed by a digit
     * (decimal form) and for an exponent marker not followed by a digit.
     * Hexadecimal values whose exponent leaves the representable range yield
     * a signed infinity or a signed zero.
     */
    // Start from zero; decimal digits are accumulated into this value
    floatl number = FLOATL_CONST_0;
    const char *text = str;
    int sign = 1, scale = 0;     /* Sign of the value; minus the number of decimal fraction digits */
    int e_sign = 1, e_scale = 0; /* Sign and magnitude of the decimal 'e' exponent */
    int p_sign = 1, p_scale = 0; /* Sign and magnitude of the binary 'p' exponent (hex form) */

    // Nothing to parse
    if (!text) return FLOATL_NAN;

    // Skip leading blanks/control characters, but never step over the
    // terminator: '\0' is also <= ' ', so it has to be tested explicitly
    while (*text != '\0' && *text <= ' ') text++;

    // Check if the number is negative
    if (*text == '-')
    {
        sign = -1;
        text++;
    }

    // Check if the number starts with '0'
    if (*text == '0')
    {
        // Skip leading '0's
        while (*text == '0') text++;
        // Check if it is a hexadecimal number (starts with "0x" or "0X")
        if (*text == 'x' || *text == 'X')
        {
            text++;
            // Skip leading '0's after "0x"
            while (*text == '0') text++;
            char c = 0;
            char hide = 0; // becomes 1 once the leading (hidden) one bit has been consumed
            int exp = 0;   // binary exponent of the leading one bit
            int part = __FLOATL_MANT_PARTS__;
            int bitoffset = __FLOATL_MANT_HIGH_BITS__;
            int bitnum = 0;

            // Parse the integer part of the hexadecimal number
            while (1)
            {
                c = hexI(*text);
                // hexI() reports "not a hex digit" as -1; where plain char is
                // unsigned that arrives as a large positive value, so reject
                // everything outside 0..15 rather than only negatives
                if (c < 0 || c > 15) break;
                text++;
                // For the first digit, remove the hidden bit
                if (!hide)
                {
                    bitnum = 32 - count_u32_leading_zero(c) - 1;
                    c &= ((1 << bitnum) - 1);
                    hide = 1;
                }
                else bitnum = 4;
                if (!pad_mant(&number, &part, &bitoffset, &bitnum, c)) break;
                // Every integer bit below the leading one raises the exponent
                exp += bitnum;
            }

            // Parse the fraction part of the hexadecimal number
            if (*text == '.')
            {
                text++;
                while (1)
                {
                    c = hexI(*text);
                    if (c < 0 || c > 15) break; // see the integer part above
                    text++;
                    // Still looking for the leading one bit
                    if (!hide)
                    {
                        if (c == 0)
                        {
                            // A zero fraction digit before the leading one: four more binary places
                            exp -= 4;
                            continue;
                        }
                        else
                        {
                            bitnum = 32 - count_u32_leading_zero(c) - 1;
                            exp -= (4 - bitnum);
                            c &= ((1 << bitnum) - 1);
                            hide = 1;
                        }
                    }
                    else bitnum = 4;
                    if (!pad_mant(&number, &part, &bitoffset, &bitnum, c)) break;
                }
            }

            // Parse the binary exponent ("p" part of the hexadecimal format)
            if (*text == 'p' || *text == 'P')
            {
                text++;
                // Skip '+' sign
                if (*text == '+') text++;
                // Handle '-' sign
                else if (*text == '-')
                {
                    p_sign = -1;
                    text++;
                }
                // Check if the first character of the exponent is a valid digit
                if (!(*text >= '0' && *text <= '9'))
                {
                    return FLOATL_NAN;
                }
                // Convert the exponent part
                // NOTE(review): a very long digit run overflows 'int' here
                while (*text >= '0' && *text <= '9')
                {
                    p_scale = (p_scale * 10) + (*text++ - '0'); /* number */
                }
            }
            // Apply the sign to the binary exponent
            if (p_sign < 0) p_scale = -p_scale;

            // No non-zero digit was seen, so the value is zero. Without this
            // check the empty mantissa plus a biased exponent would encode
            // 1.0 * 2^exp (e.g. "0x0" would come out as 1.0).
            if (!hide)
            {
                floatl zero = FLOATL_CONST_0;
                zero.sign = sign > 0 ? 0 : 1;
                return zero;
            }

            // Update the exponent
            exp += p_scale;
            // Check for overflow
            if (exp > __FLOATL_EXP_MID_VALUE__)
            {
                floatl inf = FLOATL_INF;
                inf.sign = sign > 0 ? 0 : 1;
                return inf;
            }
            // Check for underflow
            // NOTE(review): values of exp at or just below -__FLOATL_EXP_MID_VALUE__
            // pass this test and yield a biased exponent <= 0 - confirm the bound.
            else if (exp < -__FLOATL_EXP_MID_VALUE__ - 1)
            {
                floatl zero = FLOATL_CONST_0;
                zero.sign = sign > 0 ? 0 : 1;
                return zero;
            }
            // Set the sign and the biased exponent of the result number
            number.sign = sign > 0 ? 0 : 1;
            number.exponent = __FLOATL_EXP_MID_VALUE__ + exp;
            return number;
        }
    }

    // Parse the integer part of the decimal number
    if (*text >= '1' && *text <= '9')
    {
        do
        {
            // Multiply the number by 10 and add the current digit
            number = floatl_mul(number, FLOATL_CONST_10); /* carry addition */
            number = floatl_add(number, floatl_from_d((double)(*text++ - '0')));
        } while (*text >= '0' && *text <= '9');
    }

    // Parse the fraction part of the decimal number
    if (*text == '.')
    {
        text++;
        // Check if the first character of the fraction part is a valid digit
        if (!(*text >= '0' && *text <= '9'))
        {
            return FLOATL_NAN;
        }
        // Keep accumulating digits as an integer and remember, in 'scale',
        // how many powers of ten have to be taken back out at the end
        do
        {
            number = floatl_mul(number, FLOATL_CONST_10);
            number = floatl_add(number, floatl_from_d((double)(*text++ - '0')));
            scale--;
        } while (*text >= '0' && *text <= '9');
    }

    // Parse the exponent part of the decimal number
    if (*text == 'e' || *text == 'E')
    {
        text++;
        // Skip '+' sign
        if (*text == '+') text++;
        // Handle '-' sign
        else if (*text == '-')
        {
            e_sign = -1;
            text++;
        }
        // Check if the first character of the exponent is a valid digit
        if (!(*text >= '0' && *text <= '9'))
        {
            return FLOATL_NAN;
        }
        // Convert the exponent part
        // NOTE(review): a very long digit run overflows 'int' here
        while (*text >= '0' && *text <= '9')
        {
            e_scale = (e_scale * 10) + (*text++ - '0'); /* number */
        }
    }

    // Apply the combined power of ten, then the sign
    number = floatl_mul(number, floatl_pow10(scale + e_scale * e_sign));
    number.sign = sign > 0 ? 0 : 1;
    return number;
}
/**
* \brief Convert a double - precision floating - point number (double) to a 'floatl' type.
* \param[in] value: The double - precision floating - point number to be converted.
* \return A 'floatl' value representing the converted number. Returns FLOATL_NAN if the input is NaN,
* FLOATL_INF if the input is infinity, and the converted value otherwise.
*
* This function takes a double value and converts it to a 'floatl' value. It first checks if the input
* is NaN or infinity and sets the result accordingly. Otherwise, it extracts the mantissa and exponent
* from the double and adjusts them to fit the 'floatl' format.
*/
floatl floatl_from_d(double value)
{
    /*
     * Converts a double to floatl by re-encoding its sign, exponent and
     * mantissa fields. NaN maps to FLOATL_NAN, infinity to FLOATL_INF and
     * +/-0.0 to a floatl zero; the sign bit of the input is always copied.
     */
    // Result starts as all-zero bits
    floatl result = {0};
    // Union to access the double value as its components
    double_u f = {.float_ = value};
    // All-ones exponent with a non-zero mantissa: NaN
    if (f.exponent == 2047 && (f.mantissa_h != 0 || f.mantissa_l != 0))
    {
        result = FLOATL_NAN;
    }
    // All-ones exponent with a zero mantissa: infinity
    else if (f.exponent == 2047)
    {
        result = FLOATL_INF;
    }
    // Zero exponent and zero mantissa: +/-0.0. Keep the all-zero result;
    // re-biasing the exponent below would turn zero into the tiny non-zero
    // value 2^-1023 whenever the floatl bias differs from 1023.
    else if (f.exponent == 0 && f.mantissa_h == 0 && f.mantissa_l == 0)
    {
        /* nothing to do: result is already zero, the sign is set below */
    }
    else
    {
        // NOTE(review): subnormal doubles (exponent 0, mantissa != 0) take this
        // path too and are encoded as if they had a hidden leading one bit;
        // they are not normalized. Confirm whether that matters to callers.
        // Copy the 52 mantissa bits into the two lowest mantissa words
        result.mantissas[0] = f.mantissa_l;
        result.mantissas[1] = f.mantissa_h;
        // Left-align them within the wider floatl mantissa
        result = floatl_int_shl(result, __FLOATL_MANT_BITS__ - 52);
        // Remove the double bias (1023) and apply the floatl bias
        result.exponent = (uint32_t)(((int32_t)f.exponent - 1023) + __FLOATL_EXP_MID_VALUE__);
    }
    // Set the sign of the 'floatl' result
    result.sign = f.sign;
    return result;
}
/**
* \brief Get the sign of a 'floatl' number.
* \param[in] a: The 'floatl' number whose sign is to be determined.
* \return The sign of the 'floatl' number. It returns the value of the 'sign' field in the 'floatl' structure.
*
* This function simply extracts and returns the sign bit of the given 'floatl' number.
*/
int floatl_sign(floatl a)
{
    // The result is the raw 'sign' field of the number, widened to int
    const int sign_bit = a.sign;
    return sign_bit;
}
/**
* \brief Check if a 'floatl' number is NaN (Not a Number).
* \param[in] a: The 'floatl' number to be checked.
* \return 1 if the 'floatl' number is NaN, 0 otherwise.
*
* This function checks if the 'floatl' number is NaN by performing a bitwise AND operation
* between the number and a mask (__FLOATL_ALL_EXP_MANT__) and then comparing the result
* with the representation of infinity (__FLOATL_INF__). If the result is greater than the
* infinity representation, the number is considered NaN.
*/
int floatl_isnan(floatl a)
{
    // Mask the number with __FLOATL_ALL_EXP_MANT__ before comparing ...
    const floatl magnitude = floatl_int_and(a, __FLOATL_ALL_EXP_MANT__);
    // ... then order it against the infinity pattern as an unsigned integer
    const int order = floatl_int_ucmp(magnitude, __FLOATL_INF__);
    // Anything strictly above the infinity pattern is a NaN
    return order > 0;
}
/**
* \brief Check if a 'floatl' number is infinity.
* \param[in] a: The 'floatl' number to be checked.
* \return 1 if the 'floatl' number is infinity, 0 otherwise.
*
* This function checks if the 'floatl' number is infinity by performing a bitwise AND operation
* between the number and a mask (__FLOATL_ALL_EXP_MANT__) and then comparing the result
* with the representation of infinity (__FLOATL_INF__). If the result is equal to the
* infinity representation, the number is considered infinity.
*/
int floatl_isinf(floatl a)
{
    // Mask the number with __FLOATL_ALL_EXP_MANT__ before comparing ...
    const floatl magnitude = floatl_int_and(a, __FLOATL_ALL_EXP_MANT__);
    // ... then order it against the infinity pattern as an unsigned integer
    const int order = floatl_int_ucmp(magnitude, __FLOATL_INF__);
    // Only an exact match with the infinity pattern counts
    return order == 0;
}
/**
* \brief Check if a 'floatl' number is a normal number.
* \param[in] a: The 'floatl' number to be checked.
* \return 1 if the 'floatl' number is a normal number, 0 otherwise.
*
* A normal number is defined as a number whose exponent is neither 0 nor the maximum possible
* exponent value (__FLOATL_EXP_WHL_VALUE__). This function checks these conditions and
* returns the appropriate result.
*/
int floatl_isnormal(floatl a)
{
    // An all-zero exponent field is never a normal number
    if (a.exponent == 0)
    {
        return 0;
    }
    // Neither is the maximum exponent value; everything in between is normal
    return a.exponent != __FLOATL_EXP_WHL_VALUE__;
}
/**
* \brief Compare if a 'floatl' number 'a' is less than another 'floatl' number 'b'.
* \param[in] a: The first 'floatl' number to be compared.
* \param[in] b: The second 'floatl' number to be compared.
* \return true if 'a' is less than 'b', false otherwise.
*
* This function compares two 'floatl' numbers to determine if 'a' is less than 'b'.
* It first checks the signs of the two numbers. If the signs are different,
* it checks if 'a' is negative and the result of a bit - shifted OR operation of 'a' and 'b' is non - zero.
* If the signs are the same, it checks if the two numbers are not equal and applies a condition based on the sign.
*/
static bool floatl_real_lt(floatl a, floatl b)
{
    const bool a_neg = a.sign;
    const bool b_neg = b.sign;
    if (a_neg != b_neg)
    {
        // Opposite signs: only a negative 'a' can be the smaller one ...
        if (!a_neg)
        {
            return false;
        }
        // ... unless both are zero (-0 vs +0). Shifting the OR of both
        // operands left by one drops the sign bit, leaving zero only then.
        return floatl_int_sign(floatl_int_shl(floatl_int_or(a, b), 1)) != 0;
    }
    // Same sign: order the raw bit patterns as unsigned integers
    const int order = floatl_int_ucmp(a, b);
    if (order == 0)
    {
        return false; // identical values are not "less than"
    }
    // Positive: the smaller pattern is smaller. Negative: the larger pattern is smaller.
    return a_neg != (order < 0);
}
/**
* \brief Compare if a 'floatl' number 'a' is less than or equal to another 'floatl' number 'b'.
* \param[in] a: The first 'floatl' number to be compared.
* \param[in] b: The second 'floatl' number to be compared.
* \return true if 'a' is less than or equal to 'b', false otherwise.
*
* This function compares two 'floatl' numbers to determine if 'a' is less than or equal to 'b'.
* It first checks the signs of the two numbers. If the signs are different,
* it checks if 'a' is negative or the result of a bit - shifted OR operation of 'a' and 'b' is zero.
* If the signs are the same, it checks if the two numbers are equal or the appropriate condition based on the sign holds.
*/
static bool floatl_real_le(floatl a, floatl b)
{
    const bool a_neg = a.sign;
    const bool b_neg = b.sign;
    if (a_neg != b_neg)
    {
        // Opposite signs: a negative 'a' is always <= a non-negative 'b'
        if (a_neg)
        {
            return true;
        }
        // 'a' is non-negative and 'b' negative: only true when both are zero.
        // Shifting the OR of both operands left by one drops the sign bit.
        return floatl_int_sign(floatl_int_shl(floatl_int_or(a, b), 1)) == 0;
    }
    // Same sign: order the raw bit patterns as unsigned integers
    const int order = floatl_int_ucmp(a, b);
    if (order == 0)
    {
        return true; // equal values satisfy <=
    }
    // Positive: the smaller pattern is smaller. Negative: the larger pattern is smaller.
    return a_neg != (order < 0);
}
/**
* \brief Check if two 'floatl' numbers are equal.
* \param[in] a: The first 'floatl' number to be compared.
* \param[in] b: The second 'floatl' number to be compared.
* \return 1 if the two numbers are equal, 0 otherwise.
*
* This function first checks if either of the numbers is a NaN (Not a Number).
* If so, it immediately returns 0 because NaN is not equal to any number, including itself.
* Otherwise, it checks if the two numbers are equal by comparing their bit - representations
* using floatl_int_ucmp. It also considers the case where both numbers might be zero
* by using floatl_int_sign and floatl_int_shl along with floatl_int_or.
*/
int floatl_eq(floatl a, floatl b)
{
    // A NaN on either side never compares equal
    if (floatl_isnan(a))
    {
        return 0;
    }
    if (floatl_isnan(b))
    {
        return 0;
    }
    // Identical bit patterns are equal
    if (floatl_int_ucmp(a, b) == 0)
    {
        return 1;
    }
    // Different patterns are still equal when both are zero (+0 vs -0):
    // shifting the OR of both left by one removes the sign bit
    return floatl_int_sign(floatl_int_shl(floatl_int_or(a, b), 1)) == 0;
}
/**
 * \brief Test two 'floatl' numbers for inequality.
 * \param[in] a: Left-hand operand.
 * \param[in] b: Right-hand operand.
 * \return 1 when the operands differ, 0 when they are equal.
 *
 * This is the exact logical inverse of floatl_eq, so a NaN operand yields 1.
 */
int floatl_ne(floatl a, floatl b)
{
    return floatl_eq(a, b) ? 0 : 1;
}
/**
 * \brief Test whether a < b.
 * \param[in] a: Left-hand operand.
 * \param[in] b: Right-hand operand.
 * \return 1 if a < b, 0 otherwise (including when either operand is NaN).
 *
 * NaN operands are filtered out here; the ordering itself is delegated to floatl_real_lt.
 */
int floatl_lt(floatl a, floatl b)
{
    // Unordered operands never satisfy a relational test
    if (!floatl_isnan(a) && !floatl_isnan(b))
    {
        return floatl_real_lt(a, b);
    }
    return 0;
}
/**
 * \brief Test whether a <= b.
 * \param[in] a: Left-hand operand.
 * \param[in] b: Right-hand operand.
 * \return 1 if a <= b, 0 otherwise (including when either operand is NaN).
 *
 * NaN operands are filtered out here; the ordering itself is delegated to floatl_real_le.
 */
int floatl_le(floatl a, floatl b)
{
    // Unordered operands never satisfy a relational test
    if (!floatl_isnan(a) && !floatl_isnan(b))
    {
        return floatl_real_le(a, b);
    }
    return 0;
}
/**
 * \brief Test whether a > b.
 * \param[in] a: Left-hand operand.
 * \param[in] b: Right-hand operand.
 * \return 1 if a > b, 0 otherwise (including when either operand is NaN).
 *
 * Once NaN has been ruled out the operands are totally ordered, so a > b is
 * evaluated as the negation of floatl_real_le(a, b).
 */
int floatl_gt(floatl a, floatl b)
{
    // Unordered operands never satisfy a relational test
    if (!floatl_isnan(a) && !floatl_isnan(b))
    {
        return !floatl_real_le(a, b);
    }
    return 0;
}
/**
 * \brief Test whether a >= b.
 * \param[in] a: Left-hand operand.
 * \param[in] b: Right-hand operand.
 * \return 1 if a >= b, 0 otherwise (including when either operand is NaN).
 *
 * Once NaN has been ruled out the operands are totally ordered, so a >= b is
 * evaluated as the negation of floatl_real_lt(a, b).
 */
int floatl_ge(floatl a, floatl b)
{
    // Unordered operands never satisfy a relational test
    if (!floatl_isnan(a) && !floatl_isnan(b))
    {
        return !floatl_real_lt(a, b);
    }
    return 0;
}
/**
 * \brief Return the absolute value of a 'floatl' number.
 * \param[in] a: The input number.
 * \return A copy of 'a' with its sign bit cleared.
 *
 * Only the sign bit is touched; exponent and mantissa are passed through unchanged.
 */
floatl floatl_abs(floatl a)
{
    // Clearing an already-clear sign bit is a no-op, so no test is needed
    a.sign = 0;
    return a;
}
/**
 * \brief Return the negation of a 'floatl' number.
 * \param[in] a: The input number.
 * \return A copy of 'a' with its sign bit inverted.
 *
 * Only the sign bit is touched; exponent and mantissa are passed through unchanged.
 */
floatl floatl_neg(floatl a)
{
    // Flip the sign: set it when clear, clear it when set
    a.sign = a.sign ? 0 : 1;
    return a;
}
/**
 * \brief Round a 'floatl' number up to the nearest integer (ceiling).
 * \param[in] a: The input number.
 * \return The smallest integral 'floatl' value that is not less than 'a'.
 *
 * Inputs whose unbiased exponent is negative are handled by comparison against zero:
 * positive values give 1, zero gives 0 and negative values give negative zero.
 * Inputs with no fractional mantissa bits are returned unchanged. Everything else has its
 * fractional mantissa bits masked off (a truncation of the magnitude) and is bumped by one
 * when that truncation landed below the input.
 */
floatl floatl_ceil(floatl a)
{
    // Unbiased exponent of the input
    int32_t unbiased = a.exponent - __FLOATL_EXP_MID_VALUE__;

    if (unbiased < 0)
    {
        // Positive fraction rounds up to one
        if (floatl_gt(a, FLOATL_CONST_0))
            return FLOATL_CONST_1;
        // Zero stays zero
        if (floatl_eq(a, FLOATL_CONST_0))
            return FLOATL_CONST_0;
        // Negative fraction rounds up to negative zero
        return floatl_neg(FLOATL_CONST_0);
    }

    // Number of mantissa bits that lie below the binary point
    int32_t frac_bits = __FLOATL_MANT_BITS__ - unbiased;
    if (frac_bits <= 0)
        return a;

    // Integer 1, used to build the mask (1 << frac_bits) - 1
    floatl lsb = FLOATL_CONST_0;
    lsb.u32[0] = 1;
    floatl frac_mask = floatl_int_sub(floatl_int_shl(lsb, frac_bits), lsb);

    // Drop the fractional bits
    floatl truncated = floatl_int_and(a, floatl_int_not(frac_mask));

    // Only positive non-integers end up below the input after truncation
    return floatl_lt(truncated, a) ? floatl_add(truncated, FLOATL_CONST_1) : truncated;
}
/**
 * \brief Round a 'floatl' number down to the nearest integer (floor).
 * \param[in] a: The input number.
 * \return The largest integral 'floatl' value that is not greater than 'a'.
 *
 * Inputs whose unbiased exponent is negative yield 0 when a >= 0 and -1 otherwise.
 * Inputs with no fractional mantissa bits are returned unchanged. Everything else has its
 * fractional mantissa bits masked off (a truncation of the magnitude) and is lowered by one
 * when that truncation landed above the input.
 */
floatl floatl_floor(floatl a)
{
    // Unbiased exponent of the input
    int32_t unbiased = a.exponent - __FLOATL_EXP_MID_VALUE__;

    if (unbiased < 0)
    {
        // Non-negative fraction floors to zero
        if (floatl_ge(a, FLOATL_CONST_0))
            return FLOATL_CONST_0;
        // Negative fraction floors to minus one
        return floatl_neg(FLOATL_CONST_1);
    }

    // Number of mantissa bits that lie below the binary point
    int32_t frac_bits = __FLOATL_MANT_BITS__ - unbiased;
    if (frac_bits <= 0)
        return a;

    // Integer 1, used to build the mask (1 << frac_bits) - 1
    floatl lsb = FLOATL_CONST_0;
    lsb.u32[0] = 1;
    floatl frac_mask = floatl_int_sub(floatl_int_shl(lsb, frac_bits), lsb);

    // Drop the fractional bits
    floatl truncated = floatl_int_and(a, floatl_int_not(frac_mask));

    // Only negative non-integers end up above the input after truncation
    if (floatl_gt(truncated, a))
        return floatl_sub(truncated, FLOATL_CONST_1);
    return truncated;
}
/**
 * \brief Rounds a 'floatl' number to the nearest integer, halfway cases away from zero.
 * \param[in] a: The 'floatl' number to be rounded.
 * \return A 'floatl' number representing the rounded value of the input number.
 *
 * Inputs whose unbiased exponent is negative are resolved by comparison against +/-0.5 and
 * keep the sign of the input (so +0 stays +0 and -0 stays -0). Inputs with no fractional
 * mantissa bits are returned unchanged. Otherwise the fractional mantissa bits are masked
 * off, which truncates the magnitude, and when the fraction is at least one half the result
 * is moved one further away from zero: +1 for positive inputs, -1 for negative inputs.
 */
floatl floatl_round(floatl a)
{
    // Calculate the actual exponent value by subtracting the mid-point of the exponent range
    int32_t exponent = a.exponent - __FLOATL_EXP_MID_VALUE__;
    // A negative exponent means the input is handled by comparison against +/-0.5
    if (exponent < 0)
    {
        // Branch on the sign bit so that +0 takes the positive path and yields +0
        if (!a.sign)
        {
            // 0.5 and above rounds up to 1, anything smaller to +0
            return (floatl_ge(a, floatl(0.5))) ? FLOATL_CONST_1 : FLOATL_CONST_0;
        }
        // -0.5 and below rounds to -1, anything closer to zero to -0
        return (floatl_le(a, floatl(-0.5))) ? floatl_neg(FLOATL_CONST_1) : floatl_neg(FLOATL_CONST_0);
    }
    // Number of mantissa bits that lie below the binary point
    int32_t shift = __FLOATL_MANT_BITS__ - exponent;
    // If the shift is less than or equal to 0, the number is already an integer
    if (shift <= 0) return a;
    // Integer 1, used to build the masks below
    floatl int1 = FLOATL_CONST_0;
    int1.u32[0] = 1;
    // Mask covering the fractional mantissa bits: (1 << shift) - 1
    floatl mask = floatl_int_sub(floatl_int_shl(int1, shift), int1);
    // Clear the fractional bits; this truncates the magnitude (rounds toward zero)
    floatl result = floatl_int_and(a, floatl_int_not(mask));
    // Extract the fractional bits
    floatl fractional = floatl_int_and(a, mask);
    // Fraction >= one half (top fractional bit set)?
    if (floatl_int_ucmp(fractional, floatl_int_shl(int1, shift - 1)) >= 0)
    {
        // Step away from zero: the truncated value sits between zero and the input,
        // so negative inputs must be decremented, positive ones incremented
        result = a.sign ? floatl_sub(result, FLOATL_CONST_1) : floatl_add(result, FLOATL_CONST_1);
    }
    return result;
}
/**
 * \brief Shift an integer-valued 'floatl' right, folding any discarded bits into a sticky bit.
 * \param[in] mant: The value to shift.
 * \param[in] n: The number of bit positions to shift right.
 * \return The shifted value, with bit 0 set when any non-zero bit was shifted out.
 *
 * For n below __FLOATL_BIT_PARTS__ - 1 the value is shifted with floatl_int_shr and the
 * discarded bits are recovered by shifting the original left by __FLOATL_BIT_PARTS__ - n.
 * For larger n the shifted value is zero and the sticky bit reflects whether the input
 * was non-zero at all.
 */
static floatl shift_right_sticky(floatl mant, uint32_t n)
{
    floatl shifted = __FLOATL_INT_ZERO__;
    int lost_nonzero;

    if (n < __FLOATL_BIT_PARTS__ - 1)
    {
        // Regular case: keep the upper part, inspect what fell off the bottom
        shifted = floatl_int_shr(mant, n);
        lost_nonzero = floatl_int_sign(floatl_int_shl(mant, __FLOATL_BIT_PARTS__ - n)) != 0;
    }
    else
    {
        // Everything is shifted out: the whole input is the discarded part
        lost_nonzero = floatl_int_sign(mant) != 0;
    }

    if (lost_nonzero)
    {
        // Record the loss in the least significant bit
        shifted.u32[0] |= 1;
    }
    return shifted;
}
/**
 * \brief Multiply two integer-valued 'floatl' mantissas and shift the double-width product
 *        right, folding any discarded bits into a sticky bit.
 * \param[in] mant1: The first mantissa.
 * \param[in] mant2: The second mantissa.
 * \return The low 'floatl' half of the shifted product, with bit 0 set when any non-zero bit
 *         was shifted out.
 *
 * The product is formed with floatl2_int_umul and shifted right by __FLOATL_MANT_BITS__ - 2.
 * The discarded bits are recovered by shifting the product left by the complementary amount
 * within the 2 * __FLOATL_BIT_PARTS__ wide value.
 */
static floatl shift_right_sticky_fl2(floatl mant1, floatl mant2)
{
    // Double-width product of the two mantissas
    floatl2 product = floatl2_int_umul(FLOATL2(mant1), FLOATL2(mant2));

    // Part of the product that is kept
    floatl2 kept = floatl2_int_shr(product, __FLOATL_MANT_BITS__ - 2);

    // Part of the product that the right shift threw away
    floatl2 dropped = floatl2_int_shl(product, __FLOATL_BIT_PARTS__ * 2 - (__FLOATL_MANT_BITS__ - 2));

    if (floatl2_int_sign(dropped))
    {
        // Record the loss in the least significant bit
        kept.u32[0] |= 1;
    }

    // Only the lower half is handed back to the caller
    return kept.low;
}
/**
 * \brief Round an integer-valued 'floatl' to nearest, ties to even, using its low 3 bits
 *        as guard/round/sticky (GRS) bits.
 * \param[in] mant: The value to round; bits 0..2 are the GRS bits.
 * \return The value shifted right by 3 bits and incremented by one when rounding up applies.
 *
 * The GRS bits are read as a number 0..7. A value above 4 is more than half a unit and
 * rounds up; exactly 4 is a tie and rounds up only when the shifted value is odd, which
 * makes the result even; anything below 4 rounds down (no increment).
 */
static floatl round_even_grs(floatl mant)
{
    // Extract the GRS (Guard, Round, Sticky) bits
    uint8_t grs = mant.u32[0] & 7;
    // Drop the GRS bits from the value
    mant = floatl_int_shr(mant, 3);
    // More than half, or exactly half with an odd result: round up
    if ((grs > 4) || ((grs == 4) && (mant.u32[0] & 1)))
        mant = floatl_int_inc(mant);
    return mant;
}
/**
 * \brief Build the NaN-or-infinity result for an operand whose exponent field is all ones.
 * \param[in] sign: Sign to give an infinite result (true for negative).
 * \param[in] mant: Mantissa bits to inspect.
 * \return __FLOATL_NAN__ when 'mant' is non-zero, otherwise __FLOATL_INF__ with bit 31 of
 *         its most significant 32-bit part OR-ed with 'sign'.
 */
static inline floatl floatl_nan_inf(bool sign, floatl mant)
{
    // Any mantissa bit set means NaN
    if (floatl_int_sign(mant))
        return __FLOATL_NAN__;

    // Otherwise infinity, carrying the requested sign in the top bit
    floatl inf = __FLOATL_INF__;
    inf.u32[__FLOATL_U32_PARTS__ - 1] |= ((uint32_t)sign << 31);
    return inf;
}
/**
 * \brief Count the leading zero bits of a 32-bit unsigned integer.
 * \param[in] x: The value to inspect.
 * \return The number of zero bits above the most significant set bit; 32 when x is 0.
 *
 * The search halves its window each step (16, 8, 4, 2, 1 bits): whenever the top 'width'
 * bits are all zero they are counted and shifted out, so the next step looks at the
 * following, narrower window.
 */
static int count_u32_leading_zero(uint32_t x)
{
    // No set bit at all
    if (x == 0)
        return 32;

    int zeros = 0;
    for (int width = 16; width > 0; width >>= 1)
    {
        // Are the top 'width' bits all clear?
        if ((x >> (32 - width)) == 0)
        {
            zeros += width;
            x <<= width;
        }
    }
    return zeros;
}
/**
 * \brief Count the leading zero bits of a 'floatl' value viewed as one wide integer.
 * \param[in] x: The value to inspect.
 * \return The number of zero bits above the most significant set bit; 32 times the number
 *         of 32-bit parts when every part is zero.
 *
 * The 32-bit parts are scanned from the most significant (highest index) downwards. Each
 * all-zero part contributes 32; the first non-zero part contributes its own leading-zero
 * count and ends the scan.
 */
static int count_leading_zero(floatl x)
{
    int zeros = 0;
    int part = __FLOATL_U32_PARTS__ - 1;

    // Skip over the fully-zero high parts
    while (part >= 0 && x.u32[part] == 0)
    {
        zeros += 32;
        part--;
    }

    // Add the partial count from the first non-zero part, if there is one
    if (part >= 0)
        zeros += count_u32_leading_zero(x.u32[part]);

    return zeros;
}
/**
* \brief Add the magnitudes of two 'floatl' numbers; the result takes the sign of 'a'.
* \param[in] a: The first 'floatl' number. Its sign bit becomes the sign of the result.
* \param[in] b: The second 'floatl' number. Its sign bit is never read.
* \return A 'floatl' number holding |a| + |b| with the sign of 'a', or the special value
* selected below (signed zero, the other operand, NaN, or signed infinity).
*
* Special cases, in the order they are tested:
* - both exponent fields are 0: a zero with the result sign is returned (mantissa bits are ignored);
* - an exponent field equals __FLOATL_ALL_EXP__.exponent: the result comes from floatl_nan_inf(),
* i.e. NaN when the inspected mantissa is non-zero, otherwise infinity with the result sign;
* - exactly one exponent field is 0: the other operand is returned with the result sign.
*
* Otherwise the hidden bit is OR-ed into both mantissas, the mantissa of the operand with the
* smaller exponent is shifted right with a sticky bit, the mantissas are added, and the sum is
* normalised so that round_even_grs() can consume its low 3 bits. An exponent that reaches the
* all-ones value yields infinity with the result sign.
*/
static floatl real_floatl_add(floatl a, floatl b)
{
// Biased exponent field of the first number
uint32_t exp1 = a.exponent;
// Biased exponent field of the second number
uint32_t exp2 = b.exponent;
// Mantissa bits of the first number only (sign and exponent fields cleared)
floatl mant1 = a; mant1.sign = 0; mant1.exponent = 0U;
// Mantissa bits of the second number only (sign and exponent fields cleared)
floatl mant2 = b; mant2.sign = 0; mant2.exponent = 0U;
// The result always carries the sign of the first number
bool sign = a.sign;
// Signed exponent difference; its sign tells which mantissa must be aligned
int32_t expdiff = exp1 - exp2;
// Equal exponents: the two special exponent values apply to both operands at once
if (expdiff == 0)
{
// Both exponent fields are 0: the operands are treated as zero and a signed zero is returned
if (exp1 == 0)
{
a = __FLOATL_INT_ZERO__;
a.sign = sign;
return a;
}
// Both operands are Inf/NaN: NaN if either mantissa is non-zero, otherwise signed infinity
if (exp1 == __FLOATL_ALL_EXP__.exponent)
{
return floatl_nan_inf(sign, floatl_int_or(mant1, mant2));
}
}
// Only the first number is Inf/NaN: its mantissa decides between NaN and infinity
if (exp1 == __FLOATL_ALL_EXP__.exponent) return floatl_nan_inf(sign, mant1);
// Only the second number is Inf/NaN: its mantissa decides between NaN and infinity
if (exp2 == __FLOATL_ALL_EXP__.exponent) return floatl_nan_inf(sign, mant2);
// First exponent field is 0 (treated as zero): the result is the second number with the result sign
if (exp1 == 0)
{
b.sign = sign;
return b;
}
// Second exponent field is 0 (treated as zero): the result is the first number with the result sign
if (exp2 == 0)
{
a.sign = sign;
return a;
}
// Make the implicit leading 1 explicit in both mantissas
mant1 = floatl_int_or(mant1, __FLOATL_HIDE_MANT_BIT__);
mant2 = floatl_int_or(mant2, __FLOATL_HIDE_MANT_BIT__);
// Make room for 2 extra low-order bits; together with the normalisation step after the
// addition this gives the 3 low bits that round_even_grs() consumes
mant1 = floatl_int_shl(mant1, 2);
mant2 = floatl_int_shl(mant2, 2);
// First exponent is the smaller one
if (expdiff < 0)
{
expdiff = -expdiff;
// Align the first mantissa to the larger exponent, keeping a sticky bit for lost bits
mant1 = shift_right_sticky(mant1, expdiff);
// From here on exp1 holds the exponent of the result
exp1 = exp2;
}
// First exponent is the larger one
else if (expdiff > 0)
{
// Align the second mantissa to the larger exponent, keeping a sticky bit for lost bits
mant2 = shift_right_sticky(mant2, expdiff);
}
// Add the aligned mantissas as integers
floatl mant = floatl_int_add(mant1, mant2);
// NOTE(review): __FLOATL_MANT_PLUS_MAX__ is defined outside this view; presumably it is the
// threshold at which the sum has carried into one extra bit - confirm against its definition
if (floatl_int_ucmp(mant, __FLOATL_MANT_PLUS_MAX__) < 0)
{
// Below the threshold: shift left once so 3 low bits sit below the mantissa
mant = floatl_int_shl(mant, 1);
}
else
{
// At or above the threshold: keep the bits in place and bump the exponent instead
exp1++;
}
// Drop the low 3 bits with round-to-nearest, ties-to-even
mant = round_even_grs(mant);
// Rounding may push the mantissa past __FLOATL_MANT_WHL__ (defined outside this view)
if (floatl_int_ucmp(mant, __FLOATL_MANT_WHL__) > 0)
{
// Account for the rounding overflow in the exponent
exp1++;
}
// An exponent that reaches the all-ones value is no longer a finite number
if (exp1 > ((uint32_t)(__FLOATL_ALL_EXP__.exponent) - 1))
{
a = __FLOATL_INF__;
a.sign = sign;
return a;
}
// Assemble the result: mantissa bits first, then the sign and exponent fields are overwritten
a = mant;
a.sign = sign;
a.exponent = exp1;
return a;
}
/**
* \brief Subtract the magnitude of 'b' from the magnitude of 'a'.
* The result has the sign of 'a' when |a| is the larger magnitude, and the inverted sign of 'a' otherwise.
* \param[in] a: The first 'floatl' number. Its sign bit is the starting sign of the result.
* \param[in] b: The second 'floatl' number. Its sign bit is never read.
* \return A 'floatl' number representing the result of the subtraction, or the special value
* selected below (NaN, signed infinity, zero, or one of the operands).
*
* Special cases:
* - equal exponents that are both __FLOATL_ALL_EXP__.exponent: NaN;
* - equal exponents and equal mantissas: __FLOATL_INT_ZERO__ is returned as is (no sign applied);
* - only the larger exponent is __FLOATL_ALL_EXP__.exponent: floatl_nan_inf() on that operand;
* - the smaller exponent field is 0: the other operand is returned with the result sign.
*
* Otherwise the mantissas are extended by 3 low-order bits, the smaller operand is aligned with
* a sticky bit, the smaller magnitude is subtracted from the larger, the difference is shifted
* left to renormalise (limited by the available exponent), rounded with round_even_grs(), and
* a result whose exponent drops below 1 is flushed to a signed zero.
*/
static floatl real_floatl_sub(floatl a, floatl b)
{
// Biased exponent field of the first number
uint32_t exp1 = a.exponent;
// Biased exponent field of the second number
uint32_t exp2 = b.exponent;
// Mantissa bits of the first number only (sign and exponent fields cleared)
floatl mant1 = a; mant1.sign = 0; mant1.exponent = 0U;
// Mantissa bits of the second number only (sign and exponent fields cleared)
floatl mant2 = b; mant2.sign = 0; mant2.exponent = 0U;
// Difference of the aligned mantissas, filled in by one of the three branches below
floatl mant = __FLOATL_INT_ZERO__;
// Start from the sign of the first number; it is inverted whenever |b| turns out larger
bool sign = a.sign;
// Signed exponent difference; its sign tells which operand has the larger magnitude
int32_t expdiff = exp1 - exp2;
// Equal exponents: the mantissas alone decide which magnitude is larger
if (expdiff == 0)
{
// Both operands are Inf/NaN: the difference is NaN in every combination
if (exp1 == __FLOATL_ALL_EXP__.exponent) return __FLOATL_NAN__;
// Identical magnitudes cancel exactly; the zero constant is returned without applying 'sign'
if (floatl_int_ucmp(mant1, mant2) == 0) return __FLOATL_INT_ZERO__;
// |a| > |b|
if (floatl_int_ucmp(mant1, mant2) > 0)
{
// The hidden bits cancel, so the raw mantissas can be subtracted directly
mant = floatl_int_sub(mant1, mant2);
}
else
{
// |a| < |b|: subtract the other way round and invert the result sign
mant = floatl_int_sub(mant2, mant1);
sign = !sign;
}
// Add the 3 low-order bits that round_even_grs() will consume
mant = floatl_int_shl(mant, 3);
}
// First exponent is the smaller one, so |b| is the larger magnitude
else if (expdiff < 0)
{
// The result takes the opposite sign of 'a'
sign = !sign;
// Second number is Inf/NaN: its mantissa decides between NaN and infinity
if (exp2 == __FLOATL_ALL_EXP__.exponent) return floatl_nan_inf(sign, mant2);
// First exponent field is 0 (treated as zero): the result is the second number with the result sign
if (exp1 == 0) { b.sign = sign; return b; }
// From here on exp1 holds the exponent of the result
exp1 = exp2;
expdiff = -expdiff;
// Make the hidden bit explicit and add 3 low-order bits
mant1 = floatl_int_shl(floatl_int_or(mant1, __FLOATL_HIDE_MANT_BIT__), 3);
// Align the first mantissa to the larger exponent, keeping a sticky bit for lost bits
mant1 = shift_right_sticky(mant1, expdiff);
// Larger magnitude (second mantissa, extended the same way) minus the aligned first mantissa
mant = floatl_int_sub(floatl_int_shl(floatl_int_or(mant2, __FLOATL_HIDE_MANT_BIT__), 3), mant1);
}
else
{
// First number is Inf/NaN: its mantissa decides between NaN and infinity
if (exp1 == __FLOATL_ALL_EXP__.exponent) return floatl_nan_inf(sign, mant1);
// Second exponent field is 0 (treated as zero): the result is the first number with the result sign
if (exp2 == 0) { a.sign = sign; return a; }
// Make the hidden bit explicit and add 3 low-order bits
mant2 = floatl_int_shl(floatl_int_or(mant2, __FLOATL_HIDE_MANT_BIT__), 3);
// Align the second mantissa to the larger exponent, keeping a sticky bit for lost bits
mant2 = shift_right_sticky(mant2, expdiff);
// Larger magnitude (first mantissa, extended the same way) minus the aligned second mantissa
mant = floatl_int_sub(floatl_int_shl(floatl_int_or(mant1, __FLOATL_HIDE_MANT_BIT__), 3), mant2);
}
// Left shift needed to renormalise after cancellation: leading zeros of the difference minus
// (__FLOATL_EXP_BITS__ - 3); presumably that is the leading-zero count of an already
// normalised, 3-bit-extended mantissa - TODO confirm against the floatl layout
int shift = count_leading_zero(mant) - (__FLOATL_EXP_BITS__ - 3);
// The exponent cannot be lowered by more than its current value
// NOTE(review): 'shift' (int) is compared with 'exp1' (uint32_t), so 'shift' is converted to unsigned here
if (shift > exp1)
{
shift = exp1;
// Shift the mantissa right by 1 bit before applying the limited left shift
mant = floatl_int_shr(mant, 1);
}
// Lower the exponent by the amount the mantissa is about to be shifted up
exp1 -= shift;
// Renormalise the mantissa
mant = floatl_int_shl(mant, shift);
// Drop the low 3 bits with round-to-nearest, ties-to-even
mant = round_even_grs(mant);
// Rounding may push the mantissa past __FLOATL_MANT_WHL__ (defined outside this view)
if (floatl_int_ucmp(mant, __FLOATL_MANT_WHL__) > 0)
{
// Account for the rounding overflow in the exponent
exp1++;
}
// An exponent of 0 cannot hold the result: flush to a zero carrying the result sign
if (exp1 < 1)
{
a = __FLOATL_INT_ZERO__;
a.sign = sign;
return a;
}
// Assemble the result: mantissa bits first, then the sign and exponent fields are overwritten
a = mant;
a.sign = sign;
a.exponent = exp1;
return a;
}
/**
 * \brief Add two 'floatl' numbers.
 * \param[in] a: The first addend.
 * \param[in] b: The second addend.
 * \return The sum a + b.
 *
 * The work is dispatched on the operand signs: operands with differing signs are handled
 * by real_floatl_sub (a magnitude subtraction), operands with matching signs by
 * real_floatl_add (a magnitude addition).
 */
floatl floatl_add(floatl a, floatl b)
{
    // Opposite signs turn the addition into a subtraction of magnitudes
    return (a.sign != b.sign) ? real_floatl_sub(a, b) : real_floatl_add(a, b);
}
/**
 * \brief Subtract one 'floatl' number from another.
 * \param[in] a: The minuend.
 * \param[in] b: The subtrahend.
 * \return The difference a - b.
 *
 * The work is dispatched on the operand signs: operands with differing signs are handled
 * by real_floatl_add (a magnitude addition), operands with matching signs by
 * real_floatl_sub (a magnitude subtraction).
 */
floatl floatl_sub(floatl a, floatl b)
{
    // Opposite signs turn the subtraction into an addition of magnitudes
    return (a.sign != b.sign) ? real_floatl_add(a, b) : real_floatl_sub(a, b);
}
/**
 * \brief Perform multiplication of two 'floatl' numbers.
 * \param[in] a: The first 'floatl' number to be multiplied.
 * \param[in] b: The second 'floatl' number to be multiplied.
 * \return A 'floatl' number representing the product, with sign a.sign XOR b.sign.
 *
 * Special cases, in the order they are tested:
 * - 'a' has an all-ones exponent: NaN if its mantissa is non-zero, if 'b' is NaN, or if the
 *   exponent field of 'b' is 0 (infinity times zero); otherwise signed infinity;
 * - 'b' has an all-ones exponent: NaN if its mantissa is non-zero or the exponent field of 'a'
 *   is 0; otherwise signed infinity;
 * - either exponent field is 0: signed zero (mantissa bits are ignored).
 *
 * Otherwise the mantissas (with hidden bit) are multiplied at double width, reduced with a
 * sticky bit, normalised, rounded with round_even_grs(), and the exponent is checked for
 * underflow (signed zero) and overflow (signed infinity).
 *
 * The mantissa non-zero tests use floatl_int_sign(), the same integer test that floatl_div()
 * and floatl_nan_inf() apply to mantissas.
 */
floatl floatl_mul(floatl a, floatl b)
{
    // Biased exponent fields of both operands
    uint32_t exp1 = a.exponent;
    uint32_t exp2 = b.exponent;
    // Mantissa bits only (sign and exponent fields cleared)
    floatl mant1 = a; mant1.sign = 0; mant1.exponent = 0U;
    floatl mant2 = b; mant2.sign = 0; mant2.exponent = 0U;
    // The product is negative exactly when the operand signs differ
    bool sign = a.sign ^ b.sign;

    // First operand is infinity or NaN
    if (exp1 == __FLOATL_ALL_EXP__.exponent)
    {
        // NaN operand on either side, or infinity times zero
        if (floatl_int_sign(mant1) || ((exp2 == __FLOATL_ALL_EXP__.exponent) && floatl_int_sign(mant2)) || (exp2 == 0))
            return __FLOATL_NAN__;
        // Infinity times a non-zero value
        a = __FLOATL_INF__;
        a.sign = sign;
        return a;
    }
    // Second operand is infinity or NaN
    if (exp2 == __FLOATL_ALL_EXP__.exponent)
    {
        // NaN operand, or zero times infinity
        if (floatl_int_sign(mant2) || (exp1 == 0))
            return __FLOATL_NAN__;
        // Non-zero value times infinity
        a = __FLOATL_INF__;
        a.sign = sign;
        return a;
    }
    // Either operand has a zero exponent field: the product is a signed zero
    if ((exp1 == 0) || (exp2 == 0))
    {
        a = __FLOATL_INT_ZERO__;
        a.sign = sign;
        return a;
    }

    // Exponents add; one bias has to be removed again
    int32_t exp = exp1 + exp2 - __FLOATL_EXP_MID_VALUE__;
    // Make the implicit leading 1 explicit in both mantissas
    mant1 = floatl_int_or(mant1, __FLOATL_HIDE_MANT_BIT__);
    mant2 = floatl_int_or(mant2, __FLOATL_HIDE_MANT_BIT__);
    // Double-width product, reduced to single width with a sticky bit
    floatl mant = shift_right_sticky_fl2(mant1, mant2);
    // Normalise so that round_even_grs() finds its 3 low bits below the mantissa
    if (floatl_int_ucmp(mant, __FLOATL_MANT_PLUS_MAX__) < 0)
    {
        // Below the threshold: shift left once
        mant = floatl_int_shl(mant, 1);
    }
    else
    {
        // At or above the threshold: keep the bits in place and bump the exponent instead
        exp++;
    }
    // Drop the low 3 bits with round-to-nearest, ties-to-even
    mant = round_even_grs(mant);
    // Rounding may push the mantissa past __FLOATL_MANT_WHL__
    if (floatl_int_ucmp(mant, __FLOATL_MANT_WHL__) > 0)
        exp++;
    // Underflow: the exponent cannot be represented, flush to signed zero
    if (exp < 1)
    {
        a = __FLOATL_INT_ZERO__;
        a.sign = sign;
        return a;
    }
    // Overflow: exp is known to be >= 1 here, so the unsigned comparison is safe
    if (exp > ((uint32_t)__FLOATL_ALL_EXP__.exponent - 1))
    {
        a = __FLOATL_INF__;
        a.sign = sign;
        return a;
    }
    // Assemble the result: mantissa bits first, then the sign and exponent fields are overwritten
    a = mant;
    a.sign = sign;
    a.exponent = exp;
    return a;
}
/**
 * \brief Perform division of two 'floatl' numbers.
 * \param[in] a: The dividend 'floatl' number.
 * \param[in] b: The divisor 'floatl' number.
 * \return A 'floatl' number representing the quotient a / b, with sign a.sign XOR b.sign.
 *
 * Special cases, in the order they are tested:
 * - 'a' has an all-ones exponent: NaN if its mantissa is non-zero or 'b' also has an all-ones
 *   exponent; otherwise signed infinity;
 * - 'b' has an all-ones exponent: NaN if its mantissa is non-zero; otherwise signed zero;
 * - the exponent field of 'a' is 0: NaN if the exponent field of 'b' is 0 as well (0 / 0),
 *   otherwise signed zero;
 * - the exponent field of 'b' is 0: signed infinity (division by zero).
 *
 * Otherwise the mantissas (with hidden bit) are divided at double width, a non-zero
 * remainder is folded into the sticky bit, the quotient is rounded with round_even_grs(),
 * and the exponent is checked for underflow (signed zero) and overflow (signed infinity).
 *
 * The remainder non-zero test uses floatl_int_sign(), the same integer test this function
 * applies to the operand mantissas.
 */
floatl floatl_div(floatl a, floatl b)
{
    // Biased exponent fields of both operands
    uint32_t exp1 = a.exponent;
    uint32_t exp2 = b.exponent;
    // Mantissa bits only (sign and exponent fields cleared)
    floatl mant1 = a; mant1.sign = 0; mant1.exponent = 0U;
    floatl mant2 = b; mant2.sign = 0; mant2.exponent = 0U;
    // The quotient is negative exactly when the operand signs differ
    bool sign = a.sign ^ b.sign;

    // Dividend is infinity or NaN
    if (exp1 == __FLOATL_ALL_EXP__.exponent)
    {
        // NaN dividend, or infinity divided by infinity/NaN
        if (floatl_int_sign(mant1) || (exp2 == __FLOATL_ALL_EXP__.exponent))
            return __FLOATL_NAN__;
        // Infinity divided by a finite value
        a = __FLOATL_INF__;
        a.sign = sign;
        return a;
    }
    // Divisor is infinity or NaN
    if (exp2 == __FLOATL_ALL_EXP__.exponent)
    {
        // NaN divisor
        if (floatl_int_sign(mant2))
            return __FLOATL_NAN__;
        // Finite value divided by infinity
        a = __FLOATL_INT_ZERO__;
        a.sign = sign;
        return a;
    }
    // Dividend has a zero exponent field
    if (exp1 == 0)
    {
        // Zero divided by zero
        if (exp2 == 0)
            return __FLOATL_NAN__;
        // Zero divided by a non-zero value
        a = __FLOATL_INT_ZERO__;
        a.sign = sign;
        return a;
    }
    // Divisor has a zero exponent field: division by zero gives signed infinity
    if (exp2 == 0)
    {
        a = __FLOATL_INF__;
        a.sign = sign;
        return a;
    }

    // Exponents subtract; the bias has to be added back
    int32_t exp = exp1 - exp2 + __FLOATL_EXP_MID_VALUE__;
    // Make the implicit leading 1 explicit in both mantissas
    mant1 = floatl_int_or(mant1, __FLOATL_HIDE_MANT_BIT__);
    mant2 = floatl_int_or(mant2, __FLOATL_HIDE_MANT_BIT__);
    floatl mant;
    floatl2 sigl2 = FLOATL2(__FLOATL_INT_ZERO__);
    // Pre-scale the dividend so the quotient always has its leading 1 in the same position
    if (floatl_int_ucmp(mant1, mant2) < 0)
    {
        // Dividend mantissa is the smaller one: use one extra bit of scaling and
        // compensate in the exponent
        exp--;
        sigl2 = floatl2_int_shl(FLOATL2(mant1), __FLOATL_BIT_PARTS__);
    }
    else
    {
        sigl2 = floatl2_int_shl(FLOATL2(mant1), __FLOATL_BIT_PARTS__ - 1);
    }
    // Remainder is written by floatl2_int_udiv
    floatl2 rem;
    // Double-width division, then drop the surplus low bits so that 3 rounding bits remain
    mant = floatl2_int_shr(floatl2_int_udiv(sigl2, FLOATL2(mant2), &rem), __FLOATL_EXP_BITS__ - 3).low;
    // An inexact division is recorded in the sticky bit
    if (floatl_int_sign(rem.low))
    {
        mant.u32[0] |= 1;
    }
    // Drop the low 3 bits with round-to-nearest, ties-to-even
    mant = round_even_grs(mant);
    // Underflow: the exponent cannot be represented, flush to signed zero
    if (exp < 1)
    {
        a = __FLOATL_INT_ZERO__;
        a.sign = sign;
        return a;
    }
    // Overflow: exp is known to be >= 1 here, so the unsigned comparison is safe
    if (exp > ((uint32_t)__FLOATL_ALL_EXP__.exponent - 1))
    {
        a = __FLOATL_INF__;
        a.sign = sign;
        return a;
    }
    // Assemble the result: mantissa bits first, then the sign and exponent fields are overwritten
    a = mant;
    a.sign = sign;
    a.exponent = exp;
    return a;
}
/**
 * \brief Test whether the magnitude of a 'floatl' number is at least 1, i.e. its integer part is non-zero.
 * \param[in] num: The 'floatl' number to be checked.
 * \return 1 if the unbiased exponent is non-negative, 0 otherwise.
 *
 * A raw exponent field equal to 1 is rejected immediately. Otherwise the exponent bias
 * (__FLOATL_EXP_MID_VALUE__) is removed and the sign of the result decides the answer.
 * The sign bit and the mantissa of 'num' are not inspected.
 */
static int LT1(floatl num)
{
    int32_t unbiased;

    /* Raw exponent field of exactly 1: report "no integer part" */
    if (((uint32_t)num.exponent) == 1)
    {
        return 0;
    }

    /* Remove the bias; a non-negative power of two means |num| >= 1 */
    unbiased = ((int)num.exponent) - __FLOATL_EXP_MID_VALUE__;
    return (unbiased >= 0) ? 1 : 0;
}
/**
 * \brief Test whether a 'floatl' number has a non-zero fractional part.
 * \param[in] num: The 'floatl' number to be checked.
 * \return 1 if fractional bits are present, 0 otherwise.
 *
 * The exponent bias (__FLOATL_EXP_MID_VALUE__) is removed first:
 *  - a negative exponent means the whole value lies below 1, so 1 is returned;
 *  - an exponent of at least __FLOATL_MANT_BITS__ leaves no fractional bits, so 0 is returned;
 *  - otherwise the lowest (__FLOATL_MANT_BITS__ - exp) mantissa bits are the fractional bits.
 *    They are scanned from the low 32-bit words of 'mantissas' upwards and finally in the
 *    high-order 'mantissa' field; any set bit yields 1.
 * The sign bit of 'num' is not inspected.
 */
static int ST1(floatl num)
{
    // Remove the exponent bias
    int exp = ((int)num.exponent) - __FLOATL_EXP_MID_VALUE__;

    // Magnitude below 1: everything is fractional
    if (exp < 0) return 1;

    // Only when the exponent is smaller than the mantissa width do fractional bits exist
    if (exp < __FLOATL_MANT_BITS__)
    {
        // Number of low-order mantissa bits that lie behind the binary point
        int bits = __FLOATL_MANT_BITS__ - exp;

        // Walk the 32-bit mantissa words from least significant upwards
        for (int i = 0; i < __FLOATL_MANT_PARTS__; i++)
        {
            if (bits >= 32)
            {
                // The whole word is fractional
                if (num.mantissas[i] != 0) return 1;
                bits -= 32;
            }
            else
            {
                // Only the low 'bits' bits of this word are fractional.
                // The mask is built in unsigned arithmetic: 'bits' may be 31 and
                // shifting a signed 1 into the sign bit is undefined behaviour.
                if (num.mantissas[i] & (((uint32_t)1 << bits) - 1)) return 1;
                // Nothing left to check
                bits = 0;
                break;
            }
        }

        // Fractional bits that reach into the high-order mantissa field
        if (bits && bits <= __FLOATL_MANT_HIGH_BITS__)
        {
            if (num.mantissa & (((uint32_t)1 << bits) - 1)) return 1;
        }
    }

    return 0;
}
/**
 * \brief Get the character of the lowest-order decimal digit of the integer part of a 'floatl' number.
 * \param[in] num: The 'floatl' number from which to extract the digit.
 * \return '0' when the unbiased exponent is negative, '1' when it is zero, otherwise the
 *         character '0'..'9' of the last decimal digit of the integer part.
 *
 * With an unbiased exponent exp > 0 the integer part is the hidden bit (weight 2^exp) plus every
 * mantissa bit whose weight is at least 2^0. Only the last decimal digit matters, so each
 * contributing bit adds the last digit of its power of two: 1 for 2^0, and the repeating cycle
 * 2, 4, 8, 6 for 2^1, 2^2, 2^3, 2^4, ... The sum is finally reduced modulo 10.
 * The sign bit of 'num' is not inspected.
 */
static char gChar(floatl num)
{
    // Last decimal digit of 2^k for k >= 1, indexed by (k - 1) % 4
    static const char bitEnd[4] = {2, 4, 8, 6};
    // Remove the exponent bias
    int32_t exp = ((int)num.exponent) - __FLOATL_EXP_MID_VALUE__;
    // Running sum of last digits
    uint32_t c = 0;

    // Unbiased exponent 0: '1' is returned without looking at the mantissa
    if (exp == 0) return '1';
    // Magnitude below 1: the integer part is 0
    if (exp < 0) return '0';

    // Visit every mantissa bit, least significant first
    for (int i = 0; i < __FLOATL_MANT_BITS__; i++)
    {
        // Power of two this bit represents; negative means it is a fractional bit
        int index = i - __FLOATL_MANT_BITS__ + exp;
        if (index >= 0)
        {
            // Locate the bit: low words live in 'mantissas', the top word in 'mantissa'
            int part = i / 32;
            int bit = i % 32;
            uint32_t m = part >= __FLOATL_MANT_PARTS__ ? num.mantissa : num.mantissas[part];
            if ((m >> bit) & 1)
            {
                // Add the last decimal digit of 2^index
                c += ((index > 0) ? bitEnd[(index - 1) % 4] : 1);
            }
        }
    }

    // Hidden leading bit, weight 2^exp (exp >= 1 is guaranteed here)
    c += bitEnd[(exp - 1) % 4];

    // Keep the last digit and convert it to a character
    c = c % 10 + '0';
    return (char)c;
}
/**
 * \brief Reverse the first 'len' characters of a character array in place.
 * \param[in,out] buffer: The character array to be reversed.
 * \param[in] len: The number of leading characters to reverse.
 *
 * Two indices start at both ends of the range and move towards each other, exchanging the
 * characters they point at. Nothing happens when 'len' is less than 2.
 */
static void reverse(char *buffer, int len)
{
    int left = 0;
    int right = len - 1;

    // Swap the outermost pair and close in until the indices meet
    while (left < right)
    {
        char held = buffer[right];
        buffer[right] = buffer[left];
        buffer[left] = held;
        left++;
        right--;
    }
}
/**
 * \brief Peel the lowest-order decimal digit off the integer part of a 'floatl' number.
 * \param[in,out] num: The number to read from; it is divided by 10 when a digit is produced.
 * \return The digit character reported by gChar(), or 0 when LT1() reports no integer part
 *         (in which case '*num' is left untouched).
 *
 * Calling this repeatedly yields the integer digits from least to most significant until 0 is returned.
 */
static char floatlIntLowChar(floatl *num)
{
    char digit;

    // Below 1 in magnitude: no integer digit left
    if (!LT1(*num))
    {
        return 0;
    }

    // Read the current lowest digit, then shift the number one decimal place to the right
    digit = gChar(*num);
    *num = floatl_div(*num, floatl_from_d(10.0));
    return digit;
}
/**
 * \brief Pull the next (highest-order) decimal digit out of the fractional part of a 'floatl' number.
 * \param[in,out] num: The number to read from; it is multiplied by 10 when a digit is produced.
 * \return The digit character reported by gChar() for the scaled number, or 0 when ST1()
 *         reports no fractional part (in which case '*num' is left untouched).
 *
 * Multiplying by 10 moves the next fractional digit into the units position, where gChar() reads it.
 * Calling this repeatedly yields the fractional digits from most to least significant.
 */
static char floatlDitHighChar(floatl *num)
{
    // No fractional bits left: nothing to produce
    if (!ST1(*num))
    {
        return 0;
    }

    // Shift one decimal place to the left and read the new units digit
    *num = floatl_mul(*num, floatl_from_d(10.0));
    return gChar(*num);
}
/**
 * \brief Add one unit in the last place to a decimal digit string, propagating the carry leftwards.
 * \param[in,out] begin: Pointer to the first character of the range.
 * \param[in,out] end: Pointer to the last character of the range (inclusive).
 * \param[in] format: A stop character; reaching it aborts the carry.
 * \return NULL when the carry was absorbed;
 *         a pointer to the stop character when 'format' was reached;
 *         a pointer to the offending character when something other than a digit, '.' or ' ' was met;
 *         'begin' when every digit in the range was '9' and the carry ran off the front
 *         ('end + 1' if the range is empty).
 *
 * The range is walked from 'end' back to 'begin'. A '.' is skipped, a '9' becomes '0' and the carry
 * continues, a ' ' (padding) becomes '1', and any other digit is incremented.
 * The cursor is decremented only while it is still above 'begin', so no pointer before the start
 * of the caller's array is ever formed.
 */
static char* floatl_carry_add(char *begin, char *end, char format)
{
    // One past the last character; pre-decremented before every access
    char *s = end + 1;

    while (s > begin)
    {
        --s;
        // Stop marker reached: hand it back to the caller
        if (*s == format) return s;
        // The decimal point does not take part in the carry
        if (*s == '.') continue;
        // 9 + 1 wraps to 0 and the carry moves on
        if (*s == '9')
        {
            *s = '0';
            continue;
        }
        // A padding blank turns into the new leading 1
        if (*s == ' ')
        {
            *s = '1';
            return NULL;
        }
        // Any digit 0..8 absorbs the carry
        if (*s >= '0' && *s < '9')
        {
            (*s)++;
            return NULL;
        }
        // Unexpected character
        return s;
    }

    // Carry not absorbed inside the range: s == begin (or end + 1 for an empty range)
    return s;
}
/**
 * \brief Subtract one unit in the last place from a decimal digit string, propagating the borrow leftwards.
 * \param[in,out] begin: Pointer to the first character of the range.
 * \param[in,out] end: Pointer to the last character of the range (inclusive).
 * \param[in] format: A stop character; reaching it aborts the borrow.
 * \return NULL when the borrow was absorbed;
 *         a pointer to the stop character when 'format' was reached;
 *         a pointer to the offending character when something other than a digit, '.' or ' ' was met;
 *         'begin' when every digit in the range was '0' and the borrow ran off the front
 *         ('end + 1' if the range is empty).
 *
 * The range is walked from 'end' back to 'begin'. A '.' is skipped, a '0' becomes '9' and the borrow
 * continues, a ' ' (padding) becomes '9', and any other digit is decremented.
 * The cursor is decremented only while it is still above 'begin', so no pointer before the start
 * of the caller's array is ever formed.
 */
static char* floatl_carry_sub(char *begin, char *end, char format)
{
    // One past the last character; pre-decremented before every access
    char *s = end + 1;

    while (s > begin)
    {
        --s;
        // Stop marker reached: hand it back to the caller
        if (*s == format) return s;
        // The decimal point does not take part in the borrow
        if (*s == '.') continue;
        // 0 - 1 wraps to 9 and the borrow moves on
        if (*s == '0')
        {
            *s = '9';
            continue;
        }
        // A padding blank is overwritten with 9 and ends the borrow
        if (*s == ' ')
        {
            *s = '9';
            return NULL;
        }
        // Any digit 1..9 absorbs the borrow
        if (*s > '0' && *s <= '9')
        {
            (*s)--;
            return NULL;
        }
        // Unexpected character
        return s;
    }

    // Borrow not absorbed inside the range: s == begin (or end + 1 for an empty range)
    return s;
}
/**
 * \brief Add one unit in the last place to a hexadecimal digit string, propagating the carry leftwards.
 * \param[in,out] begin: Pointer to the first character of the range.
 * \param[in,out] end: Pointer to the last character of the range (inclusive).
 * \param[in] format: Case selector for a '9' that rolls over to ten: non-zero writes 'A', zero writes 'a'.
 * \return NULL when the carry was absorbed;
 *         a pointer to the offending character when something other than a hex digit, '.' or ' ' was met;
 *         'begin' when every digit in the range was 'F'/'f' and the carry ran off the front
 *         ('end + 1' if the range is empty).
 *
 * The range is walked from 'end' back to 'begin'. A '.' is skipped, an 'F' or 'f' becomes '0' and the
 * carry continues, a ' ' (padding) becomes '1', a '9' becomes 'A'/'a', and any other hex digit is
 * incremented in place (letters keep their existing case).
 * The cursor is decremented only while it is still above 'begin', so no pointer before the start
 * of the caller's array is ever formed.
 */
static char* floatl_carry_add_h(char *begin, char *end, char format)
{
    // One past the last character; pre-decremented before every access
    char *s = end + 1;

    while (s > begin)
    {
        --s;
        // The point does not take part in the carry
        if (*s == '.') continue;
        // F + 1 wraps to 0 and the carry moves on
        if (*s == 'F' || *s == 'f')
        {
            *s = '0';
            continue;
        }
        // A padding blank turns into the new leading 1
        if (*s == ' ')
        {
            *s = '1';
            return NULL;
        }
        // 9 + 1 is the first letter digit; its case follows 'format'
        if (*s == '9')
        {
            *s = format ? 'A' : 'a';
            return NULL;
        }
        // 0..8, A..E and a..e are followed directly by their successor in the character set
        if ((*s >= '0' && *s < '9') ||
            (*s >= 'A' && *s < 'F') ||
            (*s >= 'a' && *s < 'f'))
        {
            (*s)++;
            return NULL;
        }
        // Unexpected character
        return s;
    }

    // Carry not absorbed inside the range: s == begin (or end + 1 for an empty range)
    return s;
}
/**
 * \brief Read one hexadecimal digit (4 bits) of the mantissa of a 'floatl' number.
 * \param[in] num: The 'floatl' number whose mantissa is read.
 * \param[in] index: Digit position counted from the most significant end of the mantissa (0 = first digit).
 * \return The digit value 0..15, or -1 when the requested digit lies below the lowest mantissa bit.
 *
 * The digit starts at bit (__FLOATL_MANT_BITS__ - 4 - 4 * index) of the mantissa, counted from the
 * least significant bit. Bits below __FLOATL_MANT_PARTS__ * 32 live in the 'mantissas' words, the
 * rest in the high-order 'mantissa' field. A digit that straddles a 32-bit word boundary is
 * completed with the low bits of the next higher word.
 */
static char floatl_mant_hex(floatl num, unsigned int index)
{
    // Lowest bit of the requested 4-bit group
    int bbit = __FLOATL_MANT_BITS__ - 4 - index * 4;

    // Past the end of the mantissa
    if (bbit < 0)
    {
        return -1;
    }

    // Word that holds the lowest bit, and the offset inside that word
    int part = bbit / 32;
    int bit = bbit % 32;

    // Entirely inside the high-order mantissa field
    if (part >= __FLOATL_MANT_PARTS__)
    {
        return (num.mantissa >> bit) & 0xF;
    }

    // Take what this word provides
    char digit = (num.mantissas[part] >> bit) & 0xF;

    // Fewer than 4 bits were available: fetch the remainder from the next higher word
    if (bit > 28)
    {
        uint32_t upper = (part == __FLOATL_MANT_PARTS__ - 1) ? num.mantissa
                                                             : num.mantissas[part + 1];
        digit |= ((upper << (32 - bit)) & 0xF);
    }

    return digit;
}
/**
 * \brief Convert a 'floatl' number to a string in the %f format.
 * \param[in] num: The 'floatl' number to be converted.
 * \param[out] buffer: The character buffer that receives the text; it is NUL-terminated on the normal return path.
 * \param[in] size: The size of the buffer.
 * \param[in] flags: Formatting flags; FLAGS_PLUS, FLAGS_LEFT and FLAGS_ZEROPAD are consulted here.
 * \param[in] width: The minimum width of the output string.
 * \param[in] precision: The number of digits after the decimal point; 0 suppresses the decimal point.
 * \return The length of the converted string.
 *
 * Layout of the work:
 *  1. The integer digits are emitted least-significant first, followed by zero padding (if any),
 *     the sign and space padding (if any). That run is then reversed in place, which puts the
 *     padding and sign in front of the digits.
 *  2. '.' and up to 'precision' fractional digits are appended, padded with '0' when the number
 *     runs out of fractional digits early.
 *  3. One more fractional digit is generated; if it is '5' or greater the text is rounded up with
 *     floatl_carry_add().
 *  4. For left alignment, trailing spaces fill the field up to 'width'.
 *
 * NOTE(review): PRINT_CHAR is a macro defined outside this function. It presumably appends one
 * character at buffer[length++] bounded by 'max' and may leave the function early when the buffer
 * is full - confirm against its definition.
 */
static int floatl_convert_f(floatl num, char *buffer, uint32_t size, uint32_t flags, uint32_t width, uint32_t precision)
{
char c = 0;
int length = 0;
// Room for characters, keeping one byte for the NUL terminator.
// Not referenced directly below; presumably consumed by PRINT_CHAR (confirm).
uint32_t max = size - 1;
floatl temp;
// Pending sign character; at most one entry is stored below
char prefix[2] = {0, 0};
uint32_t prelen = 0;
// Characters that will follow the integer part: '.' plus 'precision' digits, or nothing
uint32_t postlen = (precision > 0) ? precision + 1 : 0;
/* Integer part */
temp = num;
// floatlIntLowChar() yields the digits lowest first and returns 0 once none remain
while ((c = floatlIntLowChar(&temp)) > 0) PRINT_CHAR(c);
/* No digit was produced: the integer part is 0 */
if (length == 0) PRINT_CHAR('0');
/* Choose the sign prefix */
if (num.sign) prefix[prelen++] = '-'; // Negative number
else if (flags & FLAGS_PLUS) prefix[prelen++] = '+'; // Forced sign on a non-negative number
/* Right alignment (everything emitted here is still in reversed order) */
if (!((flags & FLAGS_LEFT)) && (flags & FLAGS_ZEROPAD))
// Zero padding goes between the sign and the digits, so it is emitted before the sign
while (length + prelen + postlen < width) PRINT_CHAR('0');
// Emit the sign prefix
while (prelen > 0) PRINT_CHAR(prefix[--prelen]);
if (!((flags & FLAGS_LEFT)) && !(flags & FLAGS_ZEROPAD))
// Space padding goes in front of the sign, so it is emitted after the sign
while (length + postlen < width) PRINT_CHAR(' ');
/* Reverse everything written so far: padding and sign end up in front, digits highest first */
reverse(buffer, length);
/* Fractional part */
if (precision > 0)
{
/* Decimal point */
PRINT_CHAR('.');
/* Fractional digits */
temp = num;
// floatlDitHighChar() yields the digits highest first and returns 0 once none remain
while (precision && (c = floatlDitHighChar(&temp)) > 0) { PRINT_CHAR(c); precision--; }
/* Fill up to the requested precision */
// The number ran out of fractional digits: pad with zeros.
// After this loop 'precision' is no longer meaningful (it wraps past 0) and is not read again.
while (precision--) PRINT_CHAR('0');
}
else
{
// No fractional digits requested: prepare to inspect the first one for rounding
temp = num;
c = '0';
}
/* Rounding: only attempted when the last digit request succeeded (c > 0) */
if (c > 0)
{
// The first digit that was NOT printed decides the rounding
c = floatlDitHighChar(&temp);
if (c >= '5')
{
char *begin = buffer;
char *end = &buffer[length - 1];
char *move = NULL;
// Keep a leading sign out of the carry range
if (*begin == '+' || *begin == '-') begin++;
// Add one unit in the last printed place; format 0 means any NUL byte in the range stops the carry
move = floatl_carry_add(begin, end, 0);
// Non-NULL: the carry was not absorbed by a digit or a padding blank
if (move)
{
if (move > begin)
{
// Carry stopped inside the range: copy the stopping character one position to the left
*(move - 1) = *move;
}
else
{
// Carry ran off the front: grow the text by one character and shift it right by one
PRINT_CHAR('0');
memmove(move + 1, move, end - move + 1);
}
// The freed position receives the carried-out 1
*move = '1';
}
}
}
/* Left alignment */
if (flags & FLAGS_LEFT)
// Fill the rest of the field with spaces
while (length < width) PRINT_CHAR(' ');
buffer[length] = 0;
return length;
}
/**
 * \brief Convert a 'floatl' number to a string in the %a format.
 * \param[in] num: The 'floatl' number to be converted.
 * \param[out] buffer: The character buffer that receives the text; it is NUL-terminated on the normal return path.
 * \param[in] size: The size of the buffer.
 * \param[in] flags: Formatting flags; FLAGS_CASE, FLAGS_PLUS, FLAGS_LEFT and FLAGS_ZEROPAD are consulted here.
 * \param[in] width: The minimum width of the output string.
 * \param[in] precision: The number of hexadecimal digits after the point; 0 suppresses the point.
 * \return The length of the converted string, or -2 if the exponent digits do not fit into the buffer.
 *
 * Layout of the work:
 *  1. The decimal digits of the unbiased binary exponent are written backwards from the very end
 *     of the buffer, to be moved into place at the end.
 *  2. The leading "0x1" (or "0x0" when the exponent field is 0) is emitted in reverse together
 *     with padding and sign, and that run is then reversed in place.
 *  3. '.' and 'precision' mantissa digits follow, read with floatl_mant_hex() and padded with '0'.
 *  4. The first mantissa digit that was not printed decides whether floatl_carry_add_h() rounds up.
 *  5. 'p'/'P', the exponent sign and the saved exponent digits are appended; for left alignment,
 *     trailing spaces fill the field up to 'width'.
 *
 * NOTE(review): PRINT_CHAR is a macro defined outside this function. It presumably appends one
 * character at buffer[length++] bounded by 'max' and may leave the function early when the buffer
 * is full - confirm against its definition.
 */
static int floatl_convert_a(floatl num, char *buffer, uint32_t size, uint32_t flags, uint32_t width, uint32_t precision)
{
char c = 0;
int length = 0;
// Room for characters, keeping one byte for the NUL terminator.
// Not referenced directly below; presumably consumed by PRINT_CHAR (confirm).
uint32_t max = size - 1;
floatl temp;
// Exponent digits are parked at the tail of the buffer; expBase moves down as digits are added
char *expBase = buffer + size;
// Pending sign character; at most one entry is stored below
char prefix[2] = {0, 0};
uint32_t prelen = 0;
uint32_t postlen = 0;
// Unbiased binary exponent
int32_t exp = ((int)num.exponent) - __FLOATL_EXP_MID_VALUE__;
// First mantissa digit beyond the requested precision; floatl_mant_hex() returns -1 past the end.
// NOTE(review): 'carry' is a plain char; where char is unsigned, -1 would read as a large positive value - confirm.
char carry = floatl_mant_hex(num, precision);
/* Exponent digits, written backwards from the end of the buffer */
if (exp == 0) *(--expBase) = '0';
else
{
// Work on the magnitude; the sign is printed separately later
int32_t texp = exp;
if (texp < 0) texp = -texp;
// Lowest digit first, each stored in front of the previous one
for ( ; texp != 0; texp /= 10)
{
c = texp % 10 + '0';
*(--expBase) = c;
// Give up when the parked digits reach the first two bytes of the buffer
if (expBase < buffer + 2) return -2;
}
}
/* Characters that will follow the integer part */
postlen = ((precision > 0) ? precision + 1 : 0) + (buffer + size - expBase) + 2; // point and digits + exponent digits + 'p' and exponent sign
/* Integer part, emitted in reverse: leading digit, then 'x', then '0' */
temp = num;
// Exponent field 0 gives a leading 0, anything else a leading 1
if (temp.exponent == 0) PRINT_CHAR('0');
else PRINT_CHAR('1');
// 'X' or 'x' depending on the case flag
PRINT_CHAR((flags & FLAGS_CASE) ? 'X' : 'x');
PRINT_CHAR('0');
/* Choose the sign prefix */
if (num.sign) prefix[prelen++] = '-'; // Negative number
else if (flags & FLAGS_PLUS) prefix[prelen++] = '+'; // Forced sign on a non-negative number
/* Right alignment (everything emitted here is still in reversed order) */
if (!((flags & FLAGS_LEFT)) && (flags & FLAGS_ZEROPAD))
// Zero padding is emitted before the sign, so after the reversal it sits between the sign and "0x"
while (length + prelen + postlen < width) PRINT_CHAR('0');
// Emit the sign prefix
while (prelen > 0) PRINT_CHAR(prefix[--prelen]);
if (!((flags & FLAGS_LEFT)) && !(flags & FLAGS_ZEROPAD))
// Space padding is emitted after the sign, so after the reversal it sits in front of the sign
while (length + postlen < width) PRINT_CHAR(' ');
/* Reverse everything written so far */
reverse(buffer, length);
/* Fractional part */
if (precision > 0)
{
/* Point */
PRINT_CHAR('.');
/* Fractional digits */
temp = num;
temp.sign = 0;
// Sign-cleared bit pattern compares equal to __FLOATL_CONST_0__: emit a single '0' digit
if (floatl_int_cmp(temp, __FLOATL_CONST_0__) == 0)
{
PRINT_CHAR('0');
precision--;
}
else
{
int index = 0;
// Read mantissa digits from the most significant end
while (precision)
{
c = floatl_mant_hex(num, index);
// Negative result: the mantissa has no more digits
if (c < 0) break;
// Map 0..15 to its character, letters in the requested case
if (c < 10) c = c + '0';
else c = (c - 10) + ((flags & FLAGS_CASE) ? 'A' : 'a');
PRINT_CHAR(c);
precision--;
index++;
}
}
/* Fill up to the requested precision */
// After this loop 'precision' is no longer meaningful (it wraps past 0) and is not read again
while (precision--) PRINT_CHAR('0');
}
// Rounding: round up when the first unprinted digit is greater than 8.
// NOTE(review): with '> 8' a next digit of exactly 8 (half a unit or more) never rounds up - confirm this is intended.
if (carry > 8)
{
char *begin = buffer;
char *end = &buffer[length - 1];
// Keep a leading sign out of the carry range
if (*begin == '+' || *begin == '-') begin++;
// The return value is ignored, so an unabsorbed carry is not reported.
// NOTE(review): the third parameter is a char; (flags & FLAGS_CASE) is narrowed to char here - confirm FLAGS_CASE survives that.
floatl_carry_add_h(begin, end, (flags & FLAGS_CASE));
}
/* Append the exponent */
// 'P' or 'p' depending on the case flag
PRINT_CHAR((flags & FLAGS_CASE) ? 'P' : 'p');
// Exponent sign
PRINT_CHAR(exp < 0 ? '-' : '+');
// Copy the parked exponent digits from the tail of the buffer
while (expBase < buffer + size) PRINT_CHAR(*(expBase++));
/* Left alignment */
if (flags & FLAGS_LEFT)
// Fill the rest of the field with spaces
while (length < width) PRINT_CHAR(' ');
buffer[length] = 0;
return length;
}
/**
 * \brief Convert a 'floatl' number to a string in the %e format.
 * \param[in] num: The 'floatl' number to be converted.
 * \param[out] buffer: The character buffer that receives the text; it is NUL-terminated on the normal return path.
 * \param[in] size: The size of the buffer.
 * \param[in] flags: Formatting flags; FLAGS_CASE, FLAGS_PLUS, FLAGS_LEFT and FLAGS_ZEROPAD are consulted here.
 * \param[in] width: The minimum width of the output string.
 * \param[in] precision: The number of digits after the decimal point; 0 suppresses the decimal point.
 * \return The length of the converted string, or -2 if the exponent digits do not fit into the buffer.
 *
 * Layout of the work:
 *  1. The significant decimal digits of the number are collected in a digit stack of
 *     (precision + 2) entries, and the decimal exponent is derived from how many integer digits
 *     exist or how many leading fractional zeros were skipped.
 *  2. The first digit is emitted; the exponent digits (at least three) are written backwards from
 *     the very end of the buffer, to be moved into place at the end.
 *  3. Padding and sign are emitted and the run written so far is reversed in place.
 *  4. '.' and 'precision' digits are popped from the stack; one more digit, if present, decides
 *     rounding. A carry that overflows the leading digit bumps the exponent.
 *  5. 'e'/'E', the exponent sign and the saved exponent digits are appended; for left alignment,
 *     trailing spaces fill the field up to 'width'.
 *
 * NOTE(review): PRINT_CHAR, FL_STACK and the fl_stack_* helpers are defined outside this function.
 * PRINT_CHAR presumably appends one character at buffer[length++] bounded by 'max' and may leave
 * the function early when the buffer is full. fl_stack_push() and fl_stack_push_h() presumably add
 * at opposite ends so that fl_stack_pop() returns the most significant digit first - confirm.
 */
static int floatl_convert_e(floatl num, char *buffer, uint32_t size, uint32_t flags, uint32_t width, uint32_t precision)
{
char c = 0;
int length = 0;
// Room for characters, keeping one byte for the NUL terminator.
// Not referenced directly below; presumably consumed by PRINT_CHAR (confirm).
uint32_t max = size - 1;
floatl temp;
// Exponent digits are parked at the tail of the buffer; expBase moves down as digits are added
char *expBase = buffer + size;
// Pending sign character; at most one entry is stored below
char prefix[2] = {0, 0};
uint32_t prelen = 0;
uint32_t postlen = 0;
// Count of integer digits, or of fractional digits consumed up to the first non-zero one
int offset = 0;
// Decimal exponent
int32_t exp = 0;
// Position of the decimal point in 'buffer' once it has been written
char *point = NULL;
// Digit storage: leading digit + 'precision' digits + one digit for rounding.
// This is a variable-length array, so its stack footprint grows with 'precision'.
char buf[precision + 2];
// Initializer order is given by the FL_STACK definition, which is not visible here
FL_STACK dstack = {buf, sizeof(buf), 0, 0};
/* Integer part: push the digits (lowest first) and count them */
temp = num;
while ((c = floatlIntLowChar(&temp)) > 0)
{
fl_stack_push(&dstack, c);
offset++;
}
/* Fractional digits and decimal exponent */
temp = num;
if (offset > 0)
{
// |num| >= 1: add every fractional digit; the exponent is the number of integer digits minus one
while ((c = floatlDitHighChar(&temp)) > 0) fl_stack_push_h(&dstack, c);
exp = offset - 1;
}
else
{
// Exponent field 0: exponent stays 0 and the stack is filled with zeros below
if (num.exponent == 0) exp = 0;
else
{
// |num| < 1: skip leading fractional zeros, counting every digit consumed
while ((c = floatlDitHighChar(&temp)) > 0)
{
offset++;
if (c != '0')
{
// First significant digit found: collect it and as many followers as the stack holds
fl_stack_push_h(&dstack, c);
while (dstack.size < dstack.capacity && (c = floatlDitHighChar(&temp)) > 0) fl_stack_push_h(&dstack, c);
exp = -offset;
break;
}
}
}
}
// Fill the remaining stack entries with '0'
while (dstack.size < dstack.capacity) fl_stack_push_h(&dstack, '0');
// The first popped digit becomes the single digit in front of the decimal point
fl_stack_pop(&dstack, &c);
PRINT_CHAR(c);
/* Exponent digits, written backwards from the end of the buffer */
if (exp == 0) *(--expBase) = '0';
else
{
// Work on the magnitude; the sign is printed separately later
int32_t texp = exp;
if (texp < 0) texp = -texp;
// Lowest digit first, each stored in front of the previous one
for ( ; texp != 0; texp /= 10)
{
c = texp % 10 + '0';
*(--expBase) = c;
// Give up when the parked digits reach the first two bytes of the buffer
if (expBase < buffer + 2) return -2;
}
}
// Left-pad the exponent with zeros to at least three digits
while (buffer + size - expBase < 3) *(--expBase) = '0';
/* Characters that will follow the leading digit */
postlen = ((precision > 0) ? precision + 1 : 0) + (buffer + size - expBase) + 2; // point and digits + exponent digits + 'e' and exponent sign
/* Choose the sign prefix */
if (num.sign) prefix[prelen++] = '-'; // Negative number
else if (flags & FLAGS_PLUS) prefix[prelen++] = '+'; // Forced sign on a non-negative number
/* Right alignment (everything emitted here is still in reversed order) */
if (!((flags & FLAGS_LEFT)) && (flags & FLAGS_ZEROPAD))
// Zero padding is emitted before the sign, so after the reversal it sits between the sign and the digit
while (length + prelen + postlen < width) PRINT_CHAR('0');
// Emit the sign prefix
while (prelen > 0) PRINT_CHAR(prefix[--prelen]);
if (!((flags & FLAGS_LEFT)) && !(flags & FLAGS_ZEROPAD))
// Space padding is emitted after the sign, so after the reversal it sits in front of the sign
while (length + postlen < width) PRINT_CHAR(' ');
/* Reverse everything written so far; the leading digit ends up last */
reverse(buffer, length);
if (precision > 0)
{
/* Decimal point */
PRINT_CHAR('.');
// Remember where the point is for the rounding step
point = &buffer[length - 1];
// Pop up to 'precision' digits as the fractional part
while (precision && fl_stack_pop(&dstack, &c)) { PRINT_CHAR(c); precision--; }
}
/* Rounding: a digit left on the stack is the first one that was not printed */
if (dstack.size > 0)
{
fl_stack_pop(&dstack, &c);
// Round up when that digit is 5 or greater
if (c >= '5')
{
// Set to the leading digit when the carry has to reach it
char *move = NULL;
if (point)
{
// Carry through the fractional digits only; a non-NULL result means they did not absorb it
if (floatl_carry_add(point + 1, &buffer[length - 1], '.')) move = point - 1;
}
// No fractional digits: the leading digit is the last character written
else move = &buffer[length - 1];
// The carry reaches the leading digit
if (move)
{
// 0..8 simply absorbs the carry
if (*move >= '0' && *move < '9') (*move)++;
// Otherwise the leading digit rolls over: the value becomes 1.0... with the exponent raised by one
else
{
*move = '1';
// The parked digits hold the exponent magnitude: add for exp >= 0, subtract for exp < 0
if (exp >= 0) move = floatl_carry_add(expBase, buffer + size - 1, 0);
else move = floatl_carry_sub(expBase, buffer + size - 1, 0);
// Non-NULL: the parked exponent digits did not absorb the change
if (move)
{
// Not enough room in front of the parked digits for one more
if (move <= (buffer + length + 2)) return -2;
*(--expBase) = '1';
}
// Keep 'exp' in step; from here on it only selects the exponent sign
exp++;
}
}
}
}
/* Append the exponent */
// 'E' or 'e' depending on the case flag
PRINT_CHAR((flags & FLAGS_CASE) ? 'E' : 'e');
// Exponent sign
PRINT_CHAR(exp < 0 ? '-' : '+');
// Copy the parked exponent digits from the tail of the buffer
while (expBase < buffer + size) PRINT_CHAR(*(expBase++));
/* Left alignment */
if (flags & FLAGS_LEFT)
// Fill the rest of the field with spaces
while (length < width) PRINT_CHAR(' ');
buffer[length] = 0;
return length;
}
/**
 *  \brief Converts a 'floatl' number to a string.
 *
 *  Parses a single printf-like conversion specification and dispatches to the matching
 *  converter (floatl_convert_a / floatl_convert_e / floatl_convert_f). The resulting string
 *  is stored in the provided buffer.
 *
 *  \param[in] a: The 'floatl' number to be converted.
 *  \param[out] buffer: A pointer to the buffer where the resulting string will be stored.
 *  \param[in] size: The size of the buffer in bytes, including room for the null terminator.
 *  \param[in] format: A printf-like format string, structured as [%][flags][width][.precision]type.
 *                     Flags can be '0' (zero-padding), '-' (left-alignment), '+' (force sign),
 *                     ' ' (space for positive numbers), '#' (alternate form).
 *                     Width is a decimal number specifying the minimum width of the output.
 *                     Precision is a decimal number following '.', default 6.
 *                     Type can be 'a' 'A' 'e' 'E' 'f' 'F' 'g' 'G', determining the output format.
 *                     Characters following the type specifier are ignored.
 *  \return The length of the successfully converted string.
 *          Returns 0 if the format is empty or the type specifier is invalid.
 *          Returns -1 if the 'buffer' pointer is NULL, or if the requested width or
 *          precision does not fit into the buffer.
 *          Returns -2 if the 'size' of the buffer is too small.
 *          Returns -3 if the 'format' pointer is NULL.
 *          Any negative value reported by the underlying converter is returned unchanged.
 */
int floatl_print(floatl a, char *buffer, uint32_t size, const char *format)
{
    int length = 0;
    // Flags used to control the formatting options of the output
    uint32_t flags = 0;
    // Minimum width of the output string
    uint32_t width = 0;
    // Number of digits after the decimal point, default value is 6
    uint32_t precision = 6;
    // Maximum number of characters the buffer can hold, excluding the null terminator
    uint32_t max = 0;

    /* Validate the arguments before touching any of them */
    if (!buffer) return -1;
    if (size < 2) return -2;
    if (!format) return -3;

    // Safe now: size >= 2, so this cannot wrap around
    max = size - 1;

    // Skip the optional leading '%'
    if (*format == '%') format++;

    // Nothing to convert: hand back an empty string
    if (!*format)
    {
        buffer[0] = 0;
        return 0;
    }

    /* Parse the formatting flags */
    for (;; format++)
    {
        if (*format == '0') flags |= FLAGS_ZEROPAD;
        else if (*format == '-') flags |= FLAGS_LEFT;
        else if (*format == '+') flags |= FLAGS_PLUS;
        else if (*format == ' ') flags |= FLAGS_SPACE;
        else if (*format == '#') flags |= FLAGS_HASH;
        else break;
    }

    /* Parse the width */
    while ((*format >= '0') && (*format <= '9'))
    {
        // Accumulate in 64 bits so that an overlong digit run cannot wrap around
        // and sneak past the buffer-capacity check
        uint64_t next = (uint64_t)width * 10U + (uint32_t)(*format++ - '0');
        if (next > max) return -1;
        width = (uint32_t)next;
    }

    /* Parse the optional precision */
    if (*format == '.')
    {
        precision = 0;
        format++;
        while ((*format >= '0') && (*format <= '9'))
        {
            // Same wrap-around protection as for the width
            uint64_t next = (uint64_t)precision * 10U + (uint32_t)(*format++ - '0');
            if (next > max) return -1;
            precision = (uint32_t)next;
        }
    }
    // Two characters are reserved for a possible sign and decimal point.
    // 'max < 2' is tested first so that 'max - 2' cannot wrap around for a 2-byte buffer.
    if ((max < 2U) || (precision > max - 2U)) return -1;

    /* Dispatch on the type specifier */
    switch (*format)
    {
    case 'A':
        // Uppercase output
        flags |= FLAGS_CASE;
        /* fallthrough */
    case 'a': // Floating-point number in hexadecimal and [P-] notation
        length = floatl_convert_a(a, buffer, size, flags, width, precision);
        break;
    case 'E':
        // Uppercase output
        flags |= FLAGS_CASE;
        /* fallthrough */
    case 'e': // Floating-point number in exponential [e-] notation
        length = floatl_convert_e(a, buffer, size, flags, width, precision);
        break;
    case 'G':
    case 'g': // Currently routed to the same converter as 'f'
    case 'F':
    case 'f': // Floating-point number
        length = floatl_convert_f(a, buffer, size, flags, width, precision);
        break;
    default:
        // Invalid type specifier
        return 0;
    }

    // A negative length is an error code from the converter: propagate it and,
    // above all, never use it as an index into the buffer
    if (length < 0) return length;

    // Add a null terminator to the end of the converted string
    buffer[length] = 0;
    return length;
}