mirror of
https://gitee.com/Lamdonn/varch.git
synced 2025-12-06 16:56:42 +08:00
182 lines
7.2 KiB
Markdown
182 lines
7.2 KiB
Markdown
## Introduction
|
|
|
|
`floatl` is a large floating-point number module that allows operations on large floating-point numbers exceeding the limits of standard C language data types. It provides a flexible long-integer representation, using 16-bit and 32-bit segments for storage.
|
|
|
|
In current mainstream computer systems, the largest natively supported numeric types are generally 32-bit or 64-bit (for floating point, `float` and `double`), which can meet the computational and storage requirements in general cases. However, they become inadequate when dealing with numbers that need a larger range or higher precision.
|
|
|
|
As an additional extension for floating-point numbers, the `floatl` module adopts a storage mechanism consistent with `IEEE 754` and is very convenient to use. It supports the following calculations:
|
|
|
|
- Basic arithmetic operations: addition, subtraction, multiplication, and division.
|
|
- Functions to convert strings and standard floating-point numbers to large floating-point numbers.
|
|
- Functions to output large floating-point numbers in decimal, `[P-]`, and `[E-]` formats.
|
|
|
|
`floatl` can be configured as 64-bit, 128-bit, 256-bit, 512-bit, 1024-bit, 2048-bit, 4096-bit, or 8192-bit as needed. If these configurations are still insufficient, it also provides functions to generate larger bit configurations.
|
|
|
|
## Interfaces
|
|
|
|
### Structure Definitions
|
|
|
|
#### `floatl`
|
|
|
|
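A structure used to represent large floating-point numbers. It uses a union so that, alongside a bit-field view of the `mantissa`, `exponent`, and `sign` fields, the same storage can be accessed as raw segments in two different widths: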
|
|
|
|
- An array of `uint16_t` type (16-bit segments).
|
|
- An array of `uint32_t` type (32-bit segments).
|
|
|
|
```c
|
|
typedef struct {
|
|
union {
|
|
uint16_t u16[__FLOATL_U16_PARTS__]; ///< Array of uint16_t values representing the long bit integer in 16-bit segments.
|
|
uint32_t u32[__FLOATL_U32_PARTS__]; ///< Array of uint32_t values representing the long bit integer in 32-bit segments.
|
|
struct {
|
|
uint32_t mantissas[__FLOATL_MANT_PARTS__];
|
|
uint32_t mantissa : __FLOATL_MANT_HIGH_BITS__;
|
|
uint32_t exponent : __FLOATL_EXP_BITS__;
|
|
uint32_t sign : 1;
|
|
};
|
|
};
|
|
} floatl;
|
|
```
|
|
|
|
### Definition Functions
|
|
|
|
```c
|
|
floatl floatl_from(const char *str);
|
|
floatl floatl_from_f(float value);
|
|
floatl floatl_from_d(double value);
|
|
```
|
|
|
|
`floatl` provides three definition functions of two kinds: one parses a numeric string, and the other two convert a standard C floating-point value.
|
|
|
|
`floatl_from` parses a `floatl` from a numeric string and supports parsing strings in decimal, `[P-]`, and `[E-]` formats.
|
|
`floatl_from_f` creates a `floatl` from a `float` type.
|
|
`floatl_from_d` creates a `floatl` from a `double` type. This function is encapsulated into a shorter macro definition `floatl()`, which is more in line with usage habits and has higher execution efficiency.
|
|
|
|
When using these three functions, if the value is within the range of `double`, it is recommended to use the `floatl()` method. When the value exceeds the range of `double` or when different number systems are required for definition, the `floatl_from` method can be used.
|
|
|
|
Meanwhile, you can also use the macro definitions of common values such as `FLOATL_INF`, `FLOATL_NAN`, `FLOATL_CONST_0`, `FLOATL_CONST_1`, `FLOATL_CONST_10`, etc.
|
|
|
|
Example:
|
|
```c
|
|
static void test_define(void)
|
|
{
|
|
floatl a = floatl(0.0);
|
|
floatl b = floatl(10.0);
|
|
floatl c = floatl(-3.14);
|
|
floatl d = floatl(1.23456789e+10);
|
|
floatl e = floatl(0x1.23456789p+10);
|
|
|
|
floatl f = floatl_from("0.0");
|
|
floatl g = floatl_from("10.0");
|
|
floatl h = floatl_from("-3.14");
|
|
floatl i = floatl_from("1.23456789e+10");
|
|
floatl j = floatl_from("0x1.23456789p+10");
|
|
|
|
floatl zero = FLOATL_CONST_0;
|
|
floatl one = FLOATL_CONST_1;
|
|
floatl ten = FLOATL_CONST_10;
|
|
|
|
printf("size %zu\r\n", sizeof(floatl));
|
|
}
|
|
```
|
|
|
|
### Conversion Functions
|
|
|
|
```c
|
|
int floatl_print(floatl a, char *buffer, uint32_t size, const char *format);
|
|
#define floatl_show(a, b, s, f) (floatl_print((a), (b), (s), (f)) > 0 ? (b) : "invalid")
|
|
```
|
|
|
|
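`floatl_print` converts the value of `a` into decimal, `[P-]`, or `[E-]` format according to the specified `format` and stores the result in the passed `buffer`; the conversion succeeded when its return value is greater than 0. The `floatl_show` macro wraps this call and evaluates to `buffer` on success, or to the string `"invalid"` otherwise, so it can be passed directly to `printf`.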
|
|
`buffer` and `size` are the address and size of the buffer used to store the conversion result, respectively.
|
|
`format` is similar to that used in `printf`, with the form `%[flags][width][.precision][type]`, such as `%0.2f`, `%+5.2a`, etc.
|
|
|
|
```c
|
|
static void test_print(void)
|
|
{
|
|
floatl a = floatl_from("1234567890.123456789");
|
|
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%f"));
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%.2f"));
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%020.2f"));
|
|
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%+.6a"));
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%+20.6a"));
|
|
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%-20.6e"));
|
|
}
|
|
```
|
|
|
|
Results:
|
|
```
|
|
a 1234567890.123451
|
|
a 1234567890.12
|
|
a 00000001234567890.12
|
|
a +0x1.26580bp+30
|
|
a +0x1.26580bp+30
|
|
a 1.234568e+009
|
|
```
|
|
|
|
### Arithmetic Functions
|
|
|
|
```c
|
|
floatl floatl_add(floatl a, floatl b); // a+b
|
|
floatl floatl_sub(floatl a, floatl b); // a-b
|
|
floatl floatl_mul(floatl a, floatl b); // a*b
|
|
floatl floatl_div(floatl a, floatl b); // a/b
|
|
floatl floatl_abs(floatl a); // |a|
|
|
floatl floatl_neg(floatl a); // -a
|
|
int floatl_eq(floatl a, floatl b); // a==b
|
|
int floatl_ne(floatl a, floatl b); // a!=b
|
|
int floatl_lt(floatl a, floatl b); // a<b
|
|
int floatl_le(floatl a, floatl b); // a<=b
|
|
int floatl_gt(floatl a, floatl b); // a>b
|
|
int floatl_ge(floatl a, floatl b); // a>=b
|
|
```
|
|
|
|
The arithmetic functions of `floatl` are consistent with the corresponding symbolic operations.
|
|
|
|
Usage example:
|
|
```c
|
|
static void test_calculate(void)
|
|
{
|
|
floatl a = floatl_from("12345678901234567890.123456789");
|
|
floatl b = floatl_from("98765432109876543210.987654321");
|
|
|
|
printf("a %s\r\n", floatl_show(a, buffer, sizeof(buffer), "%f"));
|
|
printf("b %s\r\n", floatl_show(b, buffer, sizeof(buffer), "%f"));
|
|
|
|
printf("a + b: %s\r\n", floatl_show(floatl_add(a, b), buffer, sizeof(buffer), "%f"));
|
|
printf("a - b: %s\r\n", floatl_show(floatl_sub(a, b), buffer, sizeof(buffer), "%f"));
|
|
printf("a * b: %s\r\n", floatl_show(floatl_mul(a, b), buffer, sizeof(buffer), "%f"));
|
|
printf("a / b: %s\r\n", floatl_show(floatl_div(a, b), buffer, sizeof(buffer), "%f"));
|
|
printf("-a: %s\r\n", floatl_show(floatl_neg(a), buffer, sizeof(buffer), "%f"));
|
|
printf("|a|: %s\r\n", floatl_show(floatl_abs(a), buffer, sizeof(buffer), "%f"));
|
|
printf("a > b: %d\r\n", floatl_gt(a, b));
|
|
printf("a >= b: %d\r\n", floatl_ge(a, b));
|
|
printf("a < b: %d\r\n", floatl_lt(a, b));
|
|
printf("a <= b: %d\r\n", floatl_le(a, b));
|
|
printf("a == b: %d\r\n", floatl_eq(a, b));
|
|
printf("a != b: %d\r\n", floatl_ne(a, b));
|
|
}
|
|
```
|
|
|
|
Results:
|
|
```
|
|
a 12345678901234567890.123457
|
|
b 98765432109876543210.987654
|
|
a + b: 111111111011111111101.111111
|
|
a - b: -86419753208641975320.864198
|
|
a * b: 1219326311370217952261850327336229233322.374638
|
|
a / b: 0.125000
|
|
-a: -12345678901234567890.123457
|
|
|a|: 12345678901234567890.123457
|
|
a > b: 0
|
|
a >= b: 0
|
|
a < b: 1
|
|
a <= b: 1
|
|
a == b: 0
|
|
a != b: 1
|
|
```
|
|
|