ysyx-workbench/abstract-machine/klib/src/int64.c
xinyangli 8e4feb4010 NJU-ProjectN/abstract-machine ics2023 initialized
NJU-ProjectN/abstract-machine 3348db971fd860be5cb28e21c18f9d0e65d0c96a Merge pull request #8 from Jasonyanyusong/master
2023-12-21 00:20:42 +08:00

731 lines
18 KiB
C

// divmoddi4.c from The LLVM Compiler Infrastructure
/* Assumption: Signed integral is 2's complement. */
/* Assumption: Right shift of signed negative is arithmetic shift. */
/* Assumption: Endianness is little or big (not mixed). */
#if defined(__ELF__)
#define FNALIAS(alias_name, original_name) \
void alias_name() __attribute__((__alias__(#original_name)))
#define COMPILER_RT_ALIAS(aliasee) __attribute__((__alias__(#aliasee)))
#else
#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")")
#define COMPILER_RT_ALIAS(aliasee) _Pragma("GCC error(\"alias unsupported on this file format\")")
#endif
/* ABI macro definitions */
#if __ARM_EABI__
# ifdef COMPILER_RT_ARMHF_TARGET
# define COMPILER_RT_ABI
# else
# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs")))
# endif
#else
# define COMPILER_RT_ABI
#endif
#define AEABI_RTABI __attribute__((__pcs__("aapcs")))
#ifdef _MSC_VER
#define ALWAYS_INLINE __forceinline
#define NOINLINE __declspec(noinline)
#define NORETURN __declspec(noreturn)
#define UNUSED
#else
#define ALWAYS_INLINE __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))
#define NORETURN __attribute__((noreturn))
#define UNUSED __attribute__((unused))
#endif
#if defined(__NetBSD__) && (defined(_KERNEL) || defined(_STANDALONE))
/*
* Kernel and boot environment can't use normal headers,
* so use the equivalent system headers.
*/
# include <machine/limits.h>
# include <sys/stdint.h>
# include <sys/types.h>
#else
/* Include the standard compiler builtin headers we use functionality from. */
# include <limits.h>
# include <stdint.h>
# include <stdbool.h>
# include <float.h>
#endif
/* Include the commonly used internal type definitions. */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
defined(__ORDER_LITTLE_ENDIAN__)
/* Clang and GCC provide built-in endianness definitions. */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define _YUGA_LITTLE_ENDIAN 0
#define _YUGA_BIG_ENDIAN 1
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define _YUGA_LITTLE_ENDIAN 1
#define _YUGA_BIG_ENDIAN 0
#endif /* __BYTE_ORDER__ */
#else /* Compilers other than Clang or GCC. */
#if defined(__SVR4) && defined(__sun)
#include <sys/byteorder.h>
#if defined(_BIG_ENDIAN)
#define _YUGA_LITTLE_ENDIAN 0
#define _YUGA_BIG_ENDIAN 1
#elif defined(_LITTLE_ENDIAN)
#define _YUGA_LITTLE_ENDIAN 1
#define _YUGA_BIG_ENDIAN 0
#else /* !_LITTLE_ENDIAN */
#error "unknown endianness"
#endif /* !_LITTLE_ENDIAN */
#endif /* Solaris and AuroraUX. */
/* .. */
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
defined(__minix)
#include <sys/endian.h>
#if _BYTE_ORDER == _BIG_ENDIAN
#define _YUGA_LITTLE_ENDIAN 0
#define _YUGA_BIG_ENDIAN 1
#elif _BYTE_ORDER == _LITTLE_ENDIAN
#define _YUGA_LITTLE_ENDIAN 1
#define _YUGA_BIG_ENDIAN 0
#endif /* _BYTE_ORDER */
#endif /* *BSD */
#if defined(__OpenBSD__)
#include <machine/endian.h>
#if _BYTE_ORDER == _BIG_ENDIAN
#define _YUGA_LITTLE_ENDIAN 0
#define _YUGA_BIG_ENDIAN 1
#elif _BYTE_ORDER == _LITTLE_ENDIAN
#define _YUGA_LITTLE_ENDIAN 1
#define _YUGA_BIG_ENDIAN 0
#endif /* _BYTE_ORDER */
#endif /* OpenBSD */
/* .. */
/* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the
* compiler (at least with GCC) */
#if defined(__APPLE__) || defined(__ellcc__ )
#ifdef __BIG_ENDIAN__
#if __BIG_ENDIAN__
#define _YUGA_LITTLE_ENDIAN 0
#define _YUGA_BIG_ENDIAN 1
#endif
#endif /* __BIG_ENDIAN__ */
#ifdef __LITTLE_ENDIAN__
#if __LITTLE_ENDIAN__
#define _YUGA_LITTLE_ENDIAN 1
#define _YUGA_BIG_ENDIAN 0
#endif
#endif /* __LITTLE_ENDIAN__ */
#endif /* Mac OSX */
/* .. */
#if defined(_WIN32)
#define _YUGA_LITTLE_ENDIAN 1
#define _YUGA_BIG_ENDIAN 0
#endif /* Windows */
#endif /* Clang or GCC. */
/* . */
#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
#error Unable to determine endian
#endif /* Check we found an endianness correctly. */
/* si_int is defined in Linux sysroot's asm-generic/siginfo.h */
#ifdef si_int
#undef si_int
#endif
typedef int si_int;
typedef unsigned su_int;
typedef long long di_int;
typedef unsigned long long du_int;
typedef union
{
di_int all;
struct
{
#if _YUGA_LITTLE_ENDIAN
su_int low;
si_int high;
#else
si_int high;
su_int low;
#endif /* _YUGA_LITTLE_ENDIAN */
}s;
} dwords;
typedef union
{
du_int all;
struct
{
#if _YUGA_LITTLE_ENDIAN
su_int low;
su_int high;
#else
su_int high;
su_int low;
#endif /* _YUGA_LITTLE_ENDIAN */
}s;
} udwords;
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))// || defined(__riscv)
#define CRT_HAS_128BIT
#endif
#ifdef CRT_HAS_128BIT
typedef int ti_int __attribute__ ((mode (TI)));
typedef unsigned tu_int __attribute__ ((mode (TI)));
typedef union
{
ti_int all;
struct
{
#if _YUGA_LITTLE_ENDIAN
du_int low;
di_int high;
#else
di_int high;
du_int low;
#endif /* _YUGA_LITTLE_ENDIAN */
}s;
} twords;
typedef union
{
tu_int all;
struct
{
#if _YUGA_LITTLE_ENDIAN
du_int low;
du_int high;
#else
du_int high;
du_int low;
#endif /* _YUGA_LITTLE_ENDIAN */
}s;
} utwords;
static __inline ti_int make_ti(di_int h, di_int l) {
twords r;
r.s.high = h;
r.s.low = l;
return r.all;
}
static __inline tu_int make_tu(du_int h, du_int l) {
utwords r;
r.s.high = h;
r.s.low = l;
return r.all;
}
#endif /* CRT_HAS_128BIT */
typedef union
{
su_int u;
float f;
} float_bits;
typedef union
{
udwords u;
double f;
} double_bits;
typedef struct
{
#if _YUGA_LITTLE_ENDIAN
udwords low;
udwords high;
#else
udwords high;
udwords low;
#endif /* _YUGA_LITTLE_ENDIAN */
} uqwords;
typedef union
{
uqwords u;
long double f;
} long_double_bits;
#if __STDC_VERSION__ >= 199901L
typedef float _Complex Fcomplex;
typedef double _Complex Dcomplex;
typedef long double _Complex Lcomplex;
#define COMPLEX_REAL(x) __real__(x)
#define COMPLEX_IMAGINARY(x) __imag__(x)
#else
typedef struct { float real, imaginary; } Fcomplex;
typedef struct { double real, imaginary; } Dcomplex;
typedef struct { long double real, imaginary; } Lcomplex;
#define COMPLEX_REAL(x) (x).real
#define COMPLEX_IMAGINARY(x) (x).imaginary
#endif
/* Include internal utility function declarations. */
/** \brief Trigger a program abort (or panic for kernel code). */
#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__)
NORETURN void compilerrt_abort_impl(const char *file, int line,
const char *function);
#define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__)
#define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt)
#define COMPILE_TIME_ASSERT2(expr, cnt) \
typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED
COMPILER_RT_ABI si_int __paritysi2(si_int a);
COMPILER_RT_ABI si_int __paritydi2(di_int a);
COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b);
COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b);
COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d);
COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem);
COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
#ifdef CRT_HAS_128BIT
COMPILER_RT_ABI si_int __clzti2(ti_int a);
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
#endif
/* Definitions for builtins unavailable on MSVC */
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
uint32_t __inline __builtin_ctz(uint32_t value) {
unsigned long trailing_zero = 0;
if (_BitScanForward(&trailing_zero, value))
return trailing_zero;
return 32;
}
uint32_t __inline __builtin_clz(uint32_t value) {
unsigned long leading_zero = 0;
if (_BitScanReverse(&leading_zero, value))
return 31 - leading_zero;
return 32;
}
#if defined(_M_ARM) || defined(_M_X64)
uint32_t __inline __builtin_clzll(uint64_t value) {
unsigned long leading_zero = 0;
if (_BitScanReverse64(&leading_zero, value))
return 63 - leading_zero;
return 64;
}
#else
uint32_t __inline __builtin_clzll(uint64_t value) {
if (value == 0)
return 64;
uint32_t msh = (uint32_t)(value >> 32);
uint32_t lsh = (uint32_t)(value & 0xFFFFFFFF);
if (msh != 0)
return __builtin_clz(msh);
return 32 + __builtin_clz(lsh);
}
#endif
#define __builtin_clzl __builtin_clzll
#endif /* defined(_MSC_VER) && !defined(__clang__) */
#include <am.h>
#if !defined(__ARCH_RISCV64_MYCPU)
/* Returns: a / b */
COMPILER_RT_ABI di_int
__divdi3(di_int a, di_int b)
{
const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */
di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */
a = (a ^ s_a) - s_a; /* negate if s_a == -1 */
b = (b ^ s_b) - s_b; /* negate if s_b == -1 */
s_a ^= s_b; /*sign of quotient */
return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */
}
/* Returns: a / b, *rem = a % b */
COMPILER_RT_ABI di_int
__divmoddi4(di_int a, di_int b, di_int* rem)
{
di_int d = __divdi3(a,b);
*rem = a - (d*b);
return d;
}
/* Returns: a % b */
COMPILER_RT_ABI di_int
__moddi3(di_int a, di_int b)
{
const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */
b = (b ^ s) - s; /* negate if s == -1 */
s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */
a = (a ^ s) - s; /* negate if s == -1 */
du_int r;
__udivmoddi4(a, b, &r);
return ((di_int)r ^ s) - s; /* negate if s == -1 */
}
/* Returns: a / b */
COMPILER_RT_ABI du_int
__udivdi3(du_int a, du_int b)
{
return __udivmoddi4(a, b, 0);
}
/* Returns: a % b */
COMPILER_RT_ABI du_int
__umoddi3(du_int a, du_int b)
{
du_int r;
__udivmoddi4(a, b, &r);
return r;
}
#endif
COMPILER_RT_ABI du_int
__udivmoddi4(du_int a, du_int b, du_int* rem)
{
const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
udwords n;
n.all = a;
udwords d;
d.all = b;
udwords q;
udwords r;
unsigned sr;
/* special cases, X is unknown, K != 0 */
if (n.s.high == 0)
{
if (d.s.high == 0)
{
/* 0 X
* ---
* 0 X
*/
if (rem)
*rem = n.s.low % d.s.low;
return n.s.low / d.s.low;
}
/* 0 X
* ---
* K X
*/
if (rem)
*rem = n.s.low;
return 0;
}
/* n.s.high != 0 */
if (d.s.low == 0)
{
if (d.s.high == 0)
{
/* K X
* ---
* 0 0
*/
if (rem)
*rem = n.s.high % d.s.low;
return n.s.high / d.s.low;
}
/* d.s.high != 0 */
if (n.s.low == 0)
{
/* K 0
* ---
* K 0
*/
if (rem)
{
r.s.high = n.s.high % d.s.high;
r.s.low = 0;
*rem = r.all;
}
return n.s.high / d.s.high;
}
/* K K
* ---
* K 0
*/
if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
{
if (rem)
{
r.s.low = n.s.low;
r.s.high = n.s.high & (d.s.high - 1);
*rem = r.all;
}
return n.s.high >> __builtin_ctz(d.s.high);
}
/* K K
* ---
* K 0
*/
sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
/* 0 <= sr <= n_uword_bits - 2 or sr large */
if (sr > n_uword_bits - 2)
{
if (rem)
*rem = n.all;
return 0;
}
++sr;
/* 1 <= sr <= n_uword_bits - 1 */
/* q.all = n.all << (n_udword_bits - sr); */
q.s.low = 0;
q.s.high = n.s.low << (n_uword_bits - sr);
/* r.all = n.all >> sr; */
r.s.high = n.s.high >> sr;
r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
}
else /* d.s.low != 0 */
{
if (d.s.high == 0)
{
/* K X
* ---
* 0 K
*/
if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
{
if (rem)
*rem = n.s.low & (d.s.low - 1);
if (d.s.low == 1)
return n.all;
sr = __builtin_ctz(d.s.low);
q.s.high = n.s.high >> sr;
q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
return q.all;
}
/* K X
* ---
* 0 K
*/
sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
/* 2 <= sr <= n_udword_bits - 1
* q.all = n.all << (n_udword_bits - sr);
* r.all = n.all >> sr;
*/
if (sr == n_uword_bits)
{
q.s.low = 0;
q.s.high = n.s.low;
r.s.high = 0;
r.s.low = n.s.high;
}
else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
{
q.s.low = 0;
q.s.high = n.s.low << (n_uword_bits - sr);
r.s.high = n.s.high >> sr;
r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
}
else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
{
q.s.low = n.s.low << (n_udword_bits - sr);
q.s.high = (n.s.high << (n_udword_bits - sr)) |
(n.s.low >> (sr - n_uword_bits));
r.s.high = 0;
r.s.low = n.s.high >> (sr - n_uword_bits);
}
}
else
{
/* K X
* ---
* K K
*/
sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
/* 0 <= sr <= n_uword_bits - 1 or sr large */
if (sr > n_uword_bits - 1)
{
if (rem)
*rem = n.all;
return 0;
}
++sr;
/* 1 <= sr <= n_uword_bits */
/* q.all = n.all << (n_udword_bits - sr); */
q.s.low = 0;
if (sr == n_uword_bits)
{
q.s.high = n.s.low;
r.s.high = 0;
r.s.low = n.s.high;
}
else
{
q.s.high = n.s.low << (n_uword_bits - sr);
r.s.high = n.s.high >> sr;
r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
}
}
}
/* Not a special case
* q and r are initialized with:
* q.all = n.all << (n_udword_bits - sr);
* r.all = n.all >> sr;
* 1 <= sr <= n_udword_bits - 1
*/
su_int carry = 0;
for (; sr > 0; --sr)
{
/* r:q = ((r:q) << 1) | carry */
r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
q.s.low = (q.s.low << 1) | carry;
/* carry = 0;
* if (r.all >= d.all)
* {
* r.all -= d.all;
* carry = 1;
* }
*/
const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
carry = s & 1;
r.all -= d.all & s;
}
q.all = (q.all << 1) | carry;
if (rem)
*rem = r.all;
return q.all;
}
// Returns: the number of leading 0-bits
// Precondition: a != 0
COMPILER_RT_ABI si_int __clzsi2(si_int a) {
su_int x = (su_int)a;
si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0
x >>= 16 - t; // x = [0 - 0xFFFF]
su_int r = t; // r = [0, 16]
// return r + clz(x)
t = ((x & 0xFF00) == 0) << 3;
x >>= 8 - t; // x = [0 - 0xFF]
r += t; // r = [0, 8, 16, 24]
// return r + clz(x)
t = ((x & 0xF0) == 0) << 2;
x >>= 4 - t; // x = [0 - 0xF]
r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28]
// return r + clz(x)
t = ((x & 0xC) == 0) << 1;
x >>= 2 - t; // x = [0 - 3]
r += t; // r = [0 - 30] and is even
// return r + clz(x)
// switch (x)
// {
// case 0:
// return r + 2;
// case 1:
// return r + 1;
// case 2:
// case 3:
// return r;
// }
return r + ((2 - x) & -((x & 2) == 0));
}
// Returns: the number of trailing 0-bits
// Precondition: a != 0
COMPILER_RT_ABI si_int __ctzsi2(si_int a) {
su_int x = (su_int)a;
si_int t = ((x & 0x0000FFFF) == 0)
<< 4; // if (x has no small bits) t = 16 else 0
x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
su_int r = t; // r = [0, 16]
// return r + ctz(x)
t = ((x & 0x00FF) == 0) << 3;
x >>= t; // x = [0 - 0xFF] + higher garbage bits
r += t; // r = [0, 8, 16, 24]
// return r + ctz(x)
t = ((x & 0x0F) == 0) << 2;
x >>= t; // x = [0 - 0xF] + higher garbage bits
r += t; // r = [0, 4, 8, 12, 16, 20, 24, 28]
// return r + ctz(x)
t = ((x & 0x3) == 0) << 1;
x >>= t;
x &= 3; // x = [0 - 3]
r += t; // r = [0 - 30] and is even
// return r + ctz(x)
// The branch-less return statement below is equivalent
// to the following switch statement:
// switch (x)
// {
// case 0:
// return r + 2;
// case 2:
// return r + 1;
// case 1:
// case 3:
// return r;
// }
return r + ((2 - (x >> 1)) & -((x & 1) == 0));
}
typedef int si_int;
typedef long long di_int;
typedef unsigned su_int;
#define CHAR_BIT __CHAR_BIT__
si_int __ctzdi2(di_int a) {
dwords x;
x.all = a;
const si_int f = -(x.s.low == 0);
return __ctzsi2((x.s.high & f) | (x.s.low & ~f)) +
(f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
}
si_int __clzdi2(di_int a) {
dwords x;
x.all = a;
const si_int f = -(x.s.high == 0);
return __clzsi2((x.s.high & ~f) | (x.s.low & f)) +
(f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
}