Skip to content

Commit 1c84fa8

Browse files
committed
Port the Float16 runtime functions to C.
The Julia runtime wasn't safe wrt. GC operations. I left the commit in for archival purposes, in case we want to revisit this again later.
1 parent 4478841 commit 1c84fa8

File tree

6 files changed

+204
-309
lines changed

6 files changed

+204
-309
lines changed

base/Base.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,6 @@ end
129129
include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl")) # include($BUILDROOT/base/build_h.jl)
130130
include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
131131

132-
# run-time library
133-
include("runtime/runtime.jl")
134-
using .Runtime
135-
136132
# numeric operations
137133
include("hashing.jl")
138134
include("rounding.jl")

base/runtime/runtime.jl

Lines changed: 0 additions & 161 deletions
This file was deleted.

src/intrinsics.cpp

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,3 +1299,200 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
12991299
}
13001300
assert(0 && "unreachable");
13011301
}
1302+
1303+
1304+
// float16 intrinsics
1305+
// TODO: use LLVM's compiler-rt
1306+
1307+
static inline float half_to_float(uint16_t ival)
1308+
{
1309+
uint32_t sign = (ival & 0x8000) >> 15;
1310+
uint32_t exp = (ival & 0x7c00) >> 10;
1311+
uint32_t sig = (ival & 0x3ff) >> 0;
1312+
uint32_t ret;
1313+
1314+
if (exp == 0) {
1315+
if (sig == 0) {
1316+
sign = sign << 31;
1317+
ret = sign | exp | sig;
1318+
}
1319+
else {
1320+
int n_bit = 1;
1321+
uint16_t bit = 0x0200;
1322+
while ((bit & sig) == 0) {
1323+
n_bit = n_bit + 1;
1324+
bit = bit >> 1;
1325+
}
1326+
sign = sign << 31;
1327+
exp = ((-14 - n_bit + 127) << 23);
1328+
sig = ((sig & (~bit)) << n_bit) << (23 - 10);
1329+
ret = sign | exp | sig;
1330+
}
1331+
}
1332+
else if (exp == 0x1f) {
1333+
if (sig == 0) { // Inf
1334+
if (sign == 0)
1335+
ret = 0x7f800000;
1336+
else
1337+
ret = 0xff800000;
1338+
}
1339+
else // NaN
1340+
ret = 0x7fc00000 | (sign << 31) | (sig << (23 - 10));
1341+
}
1342+
else {
1343+
sign = sign << 31;
1344+
exp = ((exp - 15 + 127) << 23);
1345+
sig = sig << (23 - 10);
1346+
ret = sign | exp | sig;
1347+
}
1348+
1349+
float fret;
1350+
memcpy(&fret, &ret, sizeof(float));
1351+
return fret;
1352+
}
1353+
1354+
// float to half algorithm from:
1355+
// "Fast Half Float Conversion" by Jeroen van der Zijp
1356+
// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
1357+
//
1358+
// With adjustments for round-to-nearest, ties to even.
1359+
1360+
static uint16_t basetable[512] = {
1361+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1362+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1363+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1364+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1365+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1366+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1367+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1368+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1369+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1370+
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
1371+
0x0000, 0x0000, 0x0000, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
1372+
0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
1373+
0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
1374+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1375+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1376+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1377+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1378+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1379+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1380+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1381+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1382+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1383+
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
1384+
0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1385+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1386+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1387+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1388+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1389+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1390+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1391+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1392+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1393+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
1394+
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
1395+
0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
1396+
0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
1397+
0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1398+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1399+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1400+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1401+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1402+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1403+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1404+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1405+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1406+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
1407+
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
1408+
1409+
static uint8_t shifttable[512] = {
1410+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1411+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1412+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1413+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1414+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1415+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1416+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1417+
0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
1418+
0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
1419+
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
1420+
0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1421+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1422+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1423+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1424+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1425+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1426+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1427+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1428+
0x18, 0x18, 0x18, 0x0d, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1429+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1430+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1431+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1432+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1433+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1434+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
1435+
0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
1436+
0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
1437+
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
1438+
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1439+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1440+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1441+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1442+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1443+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1444+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1445+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
1446+
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
1447+
1448+
static inline uint16_t float_to_half(float param)
1449+
{
1450+
uint32_t f;
1451+
memcpy(&f, &param, sizeof(float));
1452+
if (isnan(param)) {
1453+
uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10)));
1454+
return t ^ ((uint16_t)(f >> 0xd));
1455+
}
1456+
int i = ((f & ~0x007fffff) >> 23);
1457+
uint8_t sh = shifttable[i];
1458+
f &= 0x007fffff;
1459+
// If `val` is subnormal, the tables are set up to force the
1460+
// result to 0, so the significand has an implicit `1` in the
1461+
// cases we care about.
1462+
f |= 0x007fffff + 0x1;
1463+
uint16_t h = (uint16_t)(basetable[i] + ((f >> sh) & 0x03ff));
1464+
// round
1465+
// NOTE: we maybe should ignore NaNs here, but the payload is
1466+
// getting truncated anyway so "rounding" it might not matter
1467+
int nextbit = (f >> (sh - 1)) & 1;
1468+
if (nextbit != 0 && (h & 0x7C00) != 0x7C00) {
1469+
// Round halfway to even or check lower bits
1470+
if ((h & 1) == 1 || (f & ((1 << (sh - 1)) - 1)) != 0)
1471+
h += UINT16_C(1);
1472+
}
1473+
return h;
1474+
}
1475+
1476+
#if !defined(_OS_DARWIN_) // xcode already links compiler-rt
1477+
1478+
extern "C" JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
1479+
{
1480+
return half_to_float(param);
1481+
}
1482+
1483+
extern "C" JL_DLLEXPORT float __extendhfsf2(uint16_t param)
1484+
{
1485+
return half_to_float(param);
1486+
}
1487+
1488+
extern "C" JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
1489+
{
1490+
return float_to_half(param);
1491+
}
1492+
1493+
extern "C" JL_DLLEXPORT uint16_t __truncdfhf2(double param)
1494+
{
1495+
return float_to_half((float)param);
1496+
}
1497+
1498+
#endif

src/julia.expmap

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@
4242
environ;
4343
__progname;
4444

45+
/* compiler run-time intrinsics */
46+
__gnu_h2f_ieee;
47+
__extendhfsf2;
48+
__gnu_f2h_ieee;
49+
__truncdfhf2;
50+
4551
local:
4652
*;
4753
};

test/choosetests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ in the `choices` argument:
3333
"""
3434
function choosetests(choices = [])
3535
testnames = [
36-
"subarray", "core", "compiler", "worlds", "runtime",
36+
"subarray", "core", "compiler", "worlds",
3737
"keywordargs", "numbers", "subtype",
3838
"char", "strings", "triplequote", "unicode", "intrinsics",
3939
"dict", "hashing", "iobuffer", "staged", "offsetarray",

0 commit comments

Comments
 (0)