blob: 37707b83f063ad972244222d864db9929f32d8bc [file] [log] [blame]
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <math.h>
#include <stdint.h>
#include <string.h>
#include "fplib.h"
// Fallback definition used only when no earlier header has already defined
// FLT_MANT_DIG. 24 is presumably the float significand width in bits,
// counting the implicit leading 1 (the value <float.h> gives for IEEE-754
// binary32) -- confirm if this file is built for another float format.
#if !defined(FLT_MANT_DIG)
#define FLT_MANT_DIG 24
#endif
// Reinterpret the storage of x as a float / int64_t by casting its address.
// x must be an lvalue, because the macros apply the address-of operator to it.
// NOTE(review): reading an object through a pointer to an unrelated type
// conflicts with C's strict-aliasing rules; memcpy is the portable alternative.
#define as_float(x) (*((float *)(&x)))
#define as_long(x) (*((int64_t *)(&x)))
// Counts the zero bits above the most significant set bit of a 64-bit value.
// Returns 0 when bit 63 is set and 64 when value is 0.
static uint32_t clz(uint64_t value)
{
    const uint32_t bit_width = (uint32_t)(sizeof(uint64_t) * 8);
    uint64_t probe = (uint64_t)1 << 63; // start at the top bit and walk down
    uint32_t count = 0;

    while (count < bit_width && (value & probe) == 0)
    {
        probe >>= 1;
        count++;
    }
    return count;
}
float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
{
switch (rnd) {
case qcomRTZ: {
int sign = 0;
if (!data)
return 0.0f;
if (data < 0){
data = - data;
sign = 1;
}
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
int mantShift = 40 - clz(data);
uint32_t mantissa;
if (mantShift >= 0)
mantissa = (uint32_t)((uint64_t)data >> mantShift);
else
mantissa = (uint32_t)((uint64_t)data << -mantShift);
mantissa &= 0x7fffff;//mask off the leading 1
uint32_t result = exponent | mantissa;
if (sign)
result |= 0x80000000;
return as_float(result);
break;
}
case qcomRTE: return (float)(data); break;
case qcomRTP: {
int sign = 0;
int inExact = 0;
uint32_t f = 0xdf000000;
if (!data)
return 0.0f;
if (data == 0x8000000000000000)
return as_float(f);
if (data < 0){
data = - data;
sign = 1;
}
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
int mantShift = 40 - clz(data);
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = (uint64_t)data >> mantShift;
uint64_t mask = (1 << mantShift) - 1;
if ((temp << mantShift) != data)
inExact = 1;
mantissa = (uint32_t)temp;
}
else
{
mantissa = (uint32_t)((uint64_t)data << -mantShift);
}
mantissa &= 0x7fffff;//mask off the leading 1
uint32_t result = exponent | mantissa;
if (sign)
result |= 0x80000000;
if (sign)
return as_float(result); // for negative inputs return rtz results
else
{
if(inExact)
{ // for positive inputs return higher next fp
uint32_t high_float = 0x7f7fffff;
return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
}
else
return as_float(result);
}
}
break;
case qcomRTN: {
int sign = 0;
int inExact = 0;
uint32_t f = 0xdf000000;
if (!data)
return 0.0f;
if (data == 0x8000000000000000)
return as_float(f);
if (data < 0){
data = - data;
sign = 1;
}
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
int mantShift = 40 - clz(data);
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = (uint64_t)data >> mantShift;
uint64_t mask = (1 << mantShift) - 1;
if (temp << mantShift != data)
inExact = 1;
mantissa = (uint32_t)temp;
}
else
mantissa = (uint32_t)((uint64_t)data << -mantShift);
mantissa &= 0x7fffff;//mask off the leading 1
uint32_t result = exponent | mantissa;
if (sign)
result |= 0x80000000;
if (!sign)
return as_float(result); // for positive inputs return RTZ result
else{
if(inExact){ // for negative inputs find the lower next fp number
uint32_t low_float = 0xff7fffff;
return nextafterf(as_float(result), as_float(low_float)); // could be simplified with some inc and carry operation
}
else
return as_float(result);
}
}
case qcomRoundingModeCount: {
break; // Avoid build error for unhandled enum value
}
}
return 0.0f;
}
float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
{
switch (rnd) {
case qcomRTZ: {
if (!data)
return 0.0f;
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
int mantShift = 40 - clz(data);
uint32_t mantissa;
if (mantShift >= 0)
mantissa = (uint32_t)(data >> mantShift);
else
mantissa = (uint32_t)(data << -mantShift);
mantissa &= 0x7fffff;//mask off the leading 1
uint32_t result = exponent | mantissa;
return as_float(result);
break;
}
case qcomRTE: return (float)(data); break;
case qcomRTP: {
int inExact = 0;
if (!data)
return 0.0f;
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
int mantShift = 40 - clz(data);
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = data >> mantShift;
uint64_t mask = (1 << mantShift) - 1;
if (temp << mantShift != data)
inExact = 1;
mantissa = (uint32_t)temp;
}
else
mantissa = (uint32_t)(data << -mantShift);
mantissa &= 0x7fffff;//mask off the leading 1
uint32_t result = exponent | mantissa;
if(inExact){ // for positive inputs return higher next fp
uint32_t high_float = 0x7f7fffff;
return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation
}
else
return as_float(result);
}
case qcomRTN: {
int inExact = 0;
if (!data)
return 0.0f;
uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers
int mantShift = 40 - clz(data);
uint32_t mantissa;
if (mantShift >= 0){
uint64_t temp = (uint64_t)data >> mantShift;
uint64_t mask = (1 << mantShift) - 1;
if (temp << mantShift != data)
inExact = 1;
mantissa = (uint32_t)temp;
}
else
mantissa = (uint32_t)((uint64_t)data << -mantShift);
mantissa &= 0x7fffff;//mask off the leading 1
uint32_t result = exponent | mantissa;
return as_float(result); // for positive inputs return RTZ result
}
case qcomRoundingModeCount: {
break; // Avoid build error for unhandled enum value
}
}
return 0.0f;
}