OpenASIP  2.0
HalfFloatWord.cc
Go to the documentation of this file.
1 
2 /*
3  Copyright (c) 2002-2012 Tampere University.
4 
5  This file is part of TTA-Based Codesign Environment (TCE).
6 
7  Permission is hereby granted, free of charge, to any person obtaining a
8  copy of this software and associated documentation files (the "Software"),
9  to deal in the Software without restriction, including without limitation
10  the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  and/or sell copies of the Software, and to permit persons to whom the
12  Software is furnished to do so, subject to the following conditions:
13 
14  The above copyright notice and this permission notice shall be included in
15  all copies or substantial portions of the Software.
16 
17  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  DEALINGS IN THE SOFTWARE.
24  */
25 /**
26  * @file HalfFloatWord.cc
27  * @author Heikki Kultala (pjaaskel-no.spam-cs.tut.fi) 2012
28  *
29  * This file contains the implementation of the float16 data type used by the simulator.
30  */
31 
32 #include "HalfFloatWord.hh"
33 #include <cmath>
34 
// Fallback definitions for toolchains whose <cmath> does not provide the
// INFINITY and NAN macros. The replacement text is parenthesized so that
// the macros behave as a single value inside larger expressions
// (e.g. 1.0 / INFINITY).
#ifndef INFINITY
#define INFINITY (1.0/0.0)
#endif

#ifndef NAN
#define NAN (0.0/0.0)
#endif
42 
44  int i;
45  unsigned int u;
46  float f;
47 };
48 
49 // constructors
50 HalfFloatWord::HalfFloatWord(uint16_t binaryRep) : binaryRep_(binaryRep) {}
51 
52 // constructors
53 HalfFloatWord::HalfFloatWord() : binaryRep_(0) {}
54 
55 uint16_t
58  u.f = value;
59  int binary16 = (u.i & 0x80000000) >> 16;
60 
61  int expon = (u.i & 0x7f800000) >> 23;
62  if(expon == 255) {
63  // Handle NAN
64  return binary16 | 0x7e00;
65  }
66  expon += 15-127;
67  if(expon <= 0) {
68  // Underflow, return zero with correct sign
69  // TODO: support denormals?
70  return binary16;
71  }
72  if(expon >= 31) {
73  // Overflow, return inf with correct sign
74  binary16 |= 0x7c00;
75  return binary16;
76  }
77  binary16 |= expon << 10;
78  binary16 |= (u.i & 0x007FFFFF) >> 13;
79 
80  //Round to nearest even. Comment following code out
81  //for Round to Zero behavior.
82  int l, g, r, s;
83  l = (u.i >> 13) & 1;
84  g = (u.i >> 12) & 1;
85  r = (u.i >> 11) & 1;
86  s = (u.i & ((1<<11)-1)) ? 1 : 0;
87  if(g && (l || (r||s)))
88  binary16++;
89 
90  return binary16;
91 }
92 
93 /**
94  * Exact and slower version of operator float()-function.
95  *
96  * Attempts to retain original presentation of the source half float.
97  */
98 float
100  FloatConvUnion u;
101 
102  static const uint32_t half_inf_mask = 0x7C00;
103  static const uint32_t half_mant_mask = 0x03FF;
104  static const uint32_t float_inf_mask = 0x7F800000;
105 
106  uint32_t sign = (value.getBinaryRep() & 0x8000) << 16;
107  uint32_t exp = (value.getBinaryRep() & half_inf_mask) >> 10; // Biased form
108  uint32_t mant = (value.getBinaryRep() & half_mant_mask);
109 
110  if (exp == 0x1F) { // is +-inf or NaN?
111  u.u = sign | float_inf_mask | mant << 13;
112  return u.f;
113  }
114 
115  if ((exp == 0 && mant != 0)) { // is denormal?
116  // normalize
117  exp = 127;
118  while (!(mant & 0x400)) {
119  exp--;
120  mant <<= 1;
121  }
122  mant &= 0x400;
123  u.u = sign | exp << 23 | mant << 13;
124  return u.f;
125  } else {
126  u.u = sign | (exp-15+127) << 23 | mant << 13;
127  return u.f;
128  }
129 }
130 
131 #pragma GCC diagnostic warning "-Wstrict-aliasing"
132 
135  return *this;
136 }
137 
139  binaryRep_ = hf.binaryRep_;
140  return *this;
141 }
142 
144  binaryRep_(convertFloatToHalfWordRep(value)) {}
145 
146 
148  binaryRep_(hw.binaryRep_) {}
149 
150 #pragma GCC diagnostic ignored "-Wdiv-by-zero"
151 
152 HalfFloatWord::operator float() const {
153  if (binaryRep_ == 0xFC00) {
154  return -INFINITY;
155  }
156  if (binaryRep_ == 0x7C00) {
157  return INFINITY;
158  }
159 
160  bool sgn = ((binaryRep_ & 0x8000) >> 15);
161  int exp = (binaryRep_ & 0x7C00) >> 10;
162  int mant = binaryRep_ & 0x03FF;
163 
164  if (exp == 0x1F && mant != 0) {
165  return NAN;
166  }
167 
168  float value = (exp == 0) ? mant : mant | 0x0400; // 1.x if not denormal
169  value /= 0x400;
170  float mul = exp2(exp - 15);
171  value *= mul;
172  if (sgn) {
173  value *= -1;
174  }
175  return value;
176 }
177 
178 
179 #pragma GCC diagnostic warning "-Wdiv-by-zero"
180 
181  // calculations
183  return HalfFloatWord(float(*this) + float(right));
184 }
185 
187  return HalfFloatWord(float(*this) - float(right));
188 }
189 
191  return HalfFloatWord(float(*this) * float(right));
192 }
193 
195  return HalfFloatWord(float(*this) / float(right));
196 }
INFINITY
#define INFINITY
Definition: HalfFloatWord.cc:36
HalfFloatWord::convertToFloat
static float convertToFloat(HalfFloatWord value)
Definition: HalfFloatWord.cc:99
HalfFloatWord
Definition: HalfFloatWord.hh:41
HalfFloatWord::operator=
const HalfFloatWord & operator=(float value)
Definition: HalfFloatWord.cc:133
HalfFloatWord::operator*
HalfFloatWord operator*(const HalfFloatWord &right) const
Definition: HalfFloatWord.cc:190
HalfFloatWord::getBinaryRep
uint16_t getBinaryRep() const
Definition: HalfFloatWord.hh:60
HalfFloatWord::operator/
HalfFloatWord operator/(const HalfFloatWord &right) const
Definition: HalfFloatWord.cc:194
NAN
#define NAN
Definition: HalfFloatWord.cc:40
FloatConvUnion::u
unsigned int u
Definition: HalfFloatWord.cc:45
HalfFloatWord::convertFloatToHalfWordRep
static uint16_t convertFloatToHalfWordRep(float value)
Definition: HalfFloatWord.cc:56
HalfFloatWord::operator+
HalfFloatWord operator+(const HalfFloatWord &right) const
Definition: HalfFloatWord.cc:182
FloatConvUnion::f
float f
Definition: HalfFloatWord.cc:46
HalfFloatWord::binaryRep_
uint16_t binaryRep_
Definition: HalfFloatWord.hh:66
FloatConvUnion
Definition: HalfFloatWord.cc:43
FloatConvUnion::i
int i
Definition: HalfFloatWord.cc:44
HalfFloatWord::operator-
HalfFloatWord operator-(const HalfFloatWord &right) const
Definition: HalfFloatWord.cc:186
HalfFloatWord::HalfFloatWord
HalfFloatWord()
Definition: HalfFloatWord.cc:53
HalfFloatWord.hh