Fixpoint approximation

Have you ever been in the situation where you needed to calculate e.g. some sin on a microcontroller and realized that the standard math.h libary which uses floating point is way to slow for your job.

The standard approach is to switch to a fixpoint implementation (taylor approximation, lookuptable, polynomial interpolation etc..). But the question is, where to get such a fast fixpoint implementation from.

Even if you have an implementation, you can always trade between number of lookupvalues, calculation speed, number of polynoms or stepsize between induvidual polynoms.

So the idea is to let a programm create the fast c-code implementation depending on whatever parameters you chose.

The following python code is my first “work in progress” for such an approach.  The resulting c implementation, which this code will generate is quit fast and accurate.

###################################################################################
# Generates an aproximating fixpoint C function for the input from 0 to inputMax
# It divides the input into several parts (each part must have a length of 2**x) and calculates 
# for this part the best (lowest mean square error) polynomal approximation. 
# After that it generates the C inplentation with this lookuptable. 
# Known limitations:
# * input and output and coeffietent fraction length is for now fix with 15
# * no checks for overflows
# * Stepsize wich is the size of one part of the input is only posible with=2**-x with x=1,2,3 ...
#
# Author: Norbert Feurle
# Date: 15.7.2011


from scipy import *
from pylab import *

### Konfiguration ###
aFunction=cos
inputMAX=pi/2  ## Input range must go from 0 to somewhere
Stepsize=2**-3  ## should be 2**-x with x=1,2,3 .....
InputFractionalLength=15    # not fully implemented jet
CoeffFractionalLength=15    # not fully
#OutputFractionalLength=15 # not implemented jet format is X.15
highestPolynomExponent=2    # muss >=1 sein
Outputfile="Fixpointcode.c"
#####################

ReggressionStep=Stepsize/64.0

Xrange=arange(0,Stepsize,ReggressionStep)

AA=matrix(ones(len(Xrange))).H
for i in range(highestPolynomExponent):
    AA=c_[matrix(Xrange**(i+1)).H,AA]

CoeffizentList=matrix([[]])
CoeffizentList=CoeffizentList.reshape((0,highestPolynomExponent+1))
for x_position in arange(0,inputMAX,Stepsize):
	#print x_position
	dataXrange=arange(x_position,x_position+Stepsize,ReggressionStep)
	#print len(dataXrange),len(AA)
	Ys=matrix(aFunction(dataXrange)).T
	#print len(Ys)
	## calculate minimal square error result
	AB_standard=inv(AA.T*AA)*AA.T*Ys
	#CoeffizentList.append(AB_standard.T.A[0])
	CoeffizentList=r_[CoeffizentList,AB_standard.T]

#print CoeffizentList

## Length of CoeffizentList
NrEntries=len(CoeffizentList)

### Quantisize CoeffizentList
CoeffizentList=CoeffizentList*2**CoeffFractionalLength

NrPolys=CoeffizentList.shape[1]

### Calculate and plot Residuals
Residals=[]
InputRange=arange(0,inputMAX,Stepsize*1.0/100)
for input_val in InputRange:
	Exaktvalue=aFunction(input_val)
	lookuppos=int(input_val*1.0/Stepsize)

	cur_xval=(input_val-lookuppos*Stepsize)#*2**InputFractionalLength)
	#print cur_xval
	## calculate using horner schema/method 
	#for data in
	data=int(CoeffizentList[lookuppos,0]+0.5)  ## highest polynom coeffizient
	for cur_poly in range(1,NrPolys):
		data=data*cur_xval
		#data=data>>15
		data=data+int(CoeffizentList[lookuppos,cur_poly]+0.5)

	Fixpointvalue=data
	#for cur_poly in range(NrPolys-1):
	# data=int(CoeffizentList[lookuppos,cur_poly+1])+data
	#Fixpointvalue=int(CoeffizentList[lookuppos,2])+cur_xval*(int(CoeffizentList[lookuppos,1])+cur_xval*int(CoeffizentList[lookuppos,0]))

	Error=Exaktvalue-(Fixpointvalue*2**-CoeffFractionalLength)
	Residals.append(Error)

import sys
sys.stdout = open(Outputfile, "a")
for cur_poly in range(NrPolys):
	print "int CoeffsPoly"+str(NrPolys-1-cur_poly)+aFunction.__name__+"["+str(NrEntries)+"]={"
	for coeff_data in CoeffizentList[:-1]:
		#print coeff_data
		print str(int(coeff_data[0,cur_poly]+0.5))+","
	print str(int(CoeffizentList[-1][0,cur_poly]+0.5))
	print "};"		

print "int my"+aFunction.__name__+"(int phi){"
print "\tregister int phiXt=phi; // input is in X.15 format"
print "\tphiXt=(phiXt&"+hex(int((2**InputFractionalLength-1)/(1.0/Stepsize)))+");"
print
print "\t// Get the right Coeffizients"
print "\tregister int index=phi>>"+str(int(InputFractionalLength+log2(Stepsize)))+";"
for cur_poly in range(NrPolys):
	print "\tregister int xup"+str(cur_poly)+"=CoeffsPoly"+str(cur_poly)+aFunction.__name__+"[index];"

	#print data=int(CoeffizentList[lookuppos,0]) ## highest polynom coeffizient
highestPolyCoeffString="xup"+str(NrPolys-1)
Polyrange=range(0,NrPolys-1)
Polyrange.reverse()
print
print "\t// calculate the result using horner schema"
for cur_poly in Polyrange:
	CurtentPolyCoeffString="xup"+str(cur_poly)
	print "\t"+highestPolyCoeffString+"=(phiXt*"+highestPolyCoeffString+")>>"+str(int(InputFractionalLength))+";"  ##=data*cur_xval
	print "\t"+highestPolyCoeffString+"="+CurtentPolyCoeffString+"+"+highestPolyCoeffString+";"
	#data=data+int(CoeffizentList[lookuppos,cur_poly])

print
print "\t// Return the result"
print "\treturn "+highestPolyCoeffString+";"
print "}"

fig = figure(1)
ax = fig.add_subplot(111)
ax.grid(True)
ax.set_title("Residuals of approximated Fixpoint (Input not quantisized)"+str(aFunction.__name__))
ax.set_xlabel("Input")
ax.set_ylabel("Abweichung vom Orginal")
line1=ax.plot(InputRange,Residals)
show()

And here the generated c-Code. Really great 😉
File: Fixpointcode.c

int CoeffsPoly2cos[13]={
-16347,
-16094,
-15590,
-14842,
-13863,
-12668,
-11275,
-9705,
-7985,
-6139,
-4198,
-2192,
-151
};
int CoeffsPoly1cos[13]={
-1,
-4093,
-8120,
-12021,
-15734,
-19202,
-22370,
-25189,
-27615,
-29610,
-31143,
-32190,
-32734
};
int CoeffsPoly0cos[13]={
32768,
32512,
31749,
30491,
28757,
26574,
23976,
21005,
17705,
14129,
10333,
6375,
2318
};
int mycos(int phi){
	register int phiXt=phi; // input is in X.15 format
	phiXt=(phiXt&0xfff);

	// Get the right Coeffizients
	register int index=phi>>12;
	register int xup0=CoeffsPoly0cos[index];
	register int xup1=CoeffsPoly1cos[index];
	register int xup2=CoeffsPoly2cos[index];

	// calculate the result using horner schema
	xup2=(phiXt*xup2)>>15;
	xup2=xup1+xup2;
	xup2=(phiXt*xup2)>>15;
	xup2=xup0+xup2;

	// Return the result
	return xup2;
}

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: