From 848b55abc39a30c691ca8ecc671f724f6cd41c96 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Sat, 16 Mar 2019 10:16:21 +0800 Subject: [PATCH 1/2] Use fabsf instead of fabs --- Silverware/src/pid.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Silverware/src/pid.c b/Silverware/src/pid.c index 55c751b..5098f9d 100644 --- a/Silverware/src/pid.c +++ b/Silverware/src/pid.c @@ -402,9 +402,9 @@ float pid(int x ) stickTransition[x] = stickTransitionProfileA[x]; } if (stickAccelerator[x] < 1){ - transitionSetpointWeight[x] = (fabs(rxcopy[x]) * stickTransition[x]) + (1- stickTransition[x]); + transitionSetpointWeight[x] = (fabsf(rxcopy[x]) * stickTransition[x]) + (1- stickTransition[x]); }else{ - transitionSetpointWeight[x] = (fabs(rxcopy[x]) * (stickTransition[x] / stickAccelerator[x])) + (1- stickTransition[x]); + transitionSetpointWeight[x] = (fabsf(rxcopy[x]) * (stickTransition[x] / stickAccelerator[x])) + (1- stickTransition[x]); } static float lastrate[3]; static float lastsetpoint[3]; @@ -442,9 +442,9 @@ float pid(int x ) stickTransition[x] = stickTransitionProfileA[x]; } if (stickAccelerator[x] < 1){ - transitionSetpointWeight[x] = (fabs(rxcopy[x]) * stickTransition[x]) + (1- stickTransition[x]); + transitionSetpointWeight[x] = (fabsf(rxcopy[x]) * stickTransition[x]) + (1- stickTransition[x]); }else{ - transitionSetpointWeight[x] = (fabs(rxcopy[x]) * (stickTransition[x] / stickAccelerator[x])) + (1- stickTransition[x]); + transitionSetpointWeight[x] = (fabsf(rxcopy[x]) * (stickTransition[x] / stickAccelerator[x])) + (1- stickTransition[x]); } static float lastrate[3]; static float lastsetpoint[3]; From a9e2d4e1620c0ddef84ab4f309b4b361eca9c164 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Sat, 16 Mar 2019 10:16:48 +0800 Subject: [PATCH 2/2] Add qfp lib to enable GCC-8 build --- Makefile | 91 +++- Silverware/src/drv_softserial.h | 1 + gcc/Makefile | 59 --- gcc/methods.c | 180 ++++++- gcc/qfplib/LICENCE | 339 +++++++++++++ gcc/qfplib/README | 13 + gcc/qfplib/qfpio.h | 32 ++ gcc/qfplib/qfpio.s | 868 ++++++++++++++++++++++++++++++++ gcc/qfplib/qfplib.h | 54 ++ gcc/qfplib/qfplib.s | 858 +++++++++++++++++++++++++++++++ 10 files changed, 2418 insertions(+), 77 deletions(-) delete mode 100644 gcc/Makefile create mode 100644 gcc/qfplib/LICENCE create mode 100644 gcc/qfplib/README create mode 100644 gcc/qfplib/qfpio.h create mode 100644 gcc/qfplib/qfpio.s create mode 100644 gcc/qfplib/qfplib.h create mode 100644 gcc/qfplib/qfplib.s diff --git a/Makefile b/Makefile index ada44ef..1bc05fe 100644 --- a/Makefile +++ b/Makefile @@ -1,25 +1,58 @@ +ifdef USE_GCC + CC = arm-none-eabi-gcc + CXX = arm-none-eabi-g++ + ASM = arm-none-eabi-as + LD = arm-none-eabi-gcc + OBJCOPY = arm-none-eabi-objcopy + SIZE = arm-none-eabi-size +else + CC = armcc --c99 + CXX = armcc --cpp + ASM = armasm + LD = armlink + OBJCOPY = fromelf +endif SDIR = . -VPATH = $(SDIR)/Silverware/src:$(SDIR)/Utilities:$(SDIR)/Libraries/STM32F0xx_StdPeriph_Driver/src:$(SDIR)/Libraries/CMSIS/Device/ST/STM32F0xx/Source/Templates/arm SRC_C = $(wildcard $(SDIR)/Silverware/src/*.c) \ - $(wildcard $(SDIR)/Utilities/*.c) \ - $(wildcard $(SDIR)/Libraries/STM32F0xx_StdPeriph_Driver/src/*.c) + $(wildcard $(SDIR)/Utilities/*.c) \ + $(wildcard $(SDIR)/Libraries/STM32F0xx_StdPeriph_Driver/src/*.c) SRC_CXX = $(wildcard $(SDIR)/Silverware/src/*.cpp) -SRC_S = $(SDIR)/Libraries/CMSIS/Device/ST/STM32F0xx/Source/Templates/arm/startup_stm32f031.s -CFLAGS := -I$(DIR)/Silverware/src -I$(SDIR)/Libraries/CMSIS/Device/ST/STM32F0xx/Include -I $(SDIR)/Libraries/CMSIS/Include -I $(SDIR)/Utilities -I $(SDIR)/Libraries/STM32F0xx_StdPeriph_Driver/inc - -CPU = --cpu Cortex-M0 - -CFLAGS += $(CPU) -D__EVAL -D__MICROLIB -g -O2 --apcs=interwork --split_sections -D__UVISION_VERSION="524" -DUSE_STDPERIPH_DRIVER -DSTM32F031 --fpmode=fast -ASMFLAGS := $(CPU) --pd "__EVAL SETA 1" -g --apcs=interwork --pd "__MICROLIB SETA 1" --pd "__UVISION_VERSION SETA 524" --xref -LDFLAGS := $(CPU) --library_type=microlib --ro-base 0x08000000 --entry 0x08000000 --rw-base 0x20000000 --entry Reset_Handler --first __Vectors --strict --info summarysizes - -SRCS := $(SRC_C) $(SRC_CXX) $(SRC_S) +ifdef USE_GCC +SRC_C += $(wildcard $(SDIR)/gcc/*.c) +SRC_S += $(SDIR)/Libraries/CMSIS/Device/ST/STM32F0xx/Source/Templates/gcc_ride7/startup_stm32f030.s +SRC_S += $(SDIR)/gcc/qfplib/qfplib.s +LDFLAGS := -Wl,-wrap,__aeabi_dmul \ + -Wl,-wrap,__aeabi_fadd \ + -Wl,-wrap,__aeabi_fdiv \ + -Wl,-wrap,__aeabi_fmul \ + -Wl,-wrap,__aeabi_fsub \ + -Wl,-wrap,__aeabi_i2f \ + -Wl,-wrap,__aeabi_ui2f \ + -Wl,-wrap,__aeabi_f2iz \ + -Wl,-wrap,__aeabi_f2uiz \ + -Wl,-wrap,__aeabi_fcmpeq \ + -Wl,-wrap,__aeabi_fcmplt \ + -Wl,-wrap,__aeabi_fcmple \ + -Wl,-wrap,__aeabi_fcmpge \ + -Wl,-wrap,__aeabi_fcmpgt \ + -Wl,-wrap,__aeabi_fcmpun +else +SRC_S += $(SDIR)/Libraries/CMSIS/Device/ST/STM32F0xx/Source/Templates/arm/startup_stm32f031.s +endif + +INCLUDES = -I$(SDIR)/Silverware/src -I$(SDIR)/Libraries/CMSIS/Device/ST/STM32F0xx/Include -I $(SDIR)/Libraries/CMSIS/Include -I $(SDIR)/Utilities -I $(SDIR)/Libraries/STM32F0xx_StdPeriph_Driver/inc + +CPU = cortex-m0 + +SRCS = $(SRC_C) $(SRC_CXX) $(SRC_S) ODIR = $(SDIR)/obj OBJS = $(addprefix $(ODIR)/, $(notdir $(SRC_C:.c=.o) $(SRC_S:.s=.o) $(SRC_CXX:.cpp=.o))) +VPATH = $(dir $(SRCS)) +ifndef USE_GCC export ARM_TOOL_VARIANT = mdk_lite export ARMCC5_ASMOPT = --diag_suppress=9931 export ARMCC5_CCOPT = --diag_suppress=9931 @@ -28,6 +61,23 @@ export CPU_TYPE = STM32F030F4 export CPU_VENDOR = STMicroelectronics export CPU_CLOCK = 0x00B71B00 export UV2_TARGET = BWhoop +endif + + +ifndef USE_GCC +CFLAGS := --cpu $(CPU) $(INCLUDES) -D__EVAL -D__MICROLIB -g -O2 --apcs=interwork --split_sections -D__UVISION_VERSION="524" -DUSE_STDPERIPH_DRIVER -DSTM32F031 --fpmode=fast +ASMFLAGS := --cpu $(CPU) --pd "__EVAL SETA 1" -g --apcs=interwork --pd "__MICROLIB SETA 1" --pd "__UVISION_VERSION SETA 524" --xref +LDFLAGS := --cpu $(CPU) --library_type=microlib --ro-base 0x08000000 --entry 0x08000000 --rw-base 0x20000000 --entry Reset_Handler --first __Vectors --strict --info summarysizes +DEPENDS := --depend= +else +CFLAGS := -mcpu=$(CPU) $(INCLUDES) -DDISABLE_GESTURES2 -Os -g -mthumb -fdata-sections -ffunction-sections \ + -fsingle-precision-constant -ffast-math \ + -nostartfiles --specs=nano.specs --specs=nosys.specs +CFLAGS += -DUSE_STDPERIPH_DRIVER -DSTM32F031 +LDFLAGS += $(CFLAGS) -Wl,-T,flash.ld,-Map,output.map,--gc-sections -L$(SDIR)/gcc +ASMFLAGS := -mcpu=$(CPU) +DEPENDS := -MD -MP -MF +endif .PHONY: default all default: silverware.hex @@ -41,21 +91,28 @@ $(ODIR): $(ODIR)/%.o: %.cpp @echo " + Compiling '$(notdir $<)'" - armcc --cpp $(CFLAGS) --depend=$(@:.o=.dep) -c -o $@ $< + $(CXX) $(CFLAGS) $(DEPENDS)$(@:.o=.dep) -c -o $@ $< $(ODIR)/%.o: %.c @echo " + Compiling '$(notdir $<)'" - armcc --c99 $(CFLAGS) --depend=$(@:.o=.dep) -c -o $@ $< + $(CC) $(CFLAGS) $(DEPENDS)$(@:.o=.dep) -c -o $@ $< $(ODIR)/%.o: %.s @echo " + Compiling '$(notdir $<')" - armasm $(ASMFLAGS) -o $@ $< + $(ASM) $(ASMFLAGS) -o $@ $< silverware.hex: silverware.axf +ifdef USE_GCC + $(OBJCOPY) -O ihex $< $@ +else fromelf $< --i32combined --output $@ +endif silverware.axf: $(OBJS) - armlink $(LDFLAGS) $(OBJS) -o $@ + $(LD) $(LDFLAGS) $(OBJS) -o $@ +ifdef USE_GCC + $(SIZE) $@ +endif clean: rm -Rf $(ODIR) silverware.axf silverware.hex diff --git a/Silverware/src/drv_softserial.h b/Silverware/src/drv_softserial.h index 03517e5..da14ce6 100644 --- a/Silverware/src/drv_softserial.h +++ b/Silverware/src/drv_softserial.h @@ -48,6 +48,7 @@ void softserial_write_byte_ex(const SoftSerialData_t* data, uint8_t byte); void softserial_set_input(const SoftSerialData_t* data); void softserial_set_output(const SoftSerialData_t* data); +__attribute__((always_inline)) inline void delay_until(uint32_t uS ) { while (gettime() < uS) ; diff --git a/gcc/Makefile b/gcc/Makefile deleted file mode 100644 index 8f949cb..0000000 --- a/gcc/Makefile +++ /dev/null @@ -1,59 +0,0 @@ -TARGET=bwhoop -EXECUTABLE=bwhoop.elf - -CC=arm-none-eabi-gcc -LD=arm-none-eabi-gcc -AR=arm-none-eabi-ar -AS=arm-none-eabi-as -CP=arm-none-eabi-objcopy -OD=arm-none-eabi-objdump - -topdir = .. - -DEFS = -DUSE_STDPERIPH_DRIVER -DSTM32F031 -STARTUP = $(topdir)/Libraries/CMSIS/Device/ST/STM32F0xx/Source/Templates/gcc_ride7/startup_stm32f030.s - -MCU = cortex-m0 -MCFLAGS = -mcpu=$(MCU) -g -ggdb -mthumb -fdata-sections -ffunction-sections -fsingle-precision-constant -ffast-math -nostartfiles --specs=nano.specs --specs=nosys.specs -flto - -INCLUDES = -I$(topdir)/Silverware/src/ \ - -I$(topdir)/Libraries/CMSIS/Device/ST/STM32F0xx/Include/ \ - -I$(topdir)/Libraries/CMSIS/Include/ \ - -I$(topdir)/Utilities/ \ - -I$(topdir)/Libraries/STM32F0xx_StdPeriph_Driver/inc/ - -OPTIMIZE = -Os - -CFLAGS = $(MCFLAGS) $(OPTIMIZE) $(DEFS) -I. -I./ $(INCLUDES) -Wl,-T,flash.ld,-Map,output.map,--gc-sections -std=gnu99 - -AFLAGS = $(MCFLAGS) - -SRC = $(wildcard $(topdir)/Silverware/src/*.c) \ - $(wildcard $(topdir)/Silverware/src/*.h) \ - $(wildcard $(topdir)/Silverware/src/*.cpp) \ - $(topdir)/Libraries/STM32F0xx_StdPeriph_Driver/src/*.c \ - $(topdir)/Utilities/system_stm32f0xx.c \ - methods.c - -OBJDIR = . -OBJ = $(patsubst %.c,$(OBJDIR)/%.o,$(filter %.c,$(SRC))) -OBJ += $(patsubst %.cpp,$(OBJDIR)/%.o,$(filter %.cpp,$(SRC))) -OBJ += Startup.o - - -all: $(TARGET) - -$(TARGET).hex: $(EXECUTABLE) - $(CP) -O ihex $^ $@ -$(TARGET): $(EXECUTABLE) - $(CP) -O binary $^ $@ - -$(EXECUTABLE): $(STARTUP) $(SRC) - $(CC) $(CFLAGS) $^ -lm -o $@ - arm-none-eabi-size $(EXECUTABLE) - - -clean: - rm -f Startup.lst $(TARGET) $(TARGET).lst $(OBJ) $(AUTOGEN) \ - $(TARGET).out $(TARGET).hex $(TARGET).map \ - $(TARGET).dmp $(EXECUTABLE) diff --git a/gcc/methods.c b/gcc/methods.c index 0ea2e8b..2865a93 100644 --- a/gcc/methods.c +++ b/gcc/methods.c @@ -1 +1,179 @@ -void _sbrk() {} \ No newline at end of file +#include "qfplib/qfplib.h" + +/////////////////////////////////////////////////////////////////////////////// +// Table 3, single precision floating-point comparison helper functions + +// qfp_fcmp(r0, r1): +// equal? return 0 +// r0 > r1? return +1 +// r0 < r1: return -1 + +// result (1, 0) denotes (=, ?<>) [2], use for C == and != +int __wrap___aeabi_fcmpeq(float x, float y) { + return (qfp_fcmp(x, y) == 0) // x == y + ? 1 : 0; +} +// Unit Tests: +// aeabi_fcmpeq(2205.196, 2205.196) = 1 +// aeabi_fcmpeq(2205.196, 2205.195) = 0 +// aeabi_fcmpeq(2205.196, 2205.197) = 0 +// aeabi_fcmpeq(2205.196, 0) = 0 +// aeabi_fcmpeq(-2205.196, -2205.196) = 1 +// aeabi_fcmpeq(-2205.196, -2205.195) = 0 +// aeabi_fcmpeq(-2205.196, -2205.197) = 0 +// aeabi_fcmpeq(-2205.196, 0) = 0 + +// result (1, 0) denotes (<, ?>=) [2], use for C < +int __wrap___aeabi_fcmplt(float x, float y) { + return (qfp_fcmp(x, y) < 0) // x < y + ? 1 : 0; +} +// Unit Tests: +// aeabi_fcmplt(2205.196, 2205.196) = 0 +// aeabi_fcmplt(2205.196, 2205.195) = 0 +// aeabi_fcmplt(2205.196, 2205.197) = 1 +// aeabi_fcmplt(2205.196, 0) = 0 +// aeabi_fcmplt(-2205.196, -2205.196) = 0 +// aeabi_fcmplt(-2205.196, -2205.195) = 1 +// aeabi_fcmplt(-2205.196, -2205.197) = 0 +// aeabi_fcmplt(-2205.196, 0) = 1 + +// result (1, 0) denotes (<=, ?>) [2], use for C <= +int __wrap___aeabi_fcmple(float x, float y) { + return (qfp_fcmp(x, y) > 0) // x > y + ? 0 : 1; +} +// Unit Tests: +// aeabi_fcmple(2205.196, 2205.196) = 1 +// aeabi_fcmple(2205.196, 2205.195) = 0 +// aeabi_fcmple(2205.196, 2205.197) = 1 +// aeabi_fcmple(2205.196, 0) = 0 +// aeabi_fcmple(-2205.196, -2205.196) = 1 +// aeabi_fcmple(-2205.196, -2205.195) = 1 +// aeabi_fcmple(-2205.196, -2205.197) = 0 +// aeabi_fcmple(-2205.196, 0) = 1 + +// result (1, 0) denotes (>=, ?<) [2], use for C >= +int __wrap___aeabi_fcmpge(float x, float y) { + return (qfp_fcmp(x, y) < 0) // x < y + ? 0 : 1; +} +// Unit Tests: +// aeabi_fcmpge(2205.196, 2205.196) = 1 +// aeabi_fcmpge(2205.196, 2205.195) = 1 +// aeabi_fcmpge(2205.196, 2205.197) = 0 +// aeabi_fcmpge(2205.196, 0) = 1 +// aeabi_fcmpge(-2205.196, -2205.196) = 1 +// aeabi_fcmpge(-2205.196, -2205.195) = 0 +// aeabi_fcmpge(-2205.196, -2205.197) = 1 +// aeabi_fcmpge(-2205.196, 0) = 0 + +// result (1, 0) denotes (>, ?<=) [2], use for C > +int __wrap___aeabi_fcmpgt(float x, float y) { + return (qfp_fcmp(x, y) > 0) // x > y + ? 1 : 0; +} +// Unit Tests: +// aeabi_fcmpgt(2205.196, 2205.196) = 0 +// aeabi_fcmpgt(2205.196, 2205.195) = 1 +// aeabi_fcmpgt(2205.196, 2205.197) = 0 +// aeabi_fcmpgt(2205.196, 0) = 1 +// aeabi_fcmpgt(-2205.196, -2205.196) = 0 +// aeabi_fcmpgt(-2205.196, -2205.195) = 0 +// aeabi_fcmpgt(-2205.196, -2205.197) = 1 +// aeabi_fcmpgt(-2205.196, 0) = 0 + +// result (1, 0) denotes (?, <=>) [2], use for C99 isunordered() +int __wrap___aeabi_fcmpun(float x, float y) { + return (qfp_fcmp(x, y) == 0) // x == y + ? 0 : 1; +} +// Unit Tests: +// aeabi_fcmpun(2205.196, 2205.196) = 0 +// aeabi_fcmpun(2205.196, 2205.195) = 1 +// aeabi_fcmpun(2205.196, 2205.197) = 1 +// aeabi_fcmpun(2205.196, 0) = 1 +// aeabi_fcmpun(-2205.196, -2205.196) = 0 +// aeabi_fcmpun(-2205.196, -2205.195) = 1 +// aeabi_fcmpun(-2205.196, -2205.197) = 1 +// aeabi_fcmpun(-2205.196, 0) = 1 + +/////////////////////////////////////////////////////////////////////////////// +// Table 4, Standard single precision floating-point arithmetic helper functions + +// single-precision division, n / d +float __wrap___aeabi_fdiv(float n, float d) { + return qfp_fdiv_fast( n , d ); +} +// Unit Tests: +// aeabi_fdiv(2205.1969, 270.8886) = 8.140604292687105 +// aeabi_fdiv(-2205.1969, 270.8886) = -8.140604292687105 +// aeabi_fdiv(2205.1969, -270.8886) = -8.140604292687105 +// aeabi_fdiv(-2205.1969, -270.8886) = 8.140604292687105 + +float __wrap___aeabi_fadd(float a, float b) { + return qfp_fadd( a , b ); +} +// Unit Tests: +// aeabi_fadd(2205.1969, 270.8886) = 2476.0855 +// aeabi_fadd(-2205.1969, 270.8886) = -1934.3083 +// aeabi_fadd(2205.1969, -270.8886) = 1934.3083 +// aeabi_fadd(-2205.1969, -270.8886) = -2476.0855 + +float __wrap___aeabi_fsub(float a, float b) { + return qfp_fsub( a , b ); +} +// Unit Tests: +// aeabi_fsub(2205.1969, 270.8886) = 1934.3083 +// aeabi_fsub(-2205.1969, 270.8886) = -2476.0855 +// aeabi_fsub(2205.1969, -270.8886) = 2476.0855 +// aeabi_fsub(-2205.1969, -270.8886) = -1934.3083 + +float __wrap___aeabi_fmul(float a, float b) { + return qfp_fmul( a , b ); +} +// Unit Tests: +// aeabi_fmul(2205.1969, 270.8886) = 597362.70096534 +// aeabi_fmul(-2205.1969, 270.8886) = -597362.70096534 +// aeabi_fmul(2205.1969, -270.8886) = -597362.70096534 +// aeabi_fmul(-2205.1969, -270.8886) = 597362.70096534 + +/////////////////////////////////////////////////////////////////////////////// +// Table 5, Standard integer to floating-point conversions + + +// integer to float C-style conversion. +float __wrap___aeabi_i2f(int x) { + return qfp_int2float(x); +} + +float __wrap___aeabi_ui2f(unsigned int x) { + return qfp_uint2float(x); +} + +/////////////////////////////////////////////////////////////////////////////// +// Table 6, Standard floating-point to integer conversions + +// float to integer C-style conversion. "z" means round towards 0. +int __wrap___aeabi_f2iz(float x) { + if (qfp_fcmp(x, 0) == 0) { return 0; } + // qfp_float2int() works like floor(). If x is negative, we add 1 to the result. + int xfloored = qfp_float2int(x); + if (xfloored < 0) { return xfloored + 1; } + return xfloored; +} +// Unit Tests: +// aeabi_f2iz(0) = 0 +// aeabi_f2iz(2205.1969) = 2205 +// aeabi_f2iz(-2205.1969) = -2205 + +// float to unsigned C-style conversion. "z" means round towards 0. +unsigned __wrap___aeabi_f2uiz(float x) { + if (qfp_fcmp(x, 0) == 0) { return 0; } + if (qfp_fcmp(x, 0) < 0) { return 0; } + return qfp_float2uint(x); +} +// Unit Tests: +// aeabi_f2iz(0) = 0 +// aeabi_f2uiz(2205.1969) = 2205 +// aeabi_f2uiz(-2205.1969) = 0 diff --git a/gcc/qfplib/LICENCE b/gcc/qfplib/LICENCE new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/gcc/qfplib/LICENCE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/gcc/qfplib/README b/gcc/qfplib/README new file mode 100644 index 0000000..259d485 --- /dev/null +++ b/gcc/qfplib/README @@ -0,0 +1,13 @@ +Qfplib is open source, licensed under version 2 of the GNU GPL. A copy +of that licence is included in this archive. The archive also contains: + +- qfplib.s, the source code to qfplib. The GNU assembler syntax is used. + +- qfplib.h, a C header file giving prototypes for the qfplib functions. + +- qfpio.s, the source code to qfpio, routines for converting between +strings and floating-point values. + +- qfpio.h, a C header file giving prototypes for the qfpio functions. + +Visit http://www.quinapalus.com/qfplib.html for more information. diff --git a/gcc/qfplib/qfpio.h b/gcc/qfplib/qfpio.h new file mode 100644 index 0000000..1be74a0 --- /dev/null +++ b/gcc/qfplib/qfpio.h @@ -0,0 +1,32 @@ +// Copyright 2015 Mark Owen +// http://www.quinapalus.com +// E-mail: qfp@quinapalus.com +// +// This file is free software: you can redistribute it and/or modify +// it under the terms of version 2 of the GNU General Public License +// as published by the Free Software Foundation. +// +// This file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this file. If not, see or +// write to the Free Software Foundation, Inc., 51 Franklin Street, +// Fifth Floor, Boston, MA 02110-1301, USA. + +#ifndef _QFPIO_H_ +#define _QFPIO_H_ + +#ifdef __cplusplus + extern "C" { +#endif + +extern void qfp_float2str(float f,char*s,unsigned int fmt); +extern int qfp_str2float(float*f,char*p,char**endptr); + +#ifdef __cplusplus + } // extern "C" +#endif +#endif diff --git a/gcc/qfplib/qfpio.s b/gcc/qfplib/qfpio.s new file mode 100644 index 0000000..d641d1a --- /dev/null +++ b/gcc/qfplib/qfpio.s @@ -0,0 +1,868 @@ +@ Copyright 2015 Mark Owen +@ http://www.quinapalus.com +@ E-mail: qfp@quinapalus.com +@ +@ This file is free software: you can redistribute it and/or modify +@ it under the terms of version 2 of the GNU General Public License +@ as published by the Free Software Foundation. +@ +@ This file is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +@ GNU General Public License for more details. +@ +@ You should have received a copy of the GNU General Public License +@ along with this file. If not, see or +@ write to the Free Software Foundation, Inc., 51 Franklin Street, +@ Fifth Floor, Boston, MA 02110-1301, USA. + +.syntax unified +.cpu cortex-m0 +.thumb + +@ exported symbols + +.global qfp_float2str +.global qfp_str2float + +@ C code in comments is intended to give an idea of the function +@ of the following assembler code. The translation is not exact. + +@ // multiply by 128/125: used by conversions in both directions +@ unsigned int div125(unsigned int u) { +@ unsigned int a,b,c,k0=0x4189; // 0x4189~=128/125 Q14 +@ a=u>>14; +@ a=a*k0; // calculate first approximation to answer, good to about 14 bits +@ b=((a>>1)+(a>>2))>>4; +@ b=a-(b>>1)-(b&1); // find error in approximation +@ c=(u-b)*k0; +@ return a+(c>>14)+1; // result good to about 28 bits +@ } + +div125: + push {r1-r4,r14} + ldr r4,=#0x4189 @ k0=0x4189; + lsrs r1,r0,#14 @ a=u>>14; + muls r1,r4 @ a=a*k0; + lsrs r2,r1,#1 @ a>>1 + lsrs r3,r1,#2 @ a>>2 + add r2,r3 @ (a>>1)+(a>>2) + lsrs r2,#4 @ b=((a>>1)+(a>>2))>>4; + subs r0,r1 @ u-a + lsrs r2,#1 @ b>>1 + adcs r0,r2 @ u-a+(b>>1)+(b&1) + muls r0,r4 @ c=(u-b)*k0; + lsrs r0,#14 @ c>>14 + add r0,r1 @ a+(c>>14) + adds r0,#1 @ a+(c>>14)+1 + pop {r1-r4,r15} + +.ltorg + +opoint: @ output decimal point + adds r5,#2 + movs r3,#'.' + b och +ozero: @ output '0' + movs r3,#0 +odig: @ output one digit from r3 + adds r3,#'0' +och: @ output one character from r3 + strb r3,[r1] + adds r1,#1 + bx r14 + +naninf: @ r4=0 for Inf, otherwise NaN + ldr r3,=#0x00666e49 @ "fnI" + cmp r4,#0 + beq 1f + ldr r3,=#0x004e614e @ "NaN" +1: + bl och + lsrs r3,#8 + bne 1b + b 10f + +@ fmt is format control word: +@ b7..b0: number of significant figures +@ b15..b8: -(minimum exponent printable in F format) +@ b23..b16: maximum exponent printable in F format-1 +@ b24: output positive mantissas with ' ' +@ b25: output positive mantissas with '+' +@ b26: output positive exponents with ' ' +@ b27: output positive exponents with '+' +@ b28: suppress traling zeros in fraction +@ b29: fixed-point output: b7..0 give number of decimal places +@ default: 0x18060406 +@ Note that if b28 is set (as it is in the default format value) the code will +@ write the trailing decimal point and zeros to the output buffer before truncating +@ the string. Thus it is essential that the output buffer is large enough to accommodate +@ these characters temporarily. +@ +@ Overall accuracy is sufficient to print all exactly-representable integers up to 10^8 correctly +@ in 0x18160408 format. +@ +@ void float2str(float f,char*s,unsigned int fmt) { + +qfp_float2str: + push {r4-r7,r14} + +@ if(fmt==0) fmt=0x18060406; // default format + + cmp r2,#0 + bne 1f + ldr r2,=#0x18060406 +1: + +@ i=*(int*)&f; +@ if(i&0x80000000) { // output sign of mantissa +@ *p++='-'; +@ i&=0x7fffffff; +@ } else { +@ if(fmt&0x01000000) *p++=' '; +@ else if(fmt&0x02000000) *p++='+'; +@ } + + movs r3,#'-' + lsls r0,#1 + bcs 2f + movs r3,#' ' + lsrs r4,r2,#25 + bcs 2f + movs r3,#'+' + lsrs r4,r2,#26 + bcc 3f +2: + bl och +3: + +@ e2=(i>>23)-127; // get binary exponent e2 + + movs r4,#0 + lsrs r3,r0,#24 + beq 1f @ treat zero case specially + subs r3,#127 + +@ m=((i&0x7fffff)|0x800000)<<8; // get mantissa, restore implied 1, make Q31 + + lsls r4,r0,#8 + cmp r3,#128 + beq naninf @ handle NaN/Inf cases + adds r4,#1 + +@ if(e2==-127) {e2=0; m=0;} // flush denormals to zero + + movs r5,#1 + rors r4,r5 +1: + movs r0,r4 + +@ now binary exponent e2 in r3, mantissa in r0 + +@ e10=0; // decimal exponent +@ overall plan is to manipulate m, e2 and e10 so as to take e2 to zero, while maintaining the +@ invariant m * 2^e2 * 10^e10 + + movs r4,#0 + +@ while(e2>0) { // add 3 to e10, take 10 off e2, multiply m by 1024/1000=128/125 +@ if(m>=0xf0000000) m>>=1,e2++; +@ m=div125(m); +@ e2-=10; +@ e10+=3; +@ } // now e2<=0 + + b 2f +1: + lsrs r5,r0,#28 + cmp r5,#0x0f + blo 3f + lsrs r0,#1 + adds r3,#1 +3: + bl div125 + subs r3,#10 + adds r4,#3 +2: + cmp r3,#0 + bgt 1b + +@ while(e2<=-10) { // take 3 off e10, add 10 to e2, multiply m by 1000/1024=125/128 +@ m0=(m>>5)+(m>>6); +@ m-=(m0>>1)+(m0&1); // *125/128, more accurate than using multiply instruction +@ e2+=10; +@ e10-=3; +@ } // now -10 < e2 <= 0 + + b 2f +1: + lsrs r5,r0,#5 + lsrs r6,r0,#6 + add r5,r6 + movs r6,#0 + lsrs r5,#1 + adcs r5,r6 + subs r0,r5 + subs r4,#3 +2: + adds r3,#10 + ble 1b + subs r3,#10 + +@ m>>=1; // Q30; make sure m will not overflow + + lsrs r0,#1 + +@ while(e2<=-3) { // take 1 off e10, add 3 to e2, multiply m by 10/8 +@ m0=m>>1; +@ m+=(m0>>1)+(m0&1); // *10/8 +@ e2+=3; +@ e10--; +@ } // now -3 < e2 <=0 + + b 2f +1: + lsrs r5,r0,#1 + lsrs r5,#1 + adcs r0,r5 + subs r4,#1 +2: + adds r3,#3 + ble 1b + subs r3,#3 + +@ while(e2<0) { // add 1 to e2, halve m +@ m>>=1; // *1/2 +@ e2++; +@ } // now e2==0 + + b 2f +1: + lsrs r0,#1 +2: + adds r3,#1 + ble 1b + subs r3,#1 + +@ if(m>=0x40000000) m>>=2; // convert Q30 to Q28 +@ else { +@ m=(m<<1)+(m>>1)+(m&1); // multiply by 10 (maintaining accuracy) if result will not overflow, compensate e10 +@ e10--; +@ } + + lsrs r5,r0,#30 + beq 1f + lsrs r0,#2 + b 2f +1: + lsls r5,r0,#1 + lsrs r0,#1 + adcs r0,r5 + subs r4,#1 +2: + +@ now all of binary exponent has been transferred to decimal exponent +@ we have +@ r0: mantissa m, Q28, 1<=m<10 +@ r1: output pointer +@ r2: format +@ r3: 0 (was binary exponent) +@ r4: decimal exponent e10 + +@ sf=fmt&0xff; // number of significant figures + + uxtb r3,r2 @ e2 is no longer used + +@ ff=0; // flag to indicate that output is in "F" format (i.e., will not use "E" notation) + + movs r5,#0 + +@ d0=e10; // first digit output has significance 10^d0 wrt output '.' +@ d1=d0-sf; // last digit output has significance 10^(d1+1) wrt output '.' + + movs r6,r4 + subs r7,r6,r3 + +@ r0: mantissa m, Q28, 1<=m<10 +@ r1: output pointer +@ r2: format +@ r3: sf +@ r4: decimal exponent e10 +@ r5b0: ff +@ r6: d0 +@ r7: d1 + +@ if(fmt&0x20000000) { // forced "F" output format? +@ d1=-(fmt&0xff)-1; +@ sf=d0-d1; +@ ff=1; +@ } + + push {r1,r2} + lsrs r1,r2,#30 + bcc 1f + mvns r7,r3 + subs r3,r6,r7 + movs r5,#1 +1: + +@ m0=0x08000000; // 0.5 Q28 +@ for(i=1;i>1; // multiply by 0.1 +@ m0+=m0>>4; +@ m0+=m0>>8; +@ m0+=m0>>16; +@ m0>>=4; +@ } +@ m+=m0; // rounding + + push {r3} + movs r1,#8 + lsls r1,#24 +2: + subs r3,#1 + ble 1f + lsrs r2,r1,#1 + add r1,r2 + lsrs r2,r1,#4 + add r1,r2 + lsrs r2,r1,#8 + add r1,r2 + lsrs r2,r1,#16 + add r1,r2 + lsrs r1,#4 + b 2b +1: + add r0,r1 + pop {r3} + +@ if(m>=0xa0000000) { // has rounding pushed m to 10 (Q28)? if so, set to 1 and increment decimal exponent +@ m=0x10000000; +@ e10++; +@ d0++; +@ if((fmt&0x20000000)==0) d1++; +@ } + + lsrs r1,r0,#28 + cmp r1,#0x0a + pop {r1,r2} + blo 1f + lsrs r0,r2,#30 + bcs 2f + adds r7,#1 +2: + movs r0,#0x10 + lsls r0,#24 + adds r4,#1 + adds r6,#1 +1: + +@ if(d0>=-(int)((fmt>>8)&0xff)&&d0<(int)((fmt>>16)&0xff)) ff=1; // in range for F format? + + push {r4} + lsrs r4,r2,#8 + uxtb r4,r4 + adds r4,r6 + blt 1f + lsrs r4,r2,#16 + uxtb r4,r4 + cmp r6,r4 + bge 1f + movs r5,#1 +1: + +@ if(!ff) d0=0,d1=-sf; // for E format we have one digit before the decimal point + + cmp r5,#0 + bne 1f + movs r6,#0 + rsbs r7,r3,#0 +1: + +@ sf (r3) no longer used + +@ f0=0; // flag to indicate whether we have we output a '.' + +@ f0 in r5b1 + +@ if(d0<0) *p++='0',*p++='.',f0=1,i=-1; // value <1, so output "0." +@ else i=d0; + + mov r4,r6 + cmp r6,#0 + bge 1f + bl ozero + bl opoint + movs r4,#0 + mvns r4,r4 +1: + +@ while(i>d0&&i>d1) *p++='0',i--; // output leading zeros before significand as necessary + +2: + cmp r4,r6 + ble 1f + cmp r4,r7 + ble 1f + bl ozero + subs r4,#1 + b 2b +1: + +@ d0 (r6) no longer used + +@ for(;i>d1;i--) { // now output digits of significand +@ *p++='0'+(m>>28); // output integer part of Q28 value +@ m&=0x0fffffff; // fractional part of Q28 value +@ m=(m<<1)+(m<<3); // multiply by 10 +@ if(i==0) *p++='.',f0=1; // output decimal point as significance goes through 10^0 +@ } + +2: + cmp r4,r7 + ble 1f + lsrs r3,r0,#28 + bl odig + lsls r0,#4 + lsrs r0,#1 + lsrs r3,r0,#2 + add r0,r3 + subs r4,#1 + bcs 2b + bl opoint + b 2b +1: + +@ m (r0) no longer used +@ d1 (r7) no longer used + +@ for(;i>=0;i--) *p++='0'; // output remaining zeros of integer part + +2: + cmp r4,#0 + blt 1f + bl ozero + subs r4,#1 + b 2b +1: + +@ i (r4) no longer used + +@ if(f0) { // remove trailing zeros and decimal point? +@ if(fmt&0x10000000) while(p[-1]=='0') p--; +@ if(p[-1]=='.') p--; +@ *p=0; +@ } + + lsrs r4,r5,#2 + bcc 1f + lsrs r4,r2,#29 + bcc 2f +3: + subs r1,#1 + ldrb r4,[r1] + cmp r4,#'0' + beq 3b + adds r1,#1 +2: + subs r1,#1 + ldrb r4,[r1] + cmp r4,#'.' + beq 4f + adds r1,#1 +4: +1: + pop {r4} + +@ now: +@ r0 +@ r1: output pointer +@ r2: format +@ r3 +@ r4: decimal exponent e10 +@ r5b0: ff +@ r6: +@ r7: + +@ if(!ff) { // output exponent? + + lsrs r5,#1 + bcs 10f + +@ *p++='E'; + + movs r3,#'E' + bl och + +@ if(e10<0) *p++='-',e10=-e10; // output exponent sign +@ else { +@ if(fmt&0x04000000) *p++=' '; +@ else if(fmt&0x08000000) *p++='+'; +@ } + + cmp r4,#0 + bge 2f + rsbs r4,#0 + movs r3,#'-' + b 3f +2: + movs r3,#' ' + lsrs r6,r2,#27 + bcs 3f + movs r3,#'+' + lsrs r6,r2,#28 + bcc 4f +3: + bl och +4: + +@ m=(e10*0xcd)>>11; // tens digit of exponent +@ *p++='0'+m; +@ e10-=m*10; // units digit of exponent +@ *p++='0'+e10; + + movs r3,#0xcd + muls r3,r4 + lsrs r3,#11 + movs r0,#10 + muls r0,r3 + bl odig + subs r3,r4,r0 + bl odig + +@ } + +10: + +@ *p++=0; + + movs r3,#0 + bl och + +@ } + + pop {r4-r7,r15} + + + + + +@ Convert string pointed to by p into float, stored at f. On failure +@ return 1; on success, return 0 and store pointer to first non-converted +@ character at endptr if endptr!=0. + +@ #define ISDIG(x) ((x)>='0'&&(x)<='9') + +isdig: @ convert ASCII to digit + subs r2,#'0' + cmp r2,#10 @ clear carry if digit + bx r14 + +@ int str2float(float*f,char*p,char**endptr) { + +qfp_str2float: + +@ if(*p=='+') p++; +@ else if(*p=='-') sm=0x80000000,p++; // capture mantissa sign + + push {r0,r2,r4-r7,r14} + movs r7,#0 + ldrb r2,[r1] + cmp r2,#'+' + beq 1f + cmp r2,#'-' + bne 2f + movs r7,#1 +1: + adds r1,#1 +2: + movs r0,#0 @ mantissa + movs r3,#0 @ f0: have we seen a '.'? + movs r5,#0 @ exponent + movs r6,#0 @ count of mantissa digits processed + +@ r0: m +@ r1: input pointer +@ r3: f0 +@ r5: e +@ r6: d +@ r7b0: sm +@ stack: output pointer, end pointer + +@ for(;;) { +@ if(f0==0&&*p=='.') {f0=1; p++; continue;} +@ if(!ISDIG(*p)) goto l0; // break out on non-digit +@ if(m<0x10000000) { // accumulate digits (up to about 8 significant figures) +@ m=m*10+*p-'0'; +@ if(f0==1) e--; // decrement exponent if we are past the decimal point +@ } else if(f0==0) e++; // just increment exponent after we have captured enough significance in m +@ d++; +@ p++; +@ } +@ l0: + +2: + ldrb r2,[r1] + cmp r2,#'.' + bne 1f + cmp r3,#0 + bne 1f + movs r3,#1 + b 3f +1: + bl isdig + bcs 4f + lsrs r4,r0,#28 + bne 5f + movs r4,#10 + muls r0,r4 + add r0,r2 + subs r5,#1 +5: + adds r5,#1 + subs r5,r3 + adds r6,#1 +3: + adds r1,#1 + b 2b +4: + +@ if(d==0) return 1; // no digits seen: error + + cmp r6,#0 + bne 1f + movs r0,#1 + pop {r2-r7,r15} + +@ f0 (r3) no longer used +@ d (r6) no longer used + +@ e10=0; // decimal exponent + +1: + movs r3,#0 + +@ if(*p=='e'||*p=='E') { // exponent given? +@ se=0; +@ p++; +@ if(*p=='+') p++; +@ else if(*p=='-') se=1,p++; // capture exponent sign +@ while(ISDIG(*p)) { // capture exponent digits +@ if(e10<0x01000000) e10=e10*10+*p-'0'; // prevent overflow +@ p++; +@ } +@ if(se) e10=-e10; // apply exponent sign +@ } + + mov r6,r1 @ save r1 + ldrb r2,[r1] + cmp r2,#'e' + beq 1f + cmp r2,#'E' + bne 2f +1: + adds r1,#1 + ldrb r2,[r1] + cmp r2,#'+' + beq 3f + cmp r2,#'-' + bne 4f + adds r7,#2 @ se in r7b1 +3: + adds r1,#1 + ldrb r2,[r1] +4: + bl isdig + bcc 6f + mov r1,r6 @ E without following digits: restore r1 + b 2f +6: + lsrs r4,r3,#24 + bne 5f + movs r4,#10 + muls r3,r4 + add r3,r2 +5: + adds r1,#1 + ldrb r2,[r1] + bl isdig + bcc 6b + cmp r7,#2 + blo 2f + rsbs r3,#0 +2: + +@ if(m==0) goto l2; // zero? then we have finished + + movs r2,#0 + cmp r0,#0 + beq 11f + +@ e10+=e; // offset e by captured exponent +@ if(e10> 127) e10=127; // clip overflows: 10^127 will be converted later to Inf, 10^-128 to zero +@ if(e10<-128) e10=-128; + + add r3,r5 + lsls r4,r3,#2 @ temporarily set e2 to e10*4: this will cause subsequent conversion to Inf/zero if required + sxtb r5,r3 + cmp r5,r3 + bne 12f @ not equal to its sign-extended version? + +@ e (r5) no longer used + +@ r0: m +@ r1: input pointer +@ r3: e10 +@ r7b0: sm +@ stack: output pointer, end pointer + +@ e2=31; // binary exponent +@ plan is to manipulate m, e2 and e10 so as to take e10 to zero, while maintaining the +@ invariant m * 2^e2 * 10^e10 + + movs r4,#31 + +@ while(m<0x40000000) m+=m,e2--; // normalise so m is now 0x40000000..0xa0000000 + +2: + lsrs r2,r0,#30 + bne 1f + lsls r0,#1 + subs r4,#1 + b 2b +1: + +@ while(e10<0) { // add 3 to e10, take 10 off e2 and multiply m by 1024/1000=128/125 +@ m=div125(m); +@ e10+=3; e2-=10; +@ if(m>=0x80000000) m>>=1,e2++; +@ } // now e10 >= 0 + +2: + cmp r3,#0 + bge 1f + bl div125 + adds r3,#3 + subs r4,#10 + lsrs r2,r0,#31 + beq 2b + lsrs r0,#1 + adds r4,#1 + b 2b +1: + +@ while(e10>2) { // take 3 off e10, add 10 to e2 and multiply m by 1000/1024=125/128 +@ m0=(m>>6)+(m>>5); +@ m-=(m0>>1)+(m0&1); // *125/128 +@ e10-=3; e2+=10; +@ } // now 0 <= e10 < 3 + +2: + cmp r3,#2 + ble 1f + lsrs r2,r0,#6 + lsrs r5,r0,#5 + add r2,r5 + movs r5,#0 + lsrs r2,#1 + adcs r2,r5 + subs r0,r2 + subs r3,#3 + adds r4,#10 + b 2b +1: + +@ while(e10>0) { // take 1off e10, add 3 to e2 and multiply m by 10/8 = 5/4 +@ m0=(m>>1); +@ m+=(m0>>1)+(m0&1); // *5/4 +@ e10-=1; e2+=3; +@ } // now e10==0 + +2: + cmp r3,#0 + ble 1f + lsrs r2,r0,#1 + lsrs r2,#1 + adcs r0,r2 + subs r3,#1 + adds r4,#3 + b 2b +1: + +@ e10 (r3) no longer used + +@ while(m<0x80000000) m+=m,e2--; // renormalise m so MSB is set + + cmp r0,#0 + blt 1f +2: + subs r4,#1 + adds r0,r0 + bpl 2b +1: + +@ m=((m>>7)+1)>>1; // to 24 bits, with rounding + + lsrs r0,#7 + adds r0,#1 + lsrs r0,#1 + +@ if(m==0x01000000) m>>=1,e2++; // has rounding pushed m to 25 bits? renormalise if so + + lsrs r2,r0,#24 + beq 1f + lsrs r0,#1 + adds r4,#1 +1: + +@ e2+=127; // add exponent offset + +12: + movs r2,#0 + movs r3,#0 + adds r4,#127 + +@ if(e2<=0) {m=0; goto l1;} // too small? flush to zero + + ble 10f + +@ if(e2>=255) {m=0x7f800000; goto l1;} // too big? make infinity + + movs r3,#255 + cmp r4,#255 + bge 10f + +@ m&=0x007fffff; // remove implied 1 + + lsls r2,r0,#9 + lsrs r2,#9 + mov r3,r4 + +@ m|=e2<<23; // insert exponent bits + +10: + lsls r3,#23 + orrs r2,r3 + +@ m|=sm; // apply mantissa sign + +11: + lsls r7,#31 + orrs r2,r7 + +@ *f=*(float*)&m; // write output +@ if(end) *end=p; + + pop {r0,r3} + str r2,[r0] + cmp r3,#0 + beq 1f + str r1,[r3] +1: + +@ return 0; + + movs r0,#0 + pop {r4-r7,r15} + +@ } diff --git a/gcc/qfplib/qfplib.h b/gcc/qfplib/qfplib.h new file mode 100644 index 0000000..aae61f6 --- /dev/null +++ b/gcc/qfplib/qfplib.h @@ -0,0 +1,54 @@ +// Copyright 2015 Mark Owen +// http://www.quinapalus.com +// E-mail: qfp@quinapalus.com +// +// Thanks to Bill Westfield +// +// This file is free software: you can redistribute it and/or modify +// it under the terms of version 2 of the GNU General Public License +// as published by the Free Software Foundation. +// +// This file is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this file. If not, see or +// write to the Free Software Foundation, Inc., 51 Franklin Street, +// Fifth Floor, Boston, MA 02110-1301, USA. + +#ifndef _QFPLIB_H_ +#define _QFPLIB_H_ + +#ifdef __cplusplus + extern "C" { +#endif + +extern float qfp_fadd(float x,float y); +extern float qfp_fsub(float x,float y); +extern float qfp_fmul(float x,float y); +extern float qfp_fdiv(float x,float y); +extern float qfp_fdiv_fast(float x,float y); +extern int qfp_float2int(float x); +extern int qfp_float2fix(float x,int y); +extern unsigned int qfp_float2uint(float x); +extern unsigned int qfp_float2ufix(float x,int y); +extern float qfp_int2float(int x); +extern float qfp_fix2float(int x,int y); +extern float qfp_uint2float(unsigned int x); +extern float qfp_ufix2float(unsigned int x,int y); +extern int qfp_fcmp(float x,float y); +extern float qfp_fcos(float x); +extern float qfp_fsin(float x); +extern float qfp_ftan(float x); +extern float qfp_fatan2(float y,float x); +extern float qfp_fexp(float x); +extern float qfp_fln(float x); +extern float qfp_fsqrt(float x); +extern float qfp_fsqrt_fast(float x); + +#ifdef __cplusplus + } // extern "C" +#endif +#endif diff --git a/gcc/qfplib/qfplib.s b/gcc/qfplib/qfplib.s new file mode 100644 index 0000000..b7202cc --- /dev/null +++ b/gcc/qfplib/qfplib.s @@ -0,0 +1,858 @@ +@ Copyright 2015 Mark Owen +@ http://www.quinapalus.com +@ E-mail: qfp@quinapalus.com +@ +@ This file is free software: you can redistribute it and/or modify +@ it under the terms of version 2 of the GNU General Public License +@ as published by the Free Software Foundation. +@ +@ This file is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +@ GNU General Public License for more details. +@ +@ You should have received a copy of the GNU General Public License +@ along with this file. If not, see or +@ write to the Free Software Foundation, Inc., 51 Franklin Street, +@ Fifth Floor, Boston, MA 02110-1301, USA. + +@.equ include_faster,0 @ include fast divide and square root? +@.equ include_conversions,0 @ include float <-> fixed point conversion functions? +@.equ include_scientific,0 @ include trignometic, exponential etc. functions? + +.ifndef include_faster +.equ include_faster,1 +.endif + +.ifndef include_conversions +.equ include_conversions,1 +.endif + +.ifndef include_scientific +.equ include_scientific,0 +.endif + +.if include_scientific +.equ include_conversions,1 +.endif + +.syntax unified +.cpu cortex-m0 +.thumb + +@ exported symbols + +.global qfp_fadd +.global qfp_fsub +.global qfp_fmul +.global qfp_fdiv +.global qfp_fcmp +.if include_conversions +.global qfp_float2int +.global qfp_float2fix +.global qfp_float2uint +.global qfp_float2ufix +.global qfp_int2float +.global qfp_fix2float +.global qfp_uint2float +.global qfp_ufix2float +.endif +.if include_scientific +.global qfp_fcos +.global qfp_fsin +.global qfp_ftan +.global qfp_fatan2 +.global qfp_fexp +.global qfp_fln +.global qfp_fsqrt +.endif + +.if include_faster +.global qfp_fdiv_fast +.global qfp_fsqrt_fast +.endif + +@ exchange r0<->r1, r2<->r3 +xchxy: + push {r0,r2,r14} + mov r0,r1 + mov r2,r3 + pop {r1,r3,r15} + +@ IEEE single precision floats in r0,r1-> mantissae in r1,r0 exponents in r3,r2 *respectively* +@ trashes r4 +unpackxy: + push {r14} + bl unpackx + bl xchxy + pop {r4} + mov r14,r4 + +@ IEEE single in r0-> signed (two's complemennt) mantissa in r0 9Q23 (24 significant bits), signed exponent (bias removed) in r2 +@ trashes r4; zero, denormal -> mantissa=+/-1, exponent=-380; Inf, NaN -> mantissa=+/-1, exponent=+640 +unpackx: + lsrs r2,r0,#23 @ save exponent and sign + lsls r0,#9 @ extract mantissa + lsrs r0,#9 + movs r4,#1 + lsls r4,#23 + orrs r0,r4 @ reinstate implied leading 1 + cmp r2,#255 @ test sign bit + uxtb r2,r2 @ clear it + bls 1f @ branch on positive + rsbs r0,#0 @ negate mantissa +1: + subs r2,#1 + cmp r2,#254 @ zero/denormal/Inf/NaN? + bhs 2f + subs r2,#126 @ remove exponent bias: can now be -126..+127 + bx r14 + +2: @ here with special-case values + cmp r0,#0 + mov r0,r4 @ set mantissa to +1 + bpl 3f + rsbs r0,#0 @ zero/denormal/Inf/NaN: mantissa=+/-1 +3: + subs r2,#126 @ zero/denormal: exponent -> -127; Inf, NaN: exponent -> 128 + lsls r2,#2 @ zero/denormal: exponent -> -508; Inf, NaN: exponent -> 512 + adds r2,#128 @ zero/denormal: exponent -> -380; Inf, NaN: exponent -> 640 + bx r14 + +@ normalise and pack signed mantissa in r0 nominally 3Q29, signed exponent in r2-> IEEE single in r0 +@ trashes r4, preserves r1,r3 +@ r5: "sticky bits", must be zero iff all result bits below r0 are zero for correct rounding +packx: + lsrs r4,r0,#31 @ save sign bit + lsls r4,r4,#31 @ sign now in b31 + bpl 2f @ skip if positive + cmp r5,#0 + beq 11f + adds r0,#1 @ fiddle carry in to following rsb if sticky bits are non-zero +11: + rsbs r0,#0 @ can now treat r0 as unsigned +packx0: + bmi 3f @ catch r0=0x80000000 case +2: + subs r2,#1 @ normalisation loop + adds r0,r0 + beq 1f @ zero? special case + bpl 2b @ normalise so leading "1" in bit 31 +3: + adds r2,#129 @ (mis-)offset exponent + bne 12f @ special case: highest denormal can round to lowest normal + adds r0,#0x80 @ in special case, need to add 256 to r0 for rounding + bcs 4f @ tripped carry? then have leading 1 in C as required +12: + adds r0,#0x80 @ rounding + bcs 4f @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits) + cmp r5,#0 + beq 7f @ sticky bits zero? +8: + lsls r0,#1 @ remove leading 1 +9: + subs r2,#1 @ compensate exponent on this path +4: + cmp r2,#254 + bge 5f @ overflow? + adds r2,#1 @ correct exponent offset + ble 10f @ denormal/underflow? + lsrs r0,#9 @ align mantissa + lsls r2,#23 @ align exponent + orrs r0,r2 @ assemble exponent and mantissa +6: + orrs r0,r4 @ apply sign +1: + bx r14 + +5: + movs r0,#0xff @ create infinity + lsls r0,#23 + b 6b + +10: + movs r0,#0 @ create zero + bx r14 + +7: @ sticky bit rounding case + lsls r5,r0,#24 @ check bottom 8 bits of r0 + bne 8b @ in rounding-tie case? + lsrs r0,#9 @ ensure even result + lsls r0,#10 + b 9b + +@ unpack two arguments (r0,r1) and shift one down to have common exponent, returned in r2; note that arguments are exchanged +@ sticky bits shifted off bottom of smaller argument in r5 +@ following code is unnecessarily general for fadd, but is shared with atan2 +unpackxyalign: + push {r14} + bl unpackxy + lsls r0,r0,#6 @ Q29 + lsls r1,r1,#6 @ Q29 + subs r4,r2,r3 @ calculate shift + bge 1f @ x>=y? + mov r2,r3 @ no: take common exponent from y + mov r5,r0 @ potential sticky bits from x + rsbs r4,#0 @ make shift positive + asrs r0,r4 + cmp r4,#32 + blo 2f + movs r0,#0 @ large shift, so all bits are sticky and result is zero + pop {r15} +1: + mov r5,r1 @ common exponent from x; potential sticky bits from y + asrs r1,r4 + cmp r4,#32 + blo 2f + movs r1,#0 @ large shift, so all bits are sticky and result is zero + pop {r15} +2: + rsbs r4,#0 + adds r4,#32 + lsls r5,r4 @ extract sticky bits + pop {r15} + +.thumb_func +qfp_fsub: + movs r2,#1 @ subtract: flip sign bit of second argument and fall through to fadd + lsls r2,#31 + eors r1,r2 +.thumb_func +qfp_fadd: + push {r4,r5,r14} + bl unpackxyalign + adds r0,r1 @ do addition + bne 2f @ not in Inf-Inf case? + cmp r2,#200 + blt 2f + movs r0,#1 + lsls r0,#29 @ for Inf-Inf, set mantissa to +1 to prevent zero result +2: +packret: @ common return point: "pack and return" + bl packx + pop {r4,r5,r15} + +@ signed multiply r0 1Q23 by r1 4Q23, result in r0 7Q25, sticky bits in r5 +@ trashes r3,r4 +mul0: + uxth r3,r0 @ Q23 + asrs r4,r1,#16 @ Q7 + muls r3,r4 @ L*H, Q30 signed + asrs r4,r0,#16 @ Q7 + uxth r5,r1 @ Q23 + muls r4,r5 @ H*L, Q30 signed + adds r3,r4 @ sum of middle partial products + uxth r4,r0 + muls r4,r5 @ L*L, Q46 unsigned + lsls r5,r4,#16 @ initialise sticky bits from low half of low partial product + lsrs r4,#16 @ Q25 + adds r3,r4 @ add high half of low partial product to sum of middle partial products + @ (cannot generate carry by limits on input arguments) + asrs r0,#16 @ Q7 + asrs r1,#16 @ Q7 + muls r0,r1 @ H*H, Q14 signed + lsls r0,#11 @ high partial product Q25 + lsls r1,r3,#27 @ sticky + orrs r5,r1 @ collect further sticky bits + asrs r1,r3,#5 @ middle partial products Q25 + adds r0,r1 @ final result + bx r14 + +.thumb_func +qfp_fcmp: + movs r2,#1 @ initialise result + lsls r3,r2,#31 @ r3=0x80000000 + tst r0,r3 @ check sign of first argument + beq 1f + subs r0,r3,r0 @ convert to 2's complement form for direct comparison +1: + tst r1,r3 @ repeat for second argument + beq 2f + subs r1,r3,r1 +2: + subs r0,r1 @ perform comparison + beq 4f @ equal? return 0 + bgt 3f @ r0>r1? return +1 + rsbs r2,#0 @ r0>20 Q7 + ldrb r4,[r4,r3] @ m=rcpapp[(y>>20)&7]; // Q8, .5>=21; // Q12 + muls r3,r4 @ s*=m; // Q28 + asrs r3,#12 @ s>>=12; // Q16 + subs r4,r3 @ m=m-s; // Q16 + + mov r3,r4 @ s=y*m // Q39 second Newton-Raphson iteration + muls r4,r0 @ ... + asrs r4,#16 @ s>>=16; // Q23 + muls r4,r3 @ s*=m; // Q39 + lsls r3,#8 @ m<<=8; // Q24 + asrs r4,#15 @ s>>=15; // Q24 + subs r3,r4 @ m=m-s; // Q24 + + lsls r4,r3,#7 @ \/ s=y*m; // Q47 third Newton-Raphson iteration + muls r3,r0 @ /\ m<<=7; // Q31 + asrs r3,#15 @ s>>=15; // Q32 + lsrs r0,r4,#16 @ s*=(m>>16); // Q47 + muls r3,r0 @ ... + asrs r3,#16 @ s>>=16; // Q31 + subs r0,r4,r3 @ m=m-s; // Q31 +div0: + adds r0,#7 @ rounding; reduce systematic error + lsrs r0,#4 @ Q27 + b fmul0 @ drop into multiplication code to calculate result + +@ The fast square root routine uses an initial approximation to the reciprocal of the square root of the argument based +@ on the top four bits of the mantissa (possibly shifted one place to make the exponent even). It then performs three +@ Newton-Raphson iterations, resulting in about 28-29 bits of accuracy. This reciprocal is then multiplied by +@ the original argument to produce the result. +@ Again, the fixed-point calculation is carefully implemented to preserve accuracy, and similar comments to those +@ made above on the fast division routine apply. +@ The reciprocal square root calculation has been tested for all possible (possibly shifted) input mantissa values. +.thumb_func +qfp_fsqrt_fast: + push {r4,r5,r14} + bl unpackx + movs r1,r0 + bmi infret @ negative? return -Inf + asrs r0,r2,#1 @ check LSB of exponent + bcc 1f + lsls r1,#1 @ was odd: double mantissa; mantissa y now 1..4 Q23 +1: + adds r2,#4 @ correction for packing + adr r4,rsqrtapp-4@ first four table entries are never accessed because of the mantissa's leading 1 + lsrs r3,r1,#21 @ y>>21 Q2 + ldrb r4,[r4,r3] @ initial approximation to reciprocal square root m Q8 + + lsrs r0,r1,#7 @ y>>7 // Q16 first Newton-Raphson iteration + muls r0,r4 @ m*y + muls r0,r4 @ s=m*y*y // Q32 + asrs r0,#12 @ s>>12 + muls r0,r4 @ m*s // Q28 + asrs r0,#13 @ m*s // Q15 + lsls r4,#8 @ m // Q16 + subs r4,r0 @ m=(m<<8)-(s>>13) // Q16-Q15/2 -> Q16 + + mov r0,r4 @ // second Newton-Raphson iteration + muls r0,r0 @ u=m*m // Q32 + lsrs r0,#16 @ u>>16 // Q16 + lsrs r3,r1,#7 @ y>>7 // Q16 + muls r0,r3 @ s=u*(y>>7) // Q32 + asrs r0,#12 @ s>>12 // Q20 + muls r0,r4 @ s*m // Q36 + asrs r0,#21 @ s*m // Q15 + subs r4,r0 @ m=m-s // Q16-Q15/2 + + mov r0,r4 @ // third Newton-Raphson iteration + muls r0,r0 @ u=m*m // Q32 + lsrs r3,r0,#12 @ now multiply u and y in two parts: u>>12 + muls r3,r1 @ first partial product (u>>12)*y Q43 + lsls r0,#20 + lsrs r0,#20 @ u&0xfff + lsrs r5,r1,#12 @ y>>12 + muls r0,r5 @ second partial product (u&0xfff)*(y>>12) Q43 + add r0,r3 @ s=u*y // Q43 + asrs r0,#15 @ s>>15 // Q28 + muls r0,r4 @ (s>>15)*m // Q44 + lsls r4,#13 @ m<<13 // Q29 + asrs r0,#16 @ s>>16 // Q28 + subs r0,r4,r0 @ // Q29-Q28/2 + + asrs r2,#1 @ halve exponent + bcc div0 @ was y shifted? + lsrs r0,#1 + lsls r1,#1 @ shift y back + b div0 @ round and complete with multiplication + +.align 2 + +@ round(2^15./[136:16:248]) +rcpapp: +.byte 0xf1,0xd8,0xc3,0xb2, 0xa4,0x98,0x8d,0x84 + +@ round(sqrt(2^22./[72:16:248])) +rsqrtapp: +.byte 0xf1,0xda,0xc9,0xbb, 0xb0,0xa6,0x9e,0x97, 0x91,0x8b,0x86,0x82 + +.endif + +.if include_conversions + +@ convert float to signed int, rounding towards -Inf, clamping +.thumb_func +qfp_float2int: + movs r1,#0 @ fall through + +@ convert float in r0 to signed fixed point in r0, clamping +.thumb_func +qfp_float2fix: + push {r4,r14} + bl unpackx + add r2,r1 @ incorporate binary point position into exponent + subs r2,#23 @ r2 is now amount of left shift required + blt 1f @ requires right shift? + cmp r2,#7 @ overflow? + ble 4f +3: @ overflow + asrs r1,r0,#31 @ +ve:0 -ve:0xffffffff + mvns r1,r1 @ +ve:0xffffffff -ve:0 + movs r0,#1 + lsls r0,#31 +5: + eors r0,r1 @ +ve:0x7fffffff -ve:0x80000000 (unsigned path: 0xffffffff) + pop {r4,r15} +1: + rsbs r2,#0 @ right shift for r0, >0 + cmp r2,#32 + blt 2f @ more than 32 bits of right shift? + movs r2,#32 +2: + asrs r0,r0,r2 + pop {r4,r15} + +@ unsigned version +.thumb_func +qfp_float2uint: + movs r1,#0 @ fall through +.thumb_func +qfp_float2ufix: + push {r4,r14} + bl unpackx + add r2,r1 @ incorporate binary point position into exponent + movs r1,r0 + bmi 5b @ negative? return zero + subs r2,#23 @ r2 is now amount of left shift required + blt 1b @ requires right shift? + mvns r1,r0 @ ready to return 0xffffffff + cmp r2,#8 @ overflow? + bgt 5b +4: + lsls r0,r0,r2 @ result fits, left shifted + pop {r4,r15} + +@ convert signed int to float, rounding +.thumb_func +qfp_int2float: + movs r1,#0 @ fall through + +@ convert signed fix to float, rounding; number of r0 bits after point in r1 +.thumb_func +qfp_fix2float: + push {r4,r5,r14} +1: + movs r2,#29 + subs r2,r1 @ fix exponent +packretns: @ pack and return, sticky bits=0 + movs r5,#0 + b packret + +@ unsigned version +.thumb_func +qfp_uint2float: + movs r1,#0 @ fall through +.thumb_func +qfp_ufix2float: + push {r4,r5,r14} + cmp r0,#0 + bge 1b @ treat <2^31 as signed + movs r2,#30 + subs r2,r1 @ fix exponent + lsls r5,r0,#31 @ one sticky bit + lsrs r0,#1 + b packret + +.endif + +.if include_scientific + +@ All the scientific functions are implemented using the CORDIC algorithm. For notation, +@ details not explained in the comments below, and a good overall survey see +@ "50 Years of CORDIC: Algorithms, Architectures, and Applications" by Meher et al., +@ IEEE Transactions on Circuits and Systems Part I, Volume 56 Issue 9. + +@ Register use: +@ r0: x +@ r1: y +@ r2: z/omega +@ r3: coefficient pointer +@ r4,r8: m +@ r5: i (shift) + +cordic_start: @ initialisation + mov r7,r8 + push {r7} + movs r5,#0 @ initial shift=0 + mov r8,r4 + b 5f + +cordic_vstep: @ one step of algorithm in vector mode + cmp r1,#0 @ check sign of y + bgt 4f + b 1f +cordic_rstep: @ one step of algorithm in rotation mode + cmp r2,#0 @ check sign of angle + bge 1f +4: + subs r1,r6 @ negative rotation: y=y-(x>>i) + rsbs r7,#0 + adds r2,r4 @ accumulate angle + b 2f +1: + adds r1,r6 @ positive rotation: y=y+(x>>i) + subs r2,r4 @ accumulate angle +2: + mov r4,r8 + muls r7,r4 @ apply sign from m + subs r0,r7 @ finish rotation: x=x{+/-}(y>>i) +5: + ldr r4,[r3] @ fetch next angle from table + adds r3,#4 @ bump pointer + lsrs r4,#1 @ repeated angle? + bcs 3f + adds r5,#1 @ adjust shift if not +3: + mov r6,r0 + asrs r6,r5 @ x>>i + mov r7,r1 + asrs r7,r5 @ y>>i + lsrs r4,#1 @ shift end flag into carry + bx r14 + +@ CORDIC rotation mode +cordic_rot: + push {r6,r7,r14} + bl cordic_start @ initialise +1: + bl cordic_rstep + bcc 1b @ step until table finished + asrs r6,r0,#14 @ remaining small rotations can be linearised: see IV.B of paper referenced above + asrs r7,r1,#14 + asrs r2,#3 + muls r6,r2 @ all remaining CORDIC steps in a multiplication + muls r7,r2 + mov r4,r8 + muls r7,r4 + asrs r6,#12 + asrs r7,#12 + subs r0,r7 @ x=x{+/-}(yz>>k) + adds r1,r6 @ y=y+(xz>>k) +cordic_exit: + pop {r7} + mov r8,r7 + pop {r6,r7,r15} + +@ CORDIC vector mode +cordic_vec: + push {r6,r7,r14} + bl cordic_start @ initialise +1: + bl cordic_vstep + bcc 1b @ step until table finished +4: + cmp r1,#0 @ continue as in cordic_vstep but without using table; x is not affected as y is small + bgt 2f @ check sign of y + adds r1,r6 @ positive rotation: y=y+(x>>i) + subs r2,r4 @ accumulate angle + b 3f +2: + subs r1,r6 @ negative rotation: y=y-(x>>i) + adds r2,r4 @ accumulate angle +3: + asrs r6,#1 + asrs r4,#1 @ next "table entry" + bne 4b + b cordic_exit + +.thumb_func +qfp_fsin: @ calculate sin and cos using CORDIC rotation method + push {r4,r5,r14} + movs r1,#24 + bl qfp_float2fix @ range reduction by repeated subtraction/addition in fixed point + ldr r4,pi_q29 + lsrs r4,#4 @ 2pi Q24 +1: + subs r0,r4 + bge 1b +1: + adds r0,r4 + bmi 1b @ now in range 0..2pi + lsls r2,r0,#2 @ z Q26 + lsls r5,r4,#1 @ pi Q26 (r4=pi/2 Q26) + ldr r0,=#0x136e9db4 @ initialise CORDIC x,y with scaling + movs r1,#0 +1: + cmp r2,r4 @ >pi/2? + blt 2f + subs r2,r5 @ reduce range to -pi/2..pi/2 + rsbs r0,#0 @ rotate vector by pi + b 1b +2: + lsls r2,#3 @ Q29 + adr r3,tab_cc @ circular coefficients + movs r4,#1 @ m=1 + bl cordic_rot + adds r1,#9 @ fiddle factor to make sin(0)==0 + movs r2,#0 @ exponents to zero + movs r3,#0 + movs r5,#0 @ no sticky bits + bl packx @ pack cosine + bl xchxy + b packretns @ pack sine + +.thumb_func +qfp_fcos: + push {r14} + bl qfp_fsin + mov r0,r1 @ extract cosine result + pop {r15} + +.thumb_func +qfp_ftan: + push {r4,r5,r14} + bl qfp_fsin @ sine in r0/r2, cosine in r1/r3 +.if include_faster + b fdiv_fast_n @ sin/cos +.else + b fdiv_n + +.endif + +.thumb_func +qfp_fexp: @ calculate cosh and sinh using rotation method; add to obtain exp + push {r4,r5,r14} + movs r1,#24 + bl qfp_float2fix @ Q24: covers entire valid input range + asrs r1,r0,#16 @ Q8 + ldr r2,=#5909 @ log_2(e) Q12 + muls r1,r2 @ estimate exponent of result Q20 + asrs r1,#19 @ Q1 + adds r1,#1 @ rounding + asrs r1,#1 @ rounded estimate of exponent of result + push {r1} @ save for later + lsls r2,r0,#5 @ Q29 + ldr r0,=#0x162e42ff @ ln(2) Q29 + muls r1,r0 @ accurate contribution of estimated exponent + subs r2,r1 @ residual to be exponentiated, approximately -.5..+.5 Q29 + ldr r0,=#0x2c9e15ca @ initialise CORDIC x,y with scaling + movs r1,#0 + adr r3,tab_ch @ hyperbolic coefficients + mvns r4,r1 @ m=-1 + bl cordic_rot @ calculate cosh and sinh + add r0,r1 @ exp=cosh+sinh + pop {r2} @ recover exponent + b packretns @ pack result + +.thumb_func +qfp_fsqrt: @ calculate sqrt and ln using vector method + push {r4,r5,r14} + bl unpackx + movs r1,r0 @ -ve argument? + bmi 3f @ return -Inf, -Inf + ldr r1,=#0x0593C2B9 @ scale factor for CORDIC + bl mul0 @ Q29 + asrs r1,r2,#1 @ halve exponent + bcc 1f + adds r1,#1 @ was odd: add 1 and shift mantissa + asrs r0,#1 +1: + push {r1} @ save exponent/2 for later + mov r1,r0 + ldr r3,=#0x0593C2B9 @ re-use constant + lsls r3,#2 + adds r0,r3 @ "a+1" + subs r1,r3 @ "a-1" + movs r2,#0 + adr r3,tab_ch @ hyperbolic coefficients + mvns r4,r2 @ m=-1 + bl cordic_vec + mov r1,r2 @ keep ln result + pop {r2} @ retrieve exponent/2 +2: + movs r3,r2 + b packretns @ pack sqrt result + +3: + movs r2,#255 + b 2b + +.thumb_func +qfp_fln: + push {r4,r5,r14} + bl qfp_fsqrt @ get unpacked ln in r1/r3; exponent has been halved + cmp r3,#70 @ ln(Inf)? + bgt 2f @ return Inf + rsbs r3,#0 + cmp r3,#70 + bgt 1f @ ln(0)? return -Inf +3: + ldr r0,=#0x0162e430 @ ln(4) Q24 + muls r0,r3 @ contribution from negated, halved exponent + adds r1,#8 @ round result of ln + asrs r1,#4 @ Q24 + subs r0,r1,r0 @ add in contribution from (negated) exponent + movs r2,#5 @ pack expects Q29 + b packretns +1: + mvns r0,r0 @ make result -Inf +2: + movs r2,#255 + b packretns + +.thumb_func +qfp_fatan2: + push {r4,r5,r14} + bl unpackxyalign @ convert to fixed point (ensure common exponent, which is discarded) + movs r2,#0 @ initial angle + cmp r0,#0 @ x negative + bge 5f + rsbs r0,#0 @ rotate to 1st/4th quadrants + rsbs r1,#0 + ldr r2,pi_q29 @ pi Q29 +5: + adr r3,tab_cc @ circular coefficients + movs r4,#1 @ m=1 + bl cordic_vec @ also produces magnitude (with scaling factor 1.646760119), which is discarded + mov r0,r2 @ result here is -pi/2..3pi/2 Q29 + ldr r2,pi_q29 @ pi Q29 + adds r4,r0,r2 @ attempt to fix -3pi/2..-pi case + bcs 6f @ -pi/2..0? leave result as is + subs r4,r0,r2 @ pi: take off 2pi +6: + subs r0,#1 @ fiddle factor so atan2(0,1)==0 + movs r2,#0 @ exponent for pack + b packretns + +.align 2 +.ltorg + +@ first entry in following table is pi Q29 +pi_q29: +@ circular CORDIC coefficients: atan(2^-i), b0=flag for preventing shift, b1=flag for end of table +tab_cc: +.word 0x1921fb54*4+1 @ no shift before first iteration +.word 0x0ed63383*4+0 +.word 0x07d6dd7e*4+0 +.word 0x03fab753*4+0 +.word 0x01ff55bb*4+0 +.word 0x00ffeaae*4+0 +.word 0x007ffd55*4+0 +.word 0x003fffab*4+0 +.word 0x001ffff5*4+0 +.word 0x000fffff*4+0 +.word 0x0007ffff*4+0 +.word 0x00040000*4+0 +.word 0x00020000*4+0+2 @ +2 marks end + +@ hyperbolic CORDIC coefficients: atanh(2^-i), flags as above +tab_ch: +.word 0x1193ea7b*4+0 +.word 0x1193ea7b*4+1 @ repeat i=1 +.word 0x082c577d*4+0 +.word 0x04056247*4+0 +.word 0x0200ab11*4+0 +.word 0x0200ab11*4+1 @ repeat i=4 +.word 0x01001559*4+0 +.word 0x008002ab*4+0 +.word 0x00400055*4+0 +.word 0x0020000b*4+0 +.word 0x00100001*4+0 +.word 0x00080001*4+0 +.word 0x00040000*4+0 +.word 0x00020000*4+0 +.word 0x00020000*4+1+2 @ repeat i=12 + +.endif + +qfp_lib_end: