I have been testing the double-precision FPU performance of the STM32F767ZI with a few matrix multiplication operations. I decided to try the Eigen C++ library and have run into a problem that is stretching my knowledge of embedded startup routines, memory regions, linker scripts, etc., so I was wondering if I could get some help with this.
I have two code instances, one with the Eigen C++ operations and one without. The one without simply flashes an LED in the main loop, as shown below (I still have the Eigen matrices declared as globals).
Code: Select all
#include "ch.h"
#include "hal.h"
#include "chprintf.h"
#include "Eigen/Core"
// Pulls every Eigen name into the global namespace; the declarations below
// still spell out the Eigen:: qualifier explicitly.
using namespace Eigen;
// Row and column count of the square test matrices.
#define MSIZE 7
// Scalar type of the matrices; double precision is the case under test.
typedef double tReal;
// Fixed-size MSIZE x MSIZE matrices kept at file scope. In this variant
// main() never reads or writes them.
Eigen::Matrix<tReal,MSIZE,MSIZE> A;
Eigen::Matrix<tReal,MSIZE,MSIZE> B;
Eigen::Matrix<tReal,MSIZE,MSIZE> C;
// Zero-initialised 128-byte character buffer; unused by this variant's main().
char buf[128] = {0};
/*
 * Baseline variant without any Eigen operations: once start-up is done the
 * loop only toggles LINE_LED1 and sleeps 100 ms between toggles.
 */
int main(void)
{
    // ChibiOS start-up order: HAL first, then the kernel.
    halInit();
    chSysInit();

    // SD3 is started with a NULL configuration even though this variant
    // never writes to it.
    sdStart(&SD3, NULL);

    for (;;)
    {
        palToggleLine(LINE_LED1);
        chThdSleepMilliseconds(100);
    }
}
This results in a Reset_Handler that looks something like this in disassembly (notice the lack of D-cache initialisation):
Code: Select all
Reset_Handler:
cpsid i
ldr r0, [pc, #220] ; (0x2002f0 <endfiniloop+4>)
msr PSP, r0
ldr r0, [pc, #216] ; (0x2002f4 <endfiniloop+8>)
movw r1, #60680 ; 0xed08
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
movw r0, #0
movt r0, #49152 ; 0xc000
movw r1, #61236 ; 0xef34
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
dsb sy
isb sy
movw r0, #0
movt r0, #240 ; 0xf0
movw r1, #60808 ; 0xed88
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
dsb sy
isb sy
mov.w r0, #0
vmsr fpscr, r0
movw r1, #61244 ; 0xef3c
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
movs r0, #6
msr CONTROL, r0
isb sy
bl 0x202e50 <__core_init>
bl 0x202070 <__early_init()>
Now, if I put some Eigen operations in, like this:
Code: Select all
#include "ch.h"
#include "hal.h"
#include "chprintf.h"
#include "Eigen/Core"
// Pulls every Eigen name into the global namespace; the declarations below
// still spell out the Eigen:: qualifier explicitly.
using namespace Eigen;
// Row and column count of the square test matrices.
#define MSIZE 7
// Scalar type of the matrices; double precision is the case under test.
typedef double tReal;
// Fixed-size MSIZE x MSIZE matrices kept at file scope: A and B are the
// operands and C receives the product computed in main().
Eigen::Matrix<tReal,MSIZE,MSIZE> A;
Eigen::Matrix<tReal,MSIZE,MSIZE> B;
Eigen::Matrix<tReal,MSIZE,MSIZE> C;
// Zero-initialised 128-byte buffer that main() formats its output line into.
char buf[128] = {0};
int main(void)
{
halInit();
chSysInit();
sdStart(&SD3, NULL);
while (true)
{
A.setRandom();
B.setRandom();
C = A*B;
int sz = chsnprintf(buf, sizeof(buf), "%d, %d\r\n", (int)C(0,0));
chnWrite(&SD3, (uint8_t*)buf, sz);
palToggleLine(LINE_LED1);
chThdSleepMilliseconds(100);
}
}
The disassembly for the Reset_Handler looks like this:
Code: Select all
; <UNDEFINED> instruction: 0xffffffff
; <UNDEFINED> instruction: 0xffffffff
; <UNDEFINED> instruction: 0xffffffff
cpsid i
ldr r0, [pc, #220] ; (0x2002f0 <bloop+10>)
msr PSP, r0
ldr r0, [pc, #216] ; (0x2002f4 <bloop+14>)
movw r1, #60680 ; 0xed08
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
movw r0, #0
movt r0, #49152 ; 0xc000
movw r1, #61236 ; 0xef34
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
dsb sy
isb sy
movw r0, #0
movt r0, #240 ; 0xf0
movw r1, #60808 ; 0xed88
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
dsb sy
isb sy
mov.w r0, #0
vmsr fpscr, r0
movw r1, #61244 ; 0xef3c
movt r1, #57344 ; 0xe000
str r0, [r1, #0]
movs r0, #6
msr CONTROL, r0
isb sy
bl 0x2072d0 <SCB_EnableDCache+80>
bl 0x205650 <sd_lld_start+32>
And it all ends up inside the SCB_EnableDCache function, in an _unhandled_exception. This is weird because __core_init isn't called, and that's where the SCB_EnableICache() and SCB_EnableDCache() functions seem to be called from. __early_init() isn't called either, even though crt0_v7m.S clearly calls it at line 245.
This code has been built from the trunk SVN repository, which I pulled today to make sure a fix hasn't already been put in place before I write this up. It uses the RT-STM32F767ZI-NUCLEO144 demo, adapted for C++. I've made the process stack size ridiculously large, so I don't think it's an overflow. I've linked libm and libstdc++, turned LTO off and back on again, and used -std=gnu++11, and nothing seems to work.
Any help would be greatly appreciated.