sdk-hwV1.3/lichee/melis-v3.0/source/ekernel/components/aw/samples/neon/neon.c

119 lines
3.2 KiB
C
Raw Permalink Normal View History

2024-05-07 10:09:20 +00:00
/*
* ===========================================================================================
*
* Filename: neon.c
*
* Description: neon useage.
*
* Version: Melis3.0
* Create: 2020-03-13 10:49:09
* Revision: none
* Compiler: GCC:version 7.2.1 20170904 (release),ARM/embedded-7-branch revision 255204
*
* Author: caozilong@allwinnertech.com
* Organization: BU1-PSW
* Last Modified: 2020-03-20 09:04:00
*
* ===========================================================================================
*/
#include <rtthread.h>
#include <arm_neon.h>
#include <dfs_posix.h>
#include <finsh_api.h>
#include <finsh.h>
#include <debug.h>
#include <log.h>
uint32_t kernel_neon_begin(void);
void kernel_neon_end(uint32_t saved_fpexc);
static float sum_array(float *arr, int len)
{
uint32_t flags;
if (NULL == arr || len < 1)
{
__err("parameter error.");
return 0;
}
flags = kernel_neon_begin();
int dim4 = len >> 2; // 数组长度除4整数
int left4 = len & 3; // 数组长度除4余数
float32x4_t sum_vec = vdupq_n_f32(0.0);//定义用于暂存累加结果的寄存器且初始化为0
for (; dim4 > 0; dim4--, arr += 4) //每次同时访问4个数组元素
{
float32x4_t data_vec = vld1q_f32(arr); //依次取4个元素存入寄存器vec
sum_vec = vaddq_f32(sum_vec, data_vec);//ri = ai + bi 计算两组寄存器对应元素之和并存放到相应结果
}
float sum = vgetq_lane_f32(sum_vec, 0) + vgetq_lane_f32(sum_vec, 1) + vgetq_lane_f32(sum_vec, 2) + vgetq_lane_f32(sum_vec, 3); //将累加结果寄存器中的所有元素相加得到最终累加值
for (; left4 > 0; left4--, arr++)
{
sum += (*arr) ; //对于剩下的少于4的数字依次计算累加即可
}
kernel_neon_end(flags);
return sum;
}
static void init_array(float *arr, int len)
{
int i = 0;
for (i = 0; i < len; i ++)
{
arr[i] = (i * 1000) / len * 0.38;
}
}
static void neon1_entry(void *para)
{
float array[123];
init_array(array, 123);
while (1)
{
float sum = sum_array(array, 123);
printf("[DBG]: %s line %d, float sum %f, size %d.\n", __func__, __LINE__, sum, sizeof(__simd64_float32_t));
}
}
static void neon2_entry(void *para)
{
float array[255];
init_array(array, 255);
while (1)
{
float sum = sum_array(array, 255);
printf("[DBG]: %s line %d, float sum %f, size %d.\n", __func__, __LINE__, sum, sizeof(__simd64_float32_t));
}
}
static void armneon_entry(void)
{
rt_thread_t neon_thread1, neon_thread2;
rt_enter_critical();
neon_thread1 = rt_thread_create("neon1", neon1_entry, RT_NULL, 0x1000, 1, 10);
RT_ASSERT(neon_thread1 != RT_NULL);
rt_thread_startup(neon_thread1);
neon_thread2 = rt_thread_create("neon2", neon2_entry, RT_NULL, 0x1000, 1, 10);
RT_ASSERT(neon_thread2 != RT_NULL);
rt_thread_startup(neon_thread2);
rt_exit_critical();
return;
}
static int cmd_armneon(int argc, const char **argv)
{
armneon_entry();
return 0;
}
FINSH_FUNCTION_EXPORT_ALIAS(cmd_armneon, __cmd_armneon, armneon test);