Created
August 5, 2018 07:05
-
-
Save ast/0bd22f81cc80716c8ffa796cd08e8781 to your computer and use it in GitHub Desktop.
Deinterleave, scale and complex-multiply 8 interleaved int16 IQ samples into float32 output, using ARM NEON inline assembly.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#if defined(__GNUC__) && !defined(__ARM_NEON__) && !defined(__ARM_NEON) | |
#error "compiling simd-neon.h requires -mfpu=neon or equivalent" | |
#endif | |
/*
 * Deinterleave, scale and mix one batch of eight complex samples.
 *
 * frames: 16 int16 values read as 8 interleaved pairs; the first value of
 *         each pair is used as the real part, the second as the imaginary.
 * m:      8 complex float multipliers.
 * iq:     receives 8 complex floats: (frames[k] * 1/32767) * m[k],
 *         computed as a complex product.
 *
 * Exactly 8 samples are processed per call; all three buffers must hold at
 * least that many elements.
 *
 * The hand-written kernel uses ARMv7 (A32/T32) NEON mnemonics, so it is only
 * assembled for 32-bit ARM targets with NEON enabled. Other targets (for
 * example AArch64, which defines __ARM_NEON but cannot assemble these
 * mnemonics) use an equivalent scalar loop.
 */
void deint_scale_mix(int16_t *frames, float complex *m, float complex *iq)
{
    float scale = 1. / 32767;

#if defined(__arm__) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
    /* vdup.32 takes a core register, so hand over the raw bit pattern. */
    union {
        float f;
        uint32_t u;
    } scale_bits = { .f = scale };

    __asm__ volatile(
        "vdup.32      q15, %[scale]\n\t"        /* broadcast scale factor */
        "vld2.16      {d16-d19}, [%[s]]\n\t"    /* q8 = 8 x re, q9 = 8 x im */
        "vmovl.s16    q0, d16\n\t"              /* re[0-3] -> s32 */
        "vmovl.s16    q2, d17\n\t"              /* re[4-7] -> s32 */
        "vmovl.s16    q1, d18\n\t"              /* im[0-3] -> s32 */
        "vmovl.s16    q3, d19\n\t"              /* im[4-7] -> s32 */
        "vcvt.f32.s32 q0, q0\n\t"               /* s32 -> f32 */
        "vcvt.f32.s32 q2, q2\n\t"
        "vcvt.f32.s32 q1, q1\n\t"
        "vcvt.f32.s32 q3, q3\n\t"
        "vmul.f32     q8,  q0, q15\n\t"         /* q8  = a.r [0-3] */
        "vmul.f32     q10, q2, q15\n\t"         /* q10 = a.r [4-7] */
        "vmul.f32     q9,  q1, q15\n\t"         /* q9  = a.i [0-3] */
        "vmul.f32     q11, q3, q15\n\t"         /* q11 = a.i [4-7] */
        /* q15 is free from here on and is reused for b.i [4-7]. */
        "vld2.32      {d24-d27}, [%[m]]!\n\t"   /* q12 = b.r, q13 = b.i [0-3] */
        "vld2.32      {d28-d31}, [%[m]]\n\t"    /* q14 = b.r, q15 = b.i [4-7] */
        "vmul.f32     q0, q8,  q12\n\t"         /*   a.r * b.r  [0-3] */
        "vmul.f32     q1, q9,  q12\n\t"         /*   a.i * b.r        */
        "vmul.f32     q2, q10, q14\n\t"         /*   a.r * b.r  [4-7] */
        "vmul.f32     q3, q11, q14\n\t"         /*   a.i * b.r        */
        "vmls.f32     q0, q9,  q13\n\t"         /* - a.i * b.i  [0-3] */
        "vmla.f32     q1, q8,  q13\n\t"         /* + a.r * b.i        */
        "vmls.f32     q2, q11, q15\n\t"         /* - a.i * b.i  [4-7] */
        "vmla.f32     q3, q10, q15\n\t"         /* + a.r * b.i        */
        "vst2.32      {d0-d3}, [%[iq]]!\n\t"    /* re/im re-interleaved [0-3] */
        "vst2.32      {d4-d7}, [%[iq]]\n\t"     /* re/im re-interleaved [4-7] */
        /* m and iq are post-incremented above, so they are read-write. */
        : [m] "+r"(m), [iq] "+r"(iq)
        : [s] "r"(frames), [scale] "r"(scale_bits.u)
        /* "memory": the asm reads and writes through the pointers. */
        : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11",
          "q12", "q13", "q14", "q15");
#else
    /*
     * Scalar path with the same operation order as the NEON kernel.
     * A complex float has the layout of float[2] (real, imaginary), so the
     * buffers are addressed as plain float arrays.
     */
    const float *mix = (const float *)m;
    float *out = (float *)iq;

    for (int k = 0; k < 8; k++) {
        float a_re = (float)frames[2 * k] * scale;
        float a_im = (float)frames[2 * k + 1] * scale;
        float b_re = mix[2 * k];
        float b_im = mix[2 * k + 1];

        float re = a_re * b_re;
        re -= a_im * b_im;
        float im = a_im * b_re;
        im += a_re * b_im;

        out[2 * k] = re;
        out[2 * k + 1] = im;
    }
#endif
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment