Created
August 1, 2023 16:54
-
-
Save Narsil/225fc9af3bf771655c2dda93346e166b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; <gemm_f16::gemm::f16::GEMM as lazy_static::LazyStatic>::initialize
; Load-acquires the 64-bit word at LAZY+8. If it equals 3 the function
; returns at once; otherwise it calls
; std::sys_common::once::queue::Once::call on that word and then returns.
; NOTE(review): 3 is presumably the "completed" state of the Once - confirm
; against the standard library version this was built with.
; Frame: 48 bytes, x29/x30 saved at [sp, #32], x29 = sp + 32.
.globl <gemm_f16::gemm::f16::GEMM as lazy_static::LazyStatic>::initialize | |
.p2align 2 | |
<gemm_f16::gemm::f16::GEMM as lazy_static::LazyStatic>::initialize: | |
Lfunc_begin18: | |
.cfi_startproc | |
sub sp, sp, #48 | |
.cfi_def_cfa_offset 48 | |
stp x29, x30, [sp, #32] | |
add x29, sp, #32 | |
.cfi_def_cfa w29, 16 | |
.cfi_offset w30, -8 | |
.cfi_offset w29, -16 | |
.cfi_remember_state | |
; x8 = address of the LAZY static; a copy of that address is kept at [sp, #8].
Lloh101: | |
adrp x8, <gemm_f16::gemm::f16::GEMM as core::ops::deref::Deref>::deref::__stability::LAZY@PAGE | |
Lloh102: | |
add x8, x8, <gemm_f16::gemm::f16::GEMM as core::ops::deref::Deref>::deref::__stability::LAZY@PAGEOFF | |
str x8, [sp, #8] | |
; Load-acquire the word at LAZY+8 and take the slow path unless it is 3.
add x8, x8, #8 | |
ldapr x8, [x8] | |
cmp x8, #3 | |
b.ne LBB18_2 | |
; Common exit: restore x29/x30 and release the 48-byte frame.
LBB18_1: | |
.cfi_def_cfa wsp, 48 | |
ldp x29, x30, [sp, #32] | |
add sp, sp, #48 | |
.cfi_def_cfa_offset 0 | |
.cfi_restore w30 | |
.cfi_restore w29 | |
ret | |
; Slow path. Build a chain of pointers on the stack:
;   [sp, #16]  = address of [sp, #8]  (the slot holding the LAZY address)
;   [x29, #-8] = address of [sp, #16]
LBB18_2: | |
.cfi_restore_state | |
add x8, sp, #8 | |
str x8, [sp, #16] | |
add x8, sp, #16 | |
stur x8, [x29, #-8] | |
; Call arguments: x0 = LAZY+8, w1 = 0, x2 = x29 - 8,
; x3 = l___unnamed_3, x4 = l___unnamed_25.
Lloh103: | |
adrp x0, <gemm_f16::gemm::f16::GEMM as core::ops::deref::Deref>::deref::__stability::LAZY@PAGE+8 | |
Lloh104: | |
add x0, x0, <gemm_f16::gemm::f16::GEMM as core::ops::deref::Deref>::deref::__stability::LAZY@PAGEOFF+8 | |
Lloh105: | |
adrp x3, l___unnamed_3@PAGE | |
Lloh106: | |
add x3, x3, l___unnamed_3@PAGEOFF | |
Lloh107: | |
adrp x4, l___unnamed_25@PAGE | |
Lloh108: | |
add x4, x4, l___unnamed_25@PAGEOFF | |
sub x2, x29, #8 | |
mov w1, #0 | |
bl std::sys_common::once::queue::Once::call | |
b LBB18_1 | |
.loh AdrpAdd Lloh101, Lloh102 | |
.loh AdrpAdd Lloh107, Lloh108 | |
.loh AdrpAdd Lloh105, Lloh106 | |
.loh AdrpAdd Lloh103, Lloh104 | |
Lfunc_end18: | |
.cfi_endproc | |
; gemm_f16::microkernel::neon::f16::x1x1
; Accumulates one 8 x f16 vector (v0) over x2 steps; each step adds
; (8 halfwords loaded from [x4]) * (one halfword broadcast from [x5]).
; The result is then combined with the memory at x3.
;
; Inputs, as used by the code below:
;   x0, x1   inner / outer trip counts of the generic store loops; the
;            vector fast path needs x0 == 8, x1 == 1 and x7 == 1
;   x2       number of accumulation steps (two per loop pass, plus one
;            tail step when bit 0 is set)
;   x3       destination pointer
;   x4, x5   pointers to the vector operand and the broadcast operand
;   x6, x7   destination strides in halfwords (outer, inner)
;   [entry sp + 0], [entry sp + 8]    per-step strides in halfwords for x4, x5
;   [entry sp + 24], [entry sp + 26]  f16 factors applied to the old
;            destination value and to the accumulator respectively
;   [entry sp + 28]  mode byte: 2 -> dst = a*dst + b*acc, 1 -> dst += b*acc,
;            any other value -> dst = b*acc
; NOTE(review): these look like the m, n, k, dst, lhs, rhs, stride, alpha and
; beta arguments of the gemm microkernel - confirm against the Rust source.
; Frame: 16 bytes, used to spill the accumulator for the generic path.
.p2align 2 | |
gemm_f16::microkernel::neon::f16::x1x1: | |
Lfunc_begin19: | |
.cfi_startproc | |
sub sp, sp, #16 | |
.cfi_def_cfa_offset 16 | |
; v0 = accumulator, cleared to zero.
movi.2d v0, #0000000000000000 | |
cmp x2, #2 | |
b.hs LBB19_2 | |
mov x8, x5 | |
b LBB19_4 | |
; Loop setup: x9 = x2 / 2 passes, x10 = byte stride for x4,
; x11 = byte stride for x5, x12 = 2 * x11 (advance per pass).
LBB19_2: | |
ldp x10, x8, [sp, #16] | |
lsr x9, x2, #1 | |
lsl x10, x10, #1 | |
lsl x11, x8, #1 | |
lsl x12, x11, #1 | |
mov x8, x5 | |
; Two accumulation steps per pass.
LBB19_3: | |
ld1r.8h { v1 }, [x8], x12 | |
ldr q2, [x4] | |
; InlineAsm Start | |
fmla.8h v0, v2, v1 | |
; InlineAsm End | |
add x13, x5, x11 | |
ld1r.8h { v1 }, [x13] | |
ldr q2, [x4, x10] | |
; InlineAsm Start | |
fmla.8h v0, v2, v1 | |
; InlineAsm End | |
add x4, x4, x10, lsl #1 | |
mov x5, x8 | |
subs x9, x9, #1 | |
b.ne LBB19_3 | |
; Tail: one more step when x2 is odd.
LBB19_4: | |
tbz w2, #0, LBB19_6 | |
ld1r.8h { v1 }, [x8] | |
ldr q2, [x4] | |
; InlineAsm Start | |
fmla.8h v0, v2, v1 | |
; InlineAsm End | |
; Restored join label: the tbz above targets LBB19_6, whose definition was
; missing. Both paths need the factor and mode loads below.
LBB19_6: | |
; w8 = accumulator factor, w9 = destination factor, w10 = mode byte.
ldrh w8, [sp, #42] | |
ldrh w9, [sp, #40] | |
ldrb w10, [sp, #44] | |
; Fast path only for x0 == 8, x1 == 1, x7 == 1 (one full vector store).
cmp x0, #8 | |
b.ne LBB19_12 | |
cmp x1, #1 | |
b.ne LBB19_12 | |
cmp x7, #1 | |
b.ne LBB19_12 | |
dup.8h v1, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB19_20 | |
cmp w8, #2 | |
b.ne LBB19_21 | |
; Fast path, mode 2: dst = w9 * dst + w8 * acc.
dup.8h v2, w9 | |
ldr q3, [x3] | |
; InlineAsm Start | |
fmul.8h v4, v2, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v1, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v4, v2 | |
; InlineAsm End | |
str q0, [x3] | |
b LBB19_35 | |
; Generic path: spill the accumulator and store element by element.
LBB19_12: | |
str q0, [sp] | |
and w10, w10, #0xff | |
cmp w10, #2 | |
b.eq LBB19_23 | |
cmp w10, #1 | |
b.ne LBB19_29 | |
; Generic path, mode 1: dst += w8 * acc.
cbz x1, LBB19_35 | |
cbz x0, LBB19_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
; Outer loop: x9 counts up to x1; x12 steps 16 bytes through the spill area.
LBB19_17: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
; Inner loop: x0 elements, destination stepping by x11 bytes.
LBB19_18: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB19_18 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB19_17 | |
b LBB19_35 | |
; Fast path, mode 1: dst += w8 * acc.
LBB19_20: | |
ldr q2, [x3] | |
; InlineAsm Start | |
fmla.8h v2, v1, v0 | |
; InlineAsm End | |
b LBB19_22 | |
; Fast path, other modes: dst = w8 * acc.
LBB19_21: | |
; InlineAsm Start | |
fmul.8h v2, v1, v0 | |
; InlineAsm End | |
LBB19_22: | |
str q2, [x3] | |
b LBB19_35 | |
; Generic path, mode 2: dst = w9 * dst + w8 * acc.
LBB19_23: | |
cbz x1, LBB19_35 | |
cbz x0, LBB19_35 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB19_26: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB19_27: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB19_27 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB19_26 | |
b LBB19_35 | |
; Generic path, other modes: dst = w8 * acc.
LBB19_29: | |
cbz x1, LBB19_35 | |
cbz x0, LBB19_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB19_32: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB19_33: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB19_33 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB19_32 | |
; Common exit: release the 16-byte frame.
LBB19_35: | |
add sp, sp, #16 | |
.cfi_def_cfa_offset 0 | |
ret | |
Lfunc_end19: | |
.cfi_endproc | |
; gemm_f16::microkernel::neon::f16::x1x2
; Accumulates two 8 x f16 vectors (v1 = first output column, v0 = second)
; over x2 steps. Each step multiplies 8 halfwords loaded from [x4] by two
; broadcast halfwords read from x5 and x5 + 2 * [entry sp + 16] bytes.
; The results are then combined with the memory at x3.
;
; Inputs, as used by the code below:
;   x0, x1   inner / outer trip counts of the generic store loops; the
;            vector fast path needs x0 == 8, x1 == 2 and x7 == 1
;   x2       number of accumulation steps (two per loop pass plus odd tail)
;   x3       destination pointer
;   x4, x5   pointers to the vector operand and the broadcast operand
;   x6, x7   destination strides in halfwords (outer, inner)
;   [entry sp + 0], [entry sp + 8]    per-step strides in halfwords for x4, x5
;   [entry sp + 16]  distance in halfwords between the two broadcast values
;   [entry sp + 24], [entry sp + 26]  f16 factors applied to the old
;            destination value and to the accumulators respectively
;   [entry sp + 28]  mode byte: 2 -> dst = a*dst + b*acc, 1 -> dst += b*acc,
;            any other value -> dst = b*acc
; NOTE(review): these look like the m, n, k, dst, lhs, rhs, stride, alpha and
; beta arguments of the gemm microkernel - confirm against the Rust source.
; Frame: 32 bytes, used to spill both accumulators for the generic path.
.p2align 2 | |
gemm_f16::microkernel::neon::f16::x1x2: | |
Lfunc_begin20: | |
.cfi_startproc | |
sub sp, sp, #32 | |
.cfi_def_cfa_offset 32 | |
; x8 = byte distance between the two broadcast values.
ldr x8, [sp, #48] | |
movi.2d v0, #0000000000000000 | |
lsl x8, x8, #1 | |
cmp x2, #2 | |
b.hs LBB20_2 | |
movi.2d v1, #0000000000000000 | |
mov x9, x5 | |
b LBB20_4 | |
; Loop setup: x10 = x2 / 2 passes, x11 = byte stride for x4,
; x12 = byte stride for x5, x13 = x8 + x12, x14 = 2 * x12.
LBB20_2: | |
ldp x11, x9, [sp, #32] | |
lsr x10, x2, #1 | |
lsl x11, x11, #1 | |
lsl x12, x9, #1 | |
add x13, x8, x12 | |
lsl x14, x12, #1 | |
movi.2d v1, #0000000000000000 | |
mov x9, x5 | |
; Two accumulation steps per pass, two columns per step.
LBB20_3: | |
ld1r.8h { v2 }, [x9], x14 | |
ldr q3, [x4] | |
; InlineAsm Start | |
fmla.8h v1, v3, v2 | |
; InlineAsm End | |
add x15, x5, x8 | |
ld1r.8h { v2 }, [x15] | |
; InlineAsm Start | |
fmla.8h v0, v3, v2 | |
; InlineAsm End | |
add x15, x5, x12 | |
ld1r.8h { v2 }, [x15] | |
ldr q3, [x4, x11] | |
; InlineAsm Start | |
fmla.8h v1, v3, v2 | |
; InlineAsm End | |
add x15, x5, x13 | |
ld1r.8h { v2 }, [x15] | |
; InlineAsm Start | |
fmla.8h v0, v3, v2 | |
; InlineAsm End | |
add x4, x4, x11, lsl #1 | |
mov x5, x9 | |
subs x10, x10, #1 | |
b.ne LBB20_3 | |
; Tail: one more step when x2 is odd.
LBB20_4: | |
tbz w2, #0, LBB20_6 | |
ld1r.8h { v2 }, [x9], x8 | |
ldr q3, [x4] | |
; InlineAsm Start | |
fmla.8h v1, v3, v2 | |
; InlineAsm End | |
ld1r.8h { v2 }, [x9] | |
; InlineAsm Start | |
fmla.8h v0, v3, v2 | |
; InlineAsm End | |
; Restored join label: the tbz above targets LBB20_6, whose definition was
; missing. Both paths need the factor and mode loads below.
LBB20_6: | |
; w8 = accumulator factor, w9 = destination factor, w10 = mode byte.
ldrh w8, [sp, #58] | |
ldrh w9, [sp, #56] | |
ldrb w10, [sp, #60] | |
; Fast path only for x0 == 8, x1 == 2, x7 == 1.
cmp x0, #8 | |
b.ne LBB20_12 | |
cmp x1, #2 | |
b.ne LBB20_12 | |
cmp x7, #1 | |
b.ne LBB20_12 | |
dup.8h v2, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB20_20 | |
cmp w8, #2 | |
b.ne LBB20_21 | |
; Fast path, mode 2: dst = w9 * dst + w8 * acc, for both columns.
dup.8h v3, w9 | |
ldr q4, [x3] | |
; InlineAsm Start | |
fmul.8h v5, v3, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v4, v2, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v5, v4 | |
; InlineAsm End | |
str q1, [x3] | |
; Second column lives x6 halfwords (2 * x6 bytes) after the first.
lsl x8, x6, #1 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v4, v3, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v2, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v4, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB20_35 | |
; Generic path: spill both accumulators and store element by element.
LBB20_12: | |
stp q1, q0, [sp] | |
and w10, w10, #0xff | |
cmp w10, #2 | |
b.eq LBB20_23 | |
cmp w10, #1 | |
b.ne LBB20_29 | |
; Generic path, mode 1: dst += w8 * acc.
cbz x1, LBB20_35 | |
cbz x0, LBB20_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
; Outer loop: x9 counts up to x1; x12 steps 16 bytes through the spill area.
LBB20_17: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
; Inner loop: x0 elements, destination stepping by x11 bytes.
LBB20_18: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB20_18 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB20_17 | |
b LBB20_35 | |
; Fast path, mode 1: dst += w8 * acc, for both columns.
LBB20_20: | |
ldr q3, [x3] | |
; InlineAsm Start | |
fmla.8h v3, v2, v1 | |
; InlineAsm End | |
str q3, [x3] | |
lsl x8, x6, #1 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v2, v0 | |
; InlineAsm End | |
b LBB20_22 | |
; Fast path, other modes: dst = w8 * acc, for both columns.
LBB20_21: | |
; InlineAsm Start | |
fmul.8h v3, v2, v1 | |
; InlineAsm End | |
str q3, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v1, v2, v0 | |
; InlineAsm End | |
LBB20_22: | |
str q1, [x3, x8] | |
b LBB20_35 | |
; Generic path, mode 2: dst = w9 * dst + w8 * acc.
LBB20_23: | |
cbz x1, LBB20_35 | |
cbz x0, LBB20_35 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB20_26: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB20_27: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB20_27 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB20_26 | |
b LBB20_35 | |
; Generic path, other modes: dst = w8 * acc.
LBB20_29: | |
cbz x1, LBB20_35 | |
cbz x0, LBB20_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB20_32: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB20_33: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB20_33 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB20_32 | |
; Common exit: release the 32-byte frame.
LBB20_35: | |
add sp, sp, #32 | |
.cfi_def_cfa_offset 0 | |
ret | |
Lfunc_end20: | |
.cfi_endproc | |
; gemm_f16::microkernel::neon::f16::x1x3
; Accumulates three 8 x f16 vectors (v2, v1, v0 = output columns 0, 1, 2)
; over x2 steps. Each step multiplies 8 halfwords loaded from [x4] by three
; broadcast halfwords read from x5, x5 + 2*d and x5 + 4*d bytes, where
; d = [entry sp + 16]. The results are then combined with the memory at x3.
;
; Inputs, as used by the code below:
;   x0, x1   inner / outer trip counts of the generic store loops; the
;            vector fast path needs x0 == 8, x1 == 3 and x7 == 1
;   x2       number of accumulation steps (two per loop pass plus odd tail)
;   x3       destination pointer
;   x4, x5   pointers to the vector operand and the broadcast operand
;   x6, x7   destination strides in halfwords (outer, inner)
;   [entry sp + 0], [entry sp + 8]    per-step strides in halfwords for x4, x5
;   [entry sp + 16]  distance in halfwords between adjacent broadcast values
;   [entry sp + 24], [entry sp + 26]  f16 factors applied to the old
;            destination value and to the accumulators respectively
;   [entry sp + 28]  mode byte: 2 -> dst = a*dst + b*acc, 1 -> dst += b*acc,
;            any other value -> dst = b*acc
; NOTE(review): these look like the m, n, k, dst, lhs, rhs, stride, alpha and
; beta arguments of the gemm microkernel - confirm against the Rust source.
; Frame: 64 bytes; [sp, #0..47] spills the accumulators, x20/x19 are saved
; at [sp, #48] because x19 is used as a scratch register in the main loop.
.p2align 2 | |
gemm_f16::microkernel::neon::f16::x1x3: | |
Lfunc_begin21: | |
.cfi_startproc | |
sub sp, sp, #64 | |
.cfi_def_cfa_offset 64 | |
stp x20, x19, [sp, #48] | |
.cfi_offset w19, -8 | |
.cfi_offset w20, -16 | |
; x8 = broadcast distance in halfwords; x9 = byte offset of the third value.
ldr x8, [sp, #80] | |
movi.2d v0, #0000000000000000 | |
lsl x9, x8, #2 | |
cmp x2, #2 | |
b.hs LBB21_2 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
mov x10, x5 | |
b LBB21_4 | |
; Loop setup: x11 = x2 / 2 passes, x12 = byte offset of the second value,
; x13 = byte stride for x4, x14 = byte stride for x5,
; x15 = x9 + x14, x16 = x12 + x14, x17 = 2 * x14.
LBB21_2: | |
lsr x11, x2, #1 | |
ldp x13, x10, [sp, #64] | |
lsl x12, x8, #1 | |
lsl x13, x13, #1 | |
lsl x14, x10, #1 | |
add x15, x9, x14 | |
add x16, x12, x14 | |
lsl x17, x14, #1 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
mov x10, x5 | |
; Two accumulation steps per pass, three columns per step.
LBB21_3: | |
ld1r.8h { v3 }, [x10], x17 | |
ldr q4, [x4] | |
; InlineAsm Start | |
fmla.8h v2, v4, v3 | |
; InlineAsm End | |
add x19, x5, x12 | |
ld1r.8h { v3 }, [x19] | |
; InlineAsm Start | |
fmla.8h v1, v4, v3 | |
; InlineAsm End | |
add x19, x5, x9 | |
ld1r.8h { v3 }, [x19] | |
; InlineAsm Start | |
fmla.8h v0, v4, v3 | |
; InlineAsm End | |
add x19, x5, x14 | |
ld1r.8h { v3 }, [x19] | |
ldr q4, [x4, x13] | |
; InlineAsm Start | |
fmla.8h v2, v4, v3 | |
; InlineAsm End | |
add x19, x5, x16 | |
ld1r.8h { v3 }, [x19] | |
; InlineAsm Start | |
fmla.8h v1, v4, v3 | |
; InlineAsm End | |
add x5, x5, x15 | |
ld1r.8h { v3 }, [x5] | |
; InlineAsm Start | |
fmla.8h v0, v4, v3 | |
; InlineAsm End | |
add x4, x4, x13, lsl #1 | |
mov x5, x10 | |
subs x11, x11, #1 | |
b.ne LBB21_3 | |
; Tail: one more step when x2 is odd.
LBB21_4: | |
tbz w2, #0, LBB21_6 | |
mov x11, x10 | |
ld1r.8h { v3 }, [x11], x9 | |
ldr q4, [x4] | |
; InlineAsm Start | |
fmla.8h v2, v4, v3 | |
; InlineAsm End | |
add x8, x10, x8, lsl #1 | |
ld1r.8h { v3 }, [x8] | |
; InlineAsm Start | |
fmla.8h v1, v4, v3 | |
; InlineAsm End | |
ld1r.8h { v3 }, [x11] | |
; InlineAsm Start | |
fmla.8h v0, v4, v3 | |
; InlineAsm End | |
; Restored join label: the tbz above targets LBB21_6, whose definition was
; missing. Both paths need the factor and mode loads below.
LBB21_6: | |
; w8 = accumulator factor, w9 = destination factor, w10 = mode byte.
ldrh w8, [sp, #90] | |
ldrh w9, [sp, #88] | |
ldrb w10, [sp, #92] | |
; Fast path only for x0 == 8, x1 == 3, x7 == 1.
cmp x0, #8 | |
b.ne LBB21_12 | |
cmp x1, #3 | |
b.ne LBB21_12 | |
cmp x7, #1 | |
b.ne LBB21_12 | |
dup.8h v3, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB21_20 | |
cmp w8, #2 | |
b.ne LBB21_21 | |
; Fast path, mode 2: dst = w9 * dst + w8 * acc, for the three columns at
; byte offsets 0, 2 * x6 and 4 * x6 from x3.
dup.8h v4, w9 | |
ldr q5, [x3] | |
; InlineAsm Start | |
fmul.8h v6, v4, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v5, v3, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v2, v6, v5 | |
; InlineAsm End | |
str q2, [x3] | |
lsl x8, x6, #1 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v5, v4, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v3, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v5, v2 | |
; InlineAsm End | |
str q1, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v2, v4, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v3, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v2, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB21_35 | |
; Generic path: spill the accumulators and store element by element.
LBB21_12: | |
stp q2, q1, [sp] | |
str q0, [sp, #32] | |
and w10, w10, #0xff | |
cmp w10, #2 | |
b.eq LBB21_23 | |
cmp w10, #1 | |
b.ne LBB21_29 | |
; Generic path, mode 1: dst += w8 * acc.
cbz x1, LBB21_35 | |
cbz x0, LBB21_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
; Outer loop: x9 counts up to x1; x12 steps 16 bytes through the spill area.
LBB21_17: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
; Inner loop: x0 elements, destination stepping by x11 bytes.
LBB21_18: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB21_18 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB21_17 | |
b LBB21_35 | |
; Fast path, mode 1: dst += w8 * acc, for the three columns.
LBB21_20: | |
ldr q4, [x3] | |
; InlineAsm Start | |
fmla.8h v4, v3, v2 | |
; InlineAsm End | |
str q4, [x3] | |
lsl x8, x6, #1 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v2, v3, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v3, v0 | |
; InlineAsm End | |
b LBB21_22 | |
; Fast path, other modes: dst = w8 * acc, for the three columns.
LBB21_21: | |
; InlineAsm Start | |
fmul.8h v4, v3, v2 | |
; InlineAsm End | |
str q4, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v2, v3, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
lsl x8, x6, #2 | |
; InlineAsm Start | |
fmul.8h v1, v3, v0 | |
; InlineAsm End | |
LBB21_22: | |
str q1, [x3, x8] | |
b LBB21_35 | |
; Generic path, mode 2: dst = w9 * dst + w8 * acc.
LBB21_23: | |
cbz x1, LBB21_35 | |
cbz x0, LBB21_35 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB21_26: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB21_27: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB21_27 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB21_26 | |
b LBB21_35 | |
; Generic path, other modes: dst = w8 * acc.
LBB21_29: | |
cbz x1, LBB21_35 | |
cbz x0, LBB21_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB21_32: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB21_33: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB21_33 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB21_32 | |
; Common exit: restore x19/x20 and release the 64-byte frame.
LBB21_35: | |
ldp x20, x19, [sp, #48] | |
add sp, sp, #64 | |
.cfi_def_cfa_offset 0 | |
.cfi_restore w19 | |
.cfi_restore w20 | |
ret | |
Lfunc_end21: | |
.cfi_endproc | |
; gemm_f16::microkernel::neon::f16::x1x4
; Accumulates four 8 x f16 vectors (v3, v2, v1, v0 = output columns 0..3)
; over x2 steps. Each step multiplies 8 halfwords loaded from [x4] by four
; broadcast halfwords taken from the x5 operand. The results are then
; combined with the memory at x3.
;
; Inputs, as used by the code below:
;   x0, x1   inner / outer trip counts of the generic store loops; the
;            vector fast path needs x0 == 8, x1 == 4 and x7 == 1
;   x2       number of accumulation steps (two per loop pass plus odd tail)
;   x3       destination pointer
;   x4, x5   pointers to the vector operand and the broadcast operand
;   x6, x7   destination strides in halfwords (outer, inner)
;   [entry sp + 0], [entry sp + 8]    per-step strides in halfwords for x4, x5
;   [entry sp + 16]  distance in halfwords between adjacent broadcast values;
;            the value 1 selects a variant that loads them with one q load
;   [entry sp + 24], [entry sp + 26]  f16 factors applied to the old
;            destination value and to the accumulators respectively
;   [entry sp + 28]  mode byte: 2 -> dst = a*dst + b*acc, 1 -> dst += b*acc,
;            any other value -> dst = b*acc
; NOTE(review): these look like the m, n, k, dst, lhs, rhs, stride, alpha and
; beta arguments of the gemm microkernel - confirm against the Rust source.
; Frame: 96 bytes; [sp, #0..63] spills the accumulators, x22/x21 and x20/x19
; are saved at [sp, #64] and [sp, #80].
.p2align 2 | |
gemm_f16::microkernel::neon::f16::x1x4: | |
Lfunc_begin22: | |
.cfi_startproc | |
sub sp, sp, #96 | |
.cfi_def_cfa_offset 96 | |
stp x22, x21, [sp, #64] | |
stp x20, x19, [sp, #80] | |
.cfi_offset w19, -8 | |
.cfi_offset w20, -16 | |
.cfi_offset w21, -24 | |
.cfi_offset w22, -32 | |
; x12 = stride for x4, x11 = stride for x5, x9 = broadcast distance,
; x8 = x2 / 2 loop passes.
ldp x11, x9, [sp, #104] | |
ldr x12, [sp, #96] | |
lsr x8, x2, #1 | |
movi.2d v0, #0000000000000000 | |
; Broadcast distance of 1 halfword selects the lane-duplicate variant.
cmp x9, #1 | |
b.ne LBB22_3 | |
cmp x2, #2 | |
b.hs LBB22_5 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
b LBB22_7 | |
; General variant (broadcast distance != 1): x10 = distance in bytes.
LBB22_3: | |
lsl x10, x9, #1 | |
cmp x2, #2 | |
b.hs LBB22_9 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
mov x11, x5 | |
b LBB22_11 | |
; Lane-duplicate variant setup: x9 / x10 = byte strides for x4 / x5.
LBB22_5: | |
lsl x9, x12, #1 | |
lsl x10, x11, #1 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
; Two steps per pass; each step loads 16 bytes from x5 and uses lanes 0..3.
LBB22_6: | |
ldr q4, [x5] | |
dup.8h v5, v4[0] | |
ldr q6, [x4] | |
; InlineAsm Start | |
fmla.8h v3, v6, v5 | |
; InlineAsm End | |
dup.8h v5, v4[1] | |
; InlineAsm Start | |
fmla.8h v2, v6, v5 | |
; InlineAsm End | |
dup.8h v5, v4[2] | |
; InlineAsm Start | |
fmla.8h v1, v6, v5 | |
; InlineAsm End | |
dup.8h v4, v4[3] | |
; InlineAsm Start | |
fmla.8h v0, v6, v4 | |
; InlineAsm End | |
ldr q4, [x5, x10] | |
dup.8h v5, v4[0] | |
ldr q6, [x4, x9] | |
; InlineAsm Start | |
fmla.8h v3, v6, v5 | |
; InlineAsm End | |
dup.8h v5, v4[1] | |
; InlineAsm Start | |
fmla.8h v2, v6, v5 | |
; InlineAsm End | |
dup.8h v5, v4[2] | |
; InlineAsm Start | |
fmla.8h v1, v6, v5 | |
; InlineAsm End | |
dup.8h v4, v4[3] | |
; InlineAsm Start | |
fmla.8h v0, v6, v4 | |
; InlineAsm End | |
add x4, x4, x9, lsl #1 | |
add x5, x5, x10, lsl #1 | |
subs x8, x8, #1 | |
b.ne LBB22_6 | |
; Lane-duplicate variant tail: one more step when x2 is odd.
LBB22_7: | |
tbz w2, #0, LBB22_13 | |
ldr q4, [x5] | |
dup.8h v5, v4[0] | |
ldr q6, [x4] | |
; InlineAsm Start | |
fmla.8h v3, v6, v5 | |
; InlineAsm End | |
dup.8h v5, v4[1] | |
; InlineAsm Start | |
fmla.8h v2, v6, v5 | |
; InlineAsm End | |
dup.8h v5, v4[2] | |
; InlineAsm Start | |
fmla.8h v1, v6, v5 | |
; InlineAsm End | |
dup.8h v4, v4[3] | |
; InlineAsm Start | |
fmla.8h v0, v6, v4 | |
; InlineAsm End | |
b LBB22_13 | |
; General variant setup. Byte quantities: x12 = stride for x4,
; x13 = stride for x5, x10 / x16 / x14 = offsets of broadcast values 1 / 2 / 3
; (2, 4 and 6 times the distance), x19 / x17 / x15 = the same offsets plus
; x13, x20 = 2 * x13 (advance per pass).
LBB22_9: | |
lsl x12, x12, #1 | |
lsl x13, x11, #1 | |
add x11, x10, x9 | |
lsl x14, x11, #1 | |
add x15, x14, x13 | |
lsl x16, x9, #2 | |
add x17, x16, x13 | |
add x19, x10, x13 | |
lsl x20, x13, #1 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
mov x11, x5 | |
; Two steps per pass, four ld1r broadcasts per step.
LBB22_10: | |
ld1r.8h { v4 }, [x11], x20 | |
ldr q5, [x4] | |
; InlineAsm Start | |
fmla.8h v3, v5, v4 | |
; InlineAsm End | |
add x21, x5, x10 | |
ld1r.8h { v4 }, [x21] | |
; InlineAsm Start | |
fmla.8h v2, v5, v4 | |
; InlineAsm End | |
add x21, x5, x16 | |
ld1r.8h { v4 }, [x21] | |
; InlineAsm Start | |
fmla.8h v1, v5, v4 | |
; InlineAsm End | |
add x21, x5, x14 | |
ld1r.8h { v4 }, [x21] | |
; InlineAsm Start | |
fmla.8h v0, v5, v4 | |
; InlineAsm End | |
add x21, x5, x13 | |
ld1r.8h { v4 }, [x21] | |
ldr q5, [x4, x12] | |
; InlineAsm Start | |
fmla.8h v3, v5, v4 | |
; InlineAsm End | |
add x21, x5, x19 | |
ld1r.8h { v4 }, [x21] | |
; InlineAsm Start | |
fmla.8h v2, v5, v4 | |
; InlineAsm End | |
add x21, x5, x17 | |
ld1r.8h { v4 }, [x21] | |
; InlineAsm Start | |
fmla.8h v1, v5, v4 | |
; InlineAsm End | |
add x5, x5, x15 | |
ld1r.8h { v4 }, [x5] | |
; InlineAsm Start | |
fmla.8h v0, v5, v4 | |
; InlineAsm End | |
add x4, x4, x12, lsl #1 | |
mov x5, x11 | |
subs x8, x8, #1 | |
b.ne LBB22_10 | |
; General variant tail: one more step when x2 is odd.
LBB22_11: | |
tbz w2, #0, LBB22_13 | |
add x8, x10, x9 | |
lsl x8, x8, #1 | |
mov x12, x11 | |
ld1r.8h { v4 }, [x12], x8 | |
ldr q5, [x4] | |
; InlineAsm Start | |
fmla.8h v3, v5, v4 | |
; InlineAsm End | |
add x8, x11, x10 | |
ld1r.8h { v4 }, [x8] | |
; InlineAsm Start | |
fmla.8h v2, v5, v4 | |
; InlineAsm End | |
add x8, x11, x9, lsl #2 | |
ld1r.8h { v4 }, [x8] | |
; InlineAsm Start | |
fmla.8h v1, v5, v4 | |
; InlineAsm End | |
ld1r.8h { v4 }, [x12] | |
; InlineAsm Start | |
fmla.8h v0, v5, v4 | |
; InlineAsm End | |
; Join point of both variants.
; w8 = accumulator factor, w9 = destination factor, w10 = mode byte.
LBB22_13: | |
ldrh w8, [sp, #122] | |
ldrh w9, [sp, #120] | |
ldrb w10, [sp, #124] | |
; Fast path only for x0 == 8, x1 == 4, x7 == 1.
cmp x0, #8 | |
b.ne LBB22_19 | |
cmp x1, #4 | |
b.ne LBB22_19 | |
cmp x7, #1 | |
b.ne LBB22_19 | |
dup.8h v4, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB22_27 | |
cmp w8, #2 | |
b.ne LBB22_28 | |
; Fast path, mode 2: dst = w9 * dst + w8 * acc, for the four columns at
; byte offsets 0, 2 * x6, 4 * x6 and 6 * x6 from x3.
dup.8h v5, w9 | |
ldr q6, [x3] | |
; InlineAsm Start | |
fmul.8h v7, v5, v6 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v6, v4, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v3, v7, v6 | |
; InlineAsm End | |
str q3, [x3] | |
lsl x8, x6, #1 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v6, v5, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v3, v4, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v2, v6, v3 | |
; InlineAsm End | |
str q2, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v3, v5, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v4, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v3, v2 | |
; InlineAsm End | |
str q1, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v2, v5, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v4, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v2, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB22_42 | |
; Generic path: spill the accumulators and store element by element.
LBB22_19: | |
stp q3, q2, [sp] | |
stp q1, q0, [sp, #32] | |
and w10, w10, #0xff | |
cmp w10, #2 | |
b.eq LBB22_30 | |
cmp w10, #1 | |
b.ne LBB22_36 | |
; Generic path, mode 1: dst += w8 * acc.
cbz x1, LBB22_42 | |
cbz x0, LBB22_42 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
; Outer loop: x9 counts up to x1; x12 steps 16 bytes through the spill area.
LBB22_24: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
; Inner loop: x0 elements, destination stepping by x11 bytes.
LBB22_25: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB22_25 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB22_24 | |
b LBB22_42 | |
; Fast path, mode 1: dst += w8 * acc, for the four columns.
LBB22_27: | |
ldr q5, [x3] | |
; InlineAsm Start | |
fmla.8h v5, v4, v3 | |
; InlineAsm End | |
str q5, [x3] | |
lsl x8, x6, #1 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v3, v4, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v2, v4, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v4, v0 | |
; InlineAsm End | |
b LBB22_29 | |
; Fast path, other modes: dst = w8 * acc, for the four columns.
LBB22_28: | |
; InlineAsm Start | |
fmul.8h v5, v4, v3 | |
; InlineAsm End | |
str q5, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v3, v4, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
lsl x8, x6, #2 | |
; InlineAsm Start | |
fmul.8h v2, v4, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v1, v4, v0 | |
; InlineAsm End | |
LBB22_29: | |
str q1, [x3, x8] | |
b LBB22_42 | |
; Generic path, mode 2: dst = w9 * dst + w8 * acc.
LBB22_30: | |
cbz x1, LBB22_42 | |
cbz x0, LBB22_42 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB22_33: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB22_34: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB22_34 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB22_33 | |
b LBB22_42 | |
; Generic path, other modes: dst = w8 * acc.
LBB22_36: | |
cbz x1, LBB22_42 | |
cbz x0, LBB22_42 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB22_39: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB22_40: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB22_40 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB22_39 | |
; Common exit: restore x19-x22 and release the 96-byte frame.
LBB22_42: | |
ldp x20, x19, [sp, #80] | |
ldp x22, x21, [sp, #64] | |
add sp, sp, #96 | |
.cfi_def_cfa_offset 0 | |
.cfi_restore w19 | |
.cfi_restore w20 | |
.cfi_restore w21 | |
.cfi_restore w22 | |
ret | |
Lfunc_end22: | |
.cfi_endproc | |
; gemm_f16::microkernel::neon::f16::x1x5
; Accumulates five 8 x f16 vectors (v4, v3, v2, v1, v0 = output columns
; 0..4) over x2 steps. Each step multiplies 8 halfwords loaded from [x4] by
; five broadcast halfwords read from x5 at multiples of 2 * [entry sp + 16]
; bytes. The results are then combined with the memory at x3.
;
; Inputs, as used by the code below:
;   x0, x1   inner / outer trip counts of the generic store loops; the
;            vector fast path needs x0 == 8, x1 == 5 and x7 == 1
;   x2       number of accumulation steps (two per loop pass plus odd tail)
;   x3       destination pointer
;   x4, x5   pointers to the vector operand and the broadcast operand
;   x6, x7   destination strides in halfwords (outer, inner)
;   [entry sp + 0], [entry sp + 8]    per-step strides in halfwords for x4, x5
;   [entry sp + 16]  distance in halfwords between adjacent broadcast values
;   [entry sp + 24], [entry sp + 26]  f16 factors applied to the old
;            destination value and to the accumulators respectively
;   [entry sp + 28]  mode byte: 2 -> dst = a*dst + b*acc, 1 -> dst += b*acc,
;            any other value -> dst = b*acc
; NOTE(review): these look like the m, n, k, dst, lhs, rhs, stride, alpha and
; beta arguments of the gemm microkernel - confirm against the Rust source.
; Frame: 80 bytes, used to spill the five accumulators for the generic path.
.p2align 2 | |
gemm_f16::microkernel::neon::f16::x1x5: | |
Lfunc_begin23: | |
.cfi_startproc | |
sub sp, sp, #80 | |
.cfi_def_cfa_offset 80 | |
; x8 = broadcast distance in halfwords.
ldr x8, [sp, #96] | |
movi.2d v0, #0000000000000000 | |
cmp x2, #2 | |
b.hs LBB23_2 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
tbnz w2, #0, LBB23_5 | |
b LBB23_6 | |
; Loop setup: x9 = x2 / 2 passes, x10 = broadcast distance in bytes,
; x11 = byte stride for x4, x12 = (byte stride for x5) - 8 * x8, which
; rewinds x5 from the fifth value to the first value of the next step,
; x13 = twice the byte stride for x5 (advance per pass).
LBB23_2: | |
lsr x9, x2, #1 | |
ldp x11, x12, [sp, #80] | |
lsl x10, x8, #1 | |
lsl x11, x11, #1 | |
lsl x13, x12, #1 | |
sub x12, x13, x8, lsl #3 | |
lsl x13, x13, #1 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
; Two steps per pass; x5 walks the broadcast values with post-increments,
; x14 tracks the start of the next pass.
LBB23_3: | |
mov x14, x5 | |
ld1r.8h { v5 }, [x14], x13 | |
ldr q6, [x4] | |
; InlineAsm Start | |
fmla.8h v4, v6, v5 | |
; InlineAsm End | |
add x5, x5, x10 | |
ld1r.8h { v5 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v3, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v2, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v1, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x10 | |
ldr q6, [x4, x11] | |
; InlineAsm Start | |
fmla.8h v4, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v3, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v2, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v1, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v6, v5 | |
; InlineAsm End | |
add x4, x4, x11, lsl #1 | |
subs x9, x9, #1 | |
b.ne LBB23_3 | |
mov x5, x14 | |
tbz w2, #0, LBB23_6 | |
; Restored label: the tbnz in the short-input path targets LBB23_5, whose
; definition was missing. This is the odd-tail step; it recomputes x9 and
; x10 itself and only relies on x4, x5 and x8.
LBB23_5: | |
lsl x9, x8, #3 | |
mov x10, x5 | |
ld1r.8h { v5 }, [x10], x9 | |
ldr q6, [x4] | |
; InlineAsm Start | |
fmla.8h v4, v6, v5 | |
; InlineAsm End | |
add x9, x5, x8, lsl #1 | |
ld1r.8h { v5 }, [x9] | |
; InlineAsm Start | |
fmla.8h v3, v6, v5 | |
; InlineAsm End | |
add x9, x5, x8, lsl #2 | |
ld1r.8h { v5 }, [x9] | |
; InlineAsm Start | |
fmla.8h v2, v6, v5 | |
; InlineAsm End | |
mov w9, #6 | |
madd x8, x8, x9, x5 | |
ld1r.8h { v5 }, [x8] | |
; InlineAsm Start | |
fmla.8h v1, v6, v5 | |
; InlineAsm End | |
ld1r.8h { v5 }, [x10] | |
; InlineAsm Start | |
fmla.8h v0, v6, v5 | |
; InlineAsm End | |
; w8 = accumulator factor, w9 = destination factor, w10 = mode byte.
LBB23_6: | |
ldrh w8, [sp, #106] | |
ldrh w9, [sp, #104] | |
ldrb w10, [sp, #108] | |
; Fast path only for x0 == 8, x1 == 5, x7 == 1.
cmp x0, #8 | |
b.ne LBB23_12 | |
cmp x1, #5 | |
b.ne LBB23_12 | |
cmp x7, #1 | |
b.ne LBB23_12 | |
dup.8h v5, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB23_20 | |
cmp w8, #2 | |
b.ne LBB23_21 | |
; Fast path, mode 2: dst = w9 * dst + w8 * acc, for the five columns at
; byte offsets 0, 2, 4, 6 and 8 times x6 from x3.
dup.8h v6, w9 | |
ldr q7, [x3] | |
; InlineAsm Start | |
fmul.8h v16, v6, v7 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v7, v5, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v4, v16, v7 | |
; InlineAsm End | |
str q4, [x3] | |
lsl x8, x6, #1 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v7, v6, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v4, v5, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v3, v7, v4 | |
; InlineAsm End | |
str q3, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v4, v6, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v3, v5, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v2, v4, v3 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v3, v6, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v5, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v3, v2 | |
; InlineAsm End | |
str q1, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v2, v6, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v5, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v2, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB23_35 | |
; Generic path: spill the accumulators and store element by element.
LBB23_12: | |
stp q4, q3, [sp] | |
stp q2, q1, [sp, #32] | |
and w10, w10, #0xff | |
str q0, [sp, #64] | |
cmp w10, #2 | |
b.eq LBB23_23 | |
cmp w10, #1 | |
b.ne LBB23_29 | |
; Generic path, mode 1: dst += w8 * acc.
cbz x1, LBB23_35 | |
cbz x0, LBB23_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
; Outer loop: x9 counts up to x1; x12 steps 16 bytes through the spill area.
LBB23_17: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
; Inner loop: x0 elements, destination stepping by x11 bytes.
LBB23_18: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB23_18 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB23_17 | |
b LBB23_35 | |
; Fast path, mode 1: dst += w8 * acc, for the five columns.
LBB23_20: | |
ldr q6, [x3] | |
; InlineAsm Start | |
fmla.8h v6, v5, v4 | |
; InlineAsm End | |
str q6, [x3] | |
lsl x8, x6, #1 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v4, v5, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v3, v5, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v2, v5, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v5, v0 | |
; InlineAsm End | |
b LBB23_22 | |
; Fast path, other modes: dst = w8 * acc, for the five columns.
LBB23_21: | |
; InlineAsm Start | |
fmul.8h v6, v5, v4 | |
; InlineAsm End | |
str q6, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v4, v5, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
lsl x8, x6, #2 | |
; InlineAsm Start | |
fmul.8h v3, v5, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v2, v5, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
lsl x8, x6, #3 | |
; InlineAsm Start | |
fmul.8h v1, v5, v0 | |
; InlineAsm End | |
LBB23_22: | |
str q1, [x3, x8] | |
b LBB23_35 | |
; Generic path, mode 2: dst = w9 * dst + w8 * acc.
LBB23_23: | |
cbz x1, LBB23_35 | |
cbz x0, LBB23_35 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB23_26: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB23_27: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB23_27 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB23_26 | |
b LBB23_35 | |
; Generic path, other modes: dst = w8 * acc.
LBB23_29: | |
cbz x1, LBB23_35 | |
cbz x0, LBB23_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB23_32: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB23_33: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB23_33 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB23_32 | |
; Common exit: release the 80-byte frame.
LBB23_35: | |
add sp, sp, #80 | |
.cfi_def_cfa_offset 0 | |
ret | |
Lfunc_end23: | |
.cfi_endproc | |
.p2align 2 | |
; --------------------------------------------------------------------------- | |
; x1x6: accumulates a tile of 8 f16 rows (one q register) by 6 columns, then | |
; scales it and writes or merges it into the destination. | |
; | |
; Register arguments, as used by the code below: | |
;   x0  row count of the destination tile (vector store path requires 8) | |
;   x1  column count of the destination tile (vector store path requires 6) | |
;   x2  number of accumulation steps; the main loop performs two per iteration | |
;   x3  destination pointer (2-byte elements) | |
;   x4  pointer to the vector operand, one 16-byte `ldr q` per step | |
;   x5  pointer to the scalar operand, six `ld1r` broadcasts per step | |
;   x6  destination column stride in elements (scaled by 2 bytes here) | |
;   x7  destination row stride in elements (vector store path requires 1) | |
; Stack arguments (offsets relative to sp at entry): | |
;   +0   per-step element stride of the x4 operand | |
;   +8   per-step element stride of the x5 operand | |
;   +16  element stride between the six scalars of one step | |
;   +24  f16 factor applied to the existing destination (mode 2 only) | |
;   +26  f16 factor applied to the accumulators | |
;   +28  byte mode: 1 => dst += f*acc, 2 => dst = g*dst + f*acc, other => dst = f*acc | |
; NOTE(review): these presumably correspond to lhs_cs / rhs_rs / rhs_cs / alpha / | |
; beta / alpha_status in the Rust source - confirm against the crate. | |
; | |
; Accumulators: v5, v4, v3, v2, v1, v0 hold columns 0..5 respectively. | |
; Leaf routine: makes no calls and uses a 96-byte frame only as spill space for | |
; the scalar store path. | |
; --------------------------------------------------------------------------- | |
gemm_f16::microkernel::neon::f16::x1x6: | |
Lfunc_begin24: | |
.cfi_startproc | |
sub sp, sp, #96 | |
.cfi_def_cfa_offset 96 | |
; x8 = element stride between the scalars of one step (entry sp+16) | |
ldr x8, [sp, #112] | |
movi.2d v0, #0000000000000000 | |
cmp x2, #2 | |
b.hs LBB24_2 | |
; Fewer than two steps: clear the remaining accumulators and skip the paired loop. | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
tbnz w2, #0, LBB24_5 | |
b LBB24_6 | |
LBB24_2: | |
; Loop setup: x9 = x2/2 iterations; x10, x11 = byte strides (elements * 2); | |
; x12 = byte step from the sixth scalar of a step to the first scalar of the next; | |
; x13 = byte advance of the scalar operand over two steps. | |
ldp x11, x12, [sp, #96] | |
lsr x9, x2, #1 | |
lsl x10, x8, #1 | |
lsl x11, x11, #1 | |
lsl x13, x12, #1 | |
mov w12, #10 | |
msub x12, x8, x12, x13 | |
lsl x13, x13, #1 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
LBB24_3: | |
; Main loop body: two accumulation steps. x14 tracks where x5 must point after | |
; both steps; x5 itself walks scalar by scalar through post-indexed loads. | |
mov x14, x5 | |
ld1r.8h { v6 }, [x14], x13 | |
ldr q7, [x4] | |
; InlineAsm Start | |
fmla.8h v5, v7, v6 | |
; InlineAsm End | |
add x5, x5, x10 | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v4, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v3, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v2, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v1, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v7, v6 | |
; InlineAsm End | |
; Second step of the pair: vector operand comes from x4 + x11. | |
ld1r.8h { v6 }, [x5], x10 | |
ldr q7, [x4, x11] | |
; InlineAsm Start | |
fmla.8h v5, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v4, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v3, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v2, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v1, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v7, v6 | |
; InlineAsm End | |
add x4, x4, x11, lsl #1 | |
subs x9, x9, #1 | |
b.ne LBB24_3 | |
mov x5, x14 | |
tbz w2, #0, LBB24_6 | |
; Odd remaining step. This label is the target of the `tbnz` in the short-count | |
; path above; it was missing from the listing and is restored here. | |
LBB24_5: | |
lsl x9, x8, #2 | |
add x10, x9, x8 | |
lsl x10, x10, #1 | |
mov x11, x5 | |
ld1r.8h { v6 }, [x11], x10 | |
ldr q7, [x4] | |
; InlineAsm Start | |
fmla.8h v5, v7, v6 | |
; InlineAsm End | |
add x10, x5, x8, lsl #1 | |
ld1r.8h { v6 }, [x10] | |
; InlineAsm Start | |
fmla.8h v4, v7, v6 | |
; InlineAsm End | |
add x9, x5, x9 | |
ld1r.8h { v6 }, [x9] | |
; InlineAsm Start | |
fmla.8h v3, v7, v6 | |
; InlineAsm End | |
mov w9, #6 | |
madd x9, x8, x9, x5 | |
ld1r.8h { v6 }, [x9] | |
; InlineAsm Start | |
fmla.8h v2, v7, v6 | |
; InlineAsm End | |
add x8, x5, x8, lsl #3 | |
ld1r.8h { v6 }, [x8] | |
; InlineAsm Start | |
fmla.8h v1, v7, v6 | |
; InlineAsm End | |
ld1r.8h { v6 }, [x11] | |
; InlineAsm Start | |
fmla.8h v0, v7, v6 | |
; InlineAsm End | |
LBB24_6: | |
; Store phase. w8 = accumulator factor bits, w9 = destination factor bits, | |
; w10 = mode byte. The vector path needs x0 == 8, x1 == 6 and x7 == 1. | |
ldrh w8, [sp, #122] | |
ldrh w9, [sp, #120] | |
ldrb w10, [sp, #124] | |
cmp x0, #8 | |
b.ne LBB24_12 | |
cmp x1, #6 | |
b.ne LBB24_12 | |
cmp x7, #1 | |
b.ne LBB24_12 | |
dup.8h v6, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB24_20 | |
cmp w8, #2 | |
b.ne LBB24_21 | |
; Mode 2, vector path: each column becomes v7*dst + v6*acc. | |
dup.8h v7, w9 | |
ldr q16, [x3] | |
; InlineAsm Start | |
fmul.8h v17, v7, v16 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v16, v6, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v5, v17, v16 | |
; InlineAsm End | |
str q5, [x3] | |
lsl x8, x6, #1 | |
ldr q5, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v16, v7, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v5, v6, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v4, v16, v5 | |
; InlineAsm End | |
str q4, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v5, v7, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v4, v6, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v3, v5, v4 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v4, v7, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v3, v6, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v2, v4, v3 | |
; InlineAsm End | |
str q2, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v3, v7, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v6, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v3, v2 | |
; InlineAsm End | |
str q1, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v2, v7, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v6, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v2, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB24_35 | |
LBB24_12: | |
; Scalar store path: spill the six accumulators so that column j lives at | |
; sp + 16*j, then walk x1 columns by x0 rows one half-precision element at a time. | |
stp q5, q4, [sp] | |
stp q3, q2, [sp, #32] | |
and w10, w10, #0xff | |
stp q1, q0, [sp, #64] | |
cmp w10, #2 | |
b.eq LBB24_23 | |
cmp w10, #1 | |
b.ne LBB24_29 | |
; Mode 1, scalar path: dst += factor * acc. | |
cbz x1, LBB24_35 | |
cbz x0, LBB24_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB24_17: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB24_18: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB24_18 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB24_17 | |
b LBB24_35 | |
LBB24_20: | |
; Mode 1, vector path: fused dst += v6 * acc for each column. | |
ldr q7, [x3] | |
; InlineAsm Start | |
fmla.8h v7, v6, v5 | |
; InlineAsm End | |
str q7, [x3] | |
lsl x8, x6, #1 | |
ldr q5, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v5, v6, v4 | |
; InlineAsm End | |
str q5, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v4, v6, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v3, v6, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v2, v6, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v6, v0 | |
; InlineAsm End | |
b LBB24_22 | |
LBB24_21: | |
; Other modes, vector path: dst = v6 * acc; the destination is not read. | |
; InlineAsm Start | |
fmul.8h v7, v6, v5 | |
; InlineAsm End | |
str q7, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v5, v6, v4 | |
; InlineAsm End | |
str q5, [x3, x8] | |
lsl x8, x6, #2 | |
; InlineAsm Start | |
fmul.8h v4, v6, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v3, v6, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
lsl x8, x6, #3 | |
; InlineAsm Start | |
fmul.8h v2, v6, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v1, v6, v0 | |
; InlineAsm End | |
LBB24_22: | |
; Shared final store of the last column for the two vector paths above. | |
str q1, [x3, x8] | |
b LBB24_35 | |
LBB24_23: | |
; Mode 2, scalar path: dst = w9-factor * dst + w8-factor * acc. | |
cbz x1, LBB24_35 | |
cbz x0, LBB24_35 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB24_26: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB24_27: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB24_27 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB24_26 | |
b LBB24_35 | |
LBB24_29: | |
; Other modes, scalar path: dst = factor * acc. | |
cbz x1, LBB24_35 | |
cbz x0, LBB24_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB24_32: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB24_33: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB24_33 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB24_32 | |
LBB24_35: | |
; Common exit: release the spill area. | |
add sp, sp, #96 | |
.cfi_def_cfa_offset 0 | |
ret | |
Lfunc_end24: | |
.cfi_endproc | |
.p2align 2 | |
; --------------------------------------------------------------------------- | |
; x1x7: accumulates a tile of 8 f16 rows (one q register) by 7 columns, then | |
; scales it and writes or merges it into the destination. | |
; | |
; Register arguments, as used by the code below: | |
;   x0  row count of the destination tile (vector store path requires 8) | |
;   x1  column count of the destination tile (vector store path requires 7) | |
;   x2  number of accumulation steps; the main loop performs two per iteration | |
;   x3  destination pointer (2-byte elements) | |
;   x4  pointer to the vector operand, one 16-byte `ldr q` per step | |
;   x5  pointer to the scalar operand, seven `ld1r` broadcasts per step | |
;   x6  destination column stride in elements (scaled by 2 bytes here) | |
;   x7  destination row stride in elements (vector store path requires 1) | |
; Stack arguments (offsets relative to sp at entry): | |
;   +0   per-step element stride of the x4 operand | |
;   +8   per-step element stride of the x5 operand | |
;   +16  element stride between the seven scalars of one step | |
;   +24  f16 factor applied to the existing destination (mode 2 only) | |
;   +26  f16 factor applied to the accumulators | |
;   +28  byte mode: 1 => dst += f*acc, 2 => dst = g*dst + f*acc, other => dst = f*acc | |
; NOTE(review): these presumably correspond to lhs_cs / rhs_rs / rhs_cs / alpha / | |
; beta / alpha_status in the Rust source - confirm against the crate. | |
; | |
; Accumulators: v6, v5, v4, v3, v2, v1, v0 hold columns 0..6 respectively. | |
; Leaf routine: makes no calls and uses a 112-byte frame only as spill space for | |
; the scalar store path. | |
; --------------------------------------------------------------------------- | |
gemm_f16::microkernel::neon::f16::x1x7: | |
Lfunc_begin25: | |
.cfi_startproc | |
sub sp, sp, #112 | |
.cfi_def_cfa_offset 112 | |
; x8 = element stride between the scalars of one step (entry sp+16); x9 = same in bytes | |
ldr x8, [sp, #128] | |
movi.2d v0, #0000000000000000 | |
lsl x9, x8, #1 | |
cmp x2, #2 | |
b.hs LBB25_2 | |
; Fewer than two steps: clear the remaining accumulators and skip the paired loop. | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
movi.2d v6, #0000000000000000 | |
tbnz w2, #0, LBB25_5 | |
b LBB25_6 | |
LBB25_2: | |
; Loop setup: x10 = x2/2 iterations; x11 = vector operand byte stride; | |
; x12 = byte step from the seventh scalar of a step to the first scalar of the next; | |
; x13 = byte advance of the scalar operand over two steps. | |
ldp x11, x12, [sp, #112] | |
lsr x10, x2, #1 | |
lsl x11, x11, #1 | |
lsl x13, x12, #1 | |
mov w12, #12 | |
msub x12, x8, x12, x13 | |
lsl x13, x13, #1 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
movi.2d v6, #0000000000000000 | |
LBB25_3: | |
; Main loop body: two accumulation steps. x14 tracks where x5 must point after | |
; both steps; x5 itself walks scalar by scalar through post-indexed loads. | |
mov x14, x5 | |
ld1r.8h { v7 }, [x14], x13 | |
ldr q16, [x4] | |
; InlineAsm Start | |
fmla.8h v6, v16, v7 | |
; InlineAsm End | |
add x5, x5, x9 | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v5, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v4, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v3, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v2, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v1, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v16, v7 | |
; InlineAsm End | |
; Second step of the pair: vector operand comes from x4 + x11. | |
ld1r.8h { v7 }, [x5], x9 | |
ldr q16, [x4, x11] | |
; InlineAsm Start | |
fmla.8h v6, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v5, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v4, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v3, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v2, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x9 | |
; InlineAsm Start | |
fmla.8h v1, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v16, v7 | |
; InlineAsm End | |
add x4, x4, x11, lsl #1 | |
subs x10, x10, #1 | |
b.ne LBB25_3 | |
mov x5, x14 | |
tbz w2, #0, LBB25_6 | |
; Odd remaining step. This label is the target of the `tbnz` in the short-count | |
; path above; it was missing from the listing and is restored here. | |
LBB25_5: | |
add x10, x9, x8 | |
lsl x10, x10, #2 | |
mov x11, x5 | |
ld1r.8h { v7 }, [x11], x10 | |
ldr q16, [x4] | |
; InlineAsm Start | |
fmla.8h v6, v16, v7 | |
; InlineAsm End | |
add x9, x5, x9 | |
ld1r.8h { v7 }, [x9] | |
; InlineAsm Start | |
fmla.8h v5, v16, v7 | |
; InlineAsm End | |
add x9, x5, x8, lsl #2 | |
ld1r.8h { v7 }, [x9] | |
; InlineAsm Start | |
fmla.8h v4, v16, v7 | |
; InlineAsm End | |
mov w9, #6 | |
madd x9, x8, x9, x5 | |
ld1r.8h { v7 }, [x9] | |
; InlineAsm Start | |
fmla.8h v3, v16, v7 | |
; InlineAsm End | |
add x9, x5, x8, lsl #3 | |
ld1r.8h { v7 }, [x9] | |
; InlineAsm Start | |
fmla.8h v2, v16, v7 | |
; InlineAsm End | |
mov w9, #10 | |
madd x8, x8, x9, x5 | |
ld1r.8h { v7 }, [x8] | |
; InlineAsm Start | |
fmla.8h v1, v16, v7 | |
; InlineAsm End | |
ld1r.8h { v7 }, [x11] | |
; InlineAsm Start | |
fmla.8h v0, v16, v7 | |
; InlineAsm End | |
LBB25_6: | |
; Store phase. w8 = accumulator factor bits, w9 = destination factor bits, | |
; w10 = mode byte. The vector path needs x0 == 8, x1 == 7 and x7 == 1. | |
ldrh w8, [sp, #138] | |
ldrh w9, [sp, #136] | |
ldrb w10, [sp, #140] | |
cmp x0, #8 | |
b.ne LBB25_12 | |
cmp x1, #7 | |
b.ne LBB25_12 | |
cmp x7, #1 | |
b.ne LBB25_12 | |
dup.8h v7, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB25_20 | |
cmp w8, #2 | |
b.ne LBB25_21 | |
; Mode 2, vector path: each column becomes v16*dst + v7*acc. | |
dup.8h v16, w9 | |
ldr q17, [x3] | |
; InlineAsm Start | |
fmul.8h v18, v16, v17 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v17, v7, v6 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v6, v18, v17 | |
; InlineAsm End | |
str q6, [x3] | |
lsl x8, x6, #1 | |
ldr q6, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v17, v16, v6 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v6, v7, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v5, v17, v6 | |
; InlineAsm End | |
str q5, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q5, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v6, v16, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v5, v7, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v4, v6, v5 | |
; InlineAsm End | |
str q4, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v5, v16, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v4, v7, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v3, v5, v4 | |
; InlineAsm End | |
str q3, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v4, v16, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v3, v7, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v2, v4, v3 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v3, v16, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v7, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v3, v2 | |
; InlineAsm End | |
str q1, [x3, x8] | |
mov w8, #12 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v2, v16, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v7, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v2, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB25_35 | |
LBB25_12: | |
; Scalar store path: spill the seven accumulators so that column j lives at | |
; sp + 16*j, then walk x1 columns by x0 rows one half-precision element at a time. | |
stp q6, q5, [sp] | |
stp q4, q3, [sp, #32] | |
stp q2, q1, [sp, #64] | |
and w10, w10, #0xff | |
str q0, [sp, #96] | |
cmp w10, #2 | |
b.eq LBB25_23 | |
cmp w10, #1 | |
b.ne LBB25_29 | |
; Mode 1, scalar path: dst += factor * acc. | |
cbz x1, LBB25_35 | |
cbz x0, LBB25_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB25_17: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB25_18: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB25_18 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB25_17 | |
b LBB25_35 | |
LBB25_20: | |
; Mode 1, vector path: fused dst += v7 * acc for each column. | |
ldr q16, [x3] | |
; InlineAsm Start | |
fmla.8h v16, v7, v6 | |
; InlineAsm End | |
str q16, [x3] | |
lsl x8, x6, #1 | |
ldr q6, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v6, v7, v5 | |
; InlineAsm End | |
str q6, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q5, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v5, v7, v4 | |
; InlineAsm End | |
str q5, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v4, v7, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v3, v7, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v2, v7, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #12 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v7, v0 | |
; InlineAsm End | |
b LBB25_22 | |
LBB25_21: | |
; Other modes, vector path: dst = v7 * acc; the destination is not read. | |
; InlineAsm Start | |
fmul.8h v16, v7, v6 | |
; InlineAsm End | |
str q16, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v6, v7, v5 | |
; InlineAsm End | |
str q6, [x3, x8] | |
lsl x8, x6, #2 | |
; InlineAsm Start | |
fmul.8h v5, v7, v4 | |
; InlineAsm End | |
str q5, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v4, v7, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
lsl x8, x6, #3 | |
; InlineAsm Start | |
fmul.8h v3, v7, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v2, v7, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #12 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v1, v7, v0 | |
; InlineAsm End | |
LBB25_22: | |
; Shared final store of the last column for the two vector paths above. | |
str q1, [x3, x8] | |
b LBB25_35 | |
LBB25_23: | |
; Mode 2, scalar path: dst = w9-factor * dst + w8-factor * acc. | |
cbz x1, LBB25_35 | |
cbz x0, LBB25_35 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB25_26: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB25_27: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB25_27 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB25_26 | |
b LBB25_35 | |
LBB25_29: | |
; Other modes, scalar path: dst = factor * acc. | |
cbz x1, LBB25_35 | |
cbz x0, LBB25_35 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB25_32: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB25_33: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB25_33 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB25_32 | |
LBB25_35: | |
; Common exit: release the spill area. | |
add sp, sp, #112 | |
.cfi_def_cfa_offset 0 | |
ret | |
Lfunc_end25: | |
.cfi_endproc | |
.p2align 2 | |
; --------------------------------------------------------------------------- | |
; x1x8: accumulates a tile of 8 f16 rows (one q register) by 8 columns, then | |
; scales it and writes or merges it into the destination. | |
; | |
; Two accumulation variants exist, selected by the scalar stride at entry sp+16: | |
;   stride == 1: the eight scalars of a step are loaded as vectors and broadcast | |
;                lane by lane with `dup` (LBB26_5 / LBB26_6 / LBB26_8) | |
;   otherwise  : each scalar is fetched with its own `ld1r` (LBB26_9 .. LBB26_12) | |
; | |
; Register arguments, as used by the code below: | |
;   x0  row count of the destination tile (vector store path requires 8) | |
;   x1  column count of the destination tile (vector store path requires 8) | |
;   x2  number of accumulation steps; the main loops perform two per iteration | |
;   x3  destination pointer (2-byte elements) | |
;   x4  pointer to the vector operand, one 16-byte `ldr q` per step | |
;   x5  pointer to the scalar operand, eight scalars per step | |
;   x6  destination column stride in elements (scaled by 2 bytes here) | |
;   x7  destination row stride in elements (vector store path requires 1) | |
; Stack arguments (offsets relative to sp at entry): | |
;   +0   per-step element stride of the x4 operand | |
;   +8   per-step element stride of the x5 operand | |
;   +16  element stride between the eight scalars of one step | |
;   +24  f16 factor applied to the existing destination (mode 2 only) | |
;   +26  f16 factor applied to the accumulators | |
;   +28  byte mode: 1 => dst += f*acc, 2 => dst = g*dst + f*acc, other => dst = f*acc | |
; NOTE(review): these presumably correspond to lhs_cs / rhs_rs / rhs_cs / alpha / | |
; beta / alpha_status in the Rust source - confirm against the crate. | |
; | |
; Accumulators: v7, v6, v5, v4, v3, v2, v1, v0 hold columns 0..7 respectively. | |
; Leaf routine: makes no calls and uses a 128-byte frame only as spill space for | |
; the scalar store path. | |
; --------------------------------------------------------------------------- | |
gemm_f16::microkernel::neon::f16::x1x8: | |
Lfunc_begin26: | |
.cfi_startproc | |
sub sp, sp, #128 | |
.cfi_def_cfa_offset 128 | |
; x11 = entry sp+0 stride, x12 = entry sp+8 stride, x9 = entry sp+16 stride; | |
; x8 = number of two-step iterations. | |
ldp x12, x9, [sp, #136] | |
ldr x11, [sp, #128] | |
lsr x8, x2, #1 | |
movi.2d v0, #0000000000000000 | |
cmp x9, #1 | |
b.ne LBB26_3 | |
; Unit scalar stride. | |
cmp x2, #2 | |
b.hs LBB26_5 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
movi.2d v6, #0000000000000000 | |
movi.2d v7, #0000000000000000 | |
tbnz w2, #0, LBB26_8 | |
b LBB26_13 | |
LBB26_3: | |
; Non-unit scalar stride: x10 = scalar stride in bytes. | |
lsl x10, x9, #1 | |
cmp x2, #2 | |
b.hs LBB26_9 | |
movi.2d v1, #0000000000000000 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
movi.2d v6, #0000000000000000 | |
movi.2d v7, #0000000000000000 | |
tbnz w2, #0, LBB26_12 | |
b LBB26_13 | |
LBB26_5: | |
; Unit-stride loop setup: x9 = vector operand byte stride, x10 = scalar operand | |
; byte stride per step. | |
lsl x9, x11, #1 | |
movi.2d v1, #0000000000000000 | |
lsl x10, x12, #1 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
movi.2d v6, #0000000000000000 | |
movi.2d v7, #0000000000000000 | |
LBB26_6: | |
; Unit-stride main loop: two steps per iteration. Scalars 0..3 come from lanes | |
; 0..3 of the load at [x5]; scalars 4..7 from lanes 0..3 of the load at [x5, #8]. | |
mov x11, x5 | |
ldr q16, [x5] | |
dup.8h v17, v16[0] | |
ldr q18, [x4] | |
; InlineAsm Start | |
fmla.8h v7, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[1] | |
; InlineAsm Start | |
fmla.8h v6, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[2] | |
; InlineAsm Start | |
fmla.8h v5, v18, v17 | |
; InlineAsm End | |
dup.8h v16, v16[3] | |
; InlineAsm Start | |
fmla.8h v4, v18, v16 | |
; InlineAsm End | |
; NOTE(review): this is a 16-byte load at +8 of which only lanes 0..3 are used, so | |
; it reads 8 bytes beyond the eight scalars of the step - presumably the buffer | |
; is padded or followed by further steps; confirm against the caller. | |
ldur q16, [x5, #8] | |
dup.8h v17, v16[0] | |
; InlineAsm Start | |
fmla.8h v3, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[1] | |
; InlineAsm Start | |
fmla.8h v2, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[2] | |
; InlineAsm Start | |
fmla.8h v1, v18, v17 | |
; InlineAsm End | |
dup.8h v16, v16[3] | |
; InlineAsm Start | |
fmla.8h v0, v18, v16 | |
; InlineAsm End | |
; Second step of the pair: scalars at x12 = x5 + x10, vector at x4 + x9. | |
add x12, x5, x10 | |
add x5, x12, x10 | |
ldr q16, [x12] | |
dup.8h v17, v16[0] | |
ldr q18, [x4, x9] | |
; InlineAsm Start | |
fmla.8h v7, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[1] | |
; InlineAsm Start | |
fmla.8h v6, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[2] | |
; InlineAsm Start | |
fmla.8h v5, v18, v17 | |
; InlineAsm End | |
dup.8h v16, v16[3] | |
; InlineAsm Start | |
fmla.8h v4, v18, v16 | |
; InlineAsm End | |
ldur q16, [x12, #8] | |
dup.8h v17, v16[0] | |
; InlineAsm Start | |
fmla.8h v3, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[1] | |
; InlineAsm Start | |
fmla.8h v2, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[2] | |
; InlineAsm Start | |
fmla.8h v1, v18, v17 | |
; InlineAsm End | |
dup.8h v16, v16[3] | |
; InlineAsm Start | |
fmla.8h v0, v18, v16 | |
; InlineAsm End | |
add x4, x4, x9, lsl #1 | |
subs x8, x8, #1 | |
b.ne LBB26_6 | |
add x5, x11, x10, lsl #1 | |
tbz w2, #0, LBB26_13 | |
; Unit-stride odd remaining step. This label is the target of the `tbnz` in the | |
; unit-stride short-count path; it was missing from the listing and is restored. | |
LBB26_8: | |
ldr q16, [x5] | |
dup.8h v17, v16[0] | |
ldr q18, [x4] | |
; InlineAsm Start | |
fmla.8h v7, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[1] | |
dup.8h v19, v16[2] | |
; InlineAsm Start | |
fmla.8h v6, v18, v17 | |
; InlineAsm End | |
; InlineAsm Start | |
fmla.8h v5, v18, v19 | |
; InlineAsm End | |
dup.8h v16, v16[3] | |
; InlineAsm Start | |
fmla.8h v4, v18, v16 | |
; InlineAsm End | |
ldur q16, [x5, #8] | |
dup.8h v17, v16[0] | |
; InlineAsm Start | |
fmla.8h v3, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[1] | |
; InlineAsm Start | |
fmla.8h v2, v18, v17 | |
; InlineAsm End | |
dup.8h v17, v16[2] | |
; InlineAsm Start | |
fmla.8h v1, v18, v17 | |
; InlineAsm End | |
dup.8h v16, v16[3] | |
; InlineAsm Start | |
fmla.8h v0, v18, v16 | |
; InlineAsm End | |
b LBB26_13 | |
LBB26_9: | |
; Strided loop setup: x11 = vector operand byte stride; | |
; x12 = byte step from the eighth scalar of a step to the first scalar of the next; | |
; x13 = byte advance of the scalar operand over two steps. | |
lsl x11, x11, #1 | |
lsl x13, x12, #1 | |
mov w12, #14 | |
movi.2d v1, #0000000000000000 | |
msub x12, x9, x12, x13 | |
lsl x13, x13, #1 | |
movi.2d v2, #0000000000000000 | |
movi.2d v3, #0000000000000000 | |
movi.2d v4, #0000000000000000 | |
movi.2d v5, #0000000000000000 | |
movi.2d v6, #0000000000000000 | |
movi.2d v7, #0000000000000000 | |
LBB26_10: | |
; Strided main loop: two steps per iteration. x14 tracks where x5 must point | |
; after both steps; x5 itself walks scalar by scalar through post-indexed loads. | |
mov x14, x5 | |
ld1r.8h { v16 }, [x14], x13 | |
ldr q17, [x4] | |
; InlineAsm Start | |
fmla.8h v7, v17, v16 | |
; InlineAsm End | |
add x5, x5, x10 | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v6, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v5, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v4, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v3, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v2, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v1, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v17, v16 | |
; InlineAsm End | |
; Second step of the pair: vector operand comes from x4 + x11. | |
ld1r.8h { v16 }, [x5], x10 | |
ldr q17, [x4, x11] | |
; InlineAsm Start | |
fmla.8h v7, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v6, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v5, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v4, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v3, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v2, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x10 | |
; InlineAsm Start | |
fmla.8h v1, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x5], x12 | |
; InlineAsm Start | |
fmla.8h v0, v17, v16 | |
; InlineAsm End | |
add x4, x4, x11, lsl #1 | |
subs x8, x8, #1 | |
b.ne LBB26_10 | |
mov x5, x14 | |
tbz w2, #0, LBB26_13 | |
; Strided odd remaining step. This label is the target of the `tbnz` in the | |
; strided short-count path; it was missing from the listing and is restored. | |
LBB26_12: | |
lsl x8, x9, #4 | |
sub x8, x8, x10 | |
mov x11, x5 | |
ld1r.8h { v16 }, [x11], x8 | |
ldr q17, [x4] | |
; InlineAsm Start | |
fmla.8h v7, v17, v16 | |
; InlineAsm End | |
add x8, x5, x10 | |
ld1r.8h { v16 }, [x8] | |
; InlineAsm Start | |
fmla.8h v6, v17, v16 | |
; InlineAsm End | |
add x8, x5, x9, lsl #2 | |
ld1r.8h { v16 }, [x8] | |
; InlineAsm Start | |
fmla.8h v5, v17, v16 | |
; InlineAsm End | |
mov w8, #6 | |
madd x8, x9, x8, x5 | |
ld1r.8h { v16 }, [x8] | |
; InlineAsm Start | |
fmla.8h v4, v17, v16 | |
; InlineAsm End | |
add x8, x5, x9, lsl #3 | |
ld1r.8h { v16 }, [x8] | |
; InlineAsm Start | |
fmla.8h v3, v17, v16 | |
; InlineAsm End | |
mov w8, #10 | |
madd x8, x9, x8, x5 | |
ld1r.8h { v16 }, [x8] | |
; InlineAsm Start | |
fmla.8h v2, v17, v16 | |
; InlineAsm End | |
mov w8, #12 | |
madd x8, x9, x8, x5 | |
ld1r.8h { v16 }, [x8] | |
; InlineAsm Start | |
fmla.8h v1, v17, v16 | |
; InlineAsm End | |
ld1r.8h { v16 }, [x11] | |
; InlineAsm Start | |
fmla.8h v0, v17, v16 | |
; InlineAsm End | |
LBB26_13: | |
; Store phase. w8 = accumulator factor bits, w9 = destination factor bits, | |
; w10 = mode byte. The vector path needs x0 == 8, x1 == 8 and x7 == 1. | |
ldrh w8, [sp, #154] | |
ldrh w9, [sp, #152] | |
ldrb w10, [sp, #156] | |
cmp x0, #8 | |
b.ne LBB26_19 | |
cmp x1, #8 | |
b.ne LBB26_19 | |
cmp x7, #1 | |
b.ne LBB26_19 | |
dup.8h v16, w8 | |
and w8, w10, #0xff | |
cmp w8, #1 | |
b.eq LBB26_27 | |
cmp w8, #2 | |
b.ne LBB26_28 | |
; Mode 2, vector path: each column becomes v17*dst + v16*acc. | |
dup.8h v17, w9 | |
ldr q18, [x3] | |
; InlineAsm Start | |
fmul.8h v19, v17, v18 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v18, v16, v7 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v7, v19, v18 | |
; InlineAsm End | |
str q7, [x3] | |
lsl x8, x6, #1 | |
ldr q7, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v18, v17, v7 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v7, v16, v6 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v6, v18, v7 | |
; InlineAsm End | |
str q6, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q6, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v7, v17, v6 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v6, v16, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v5, v7, v6 | |
; InlineAsm End | |
str q5, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q5, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v6, v17, v5 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v5, v16, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v4, v6, v5 | |
; InlineAsm End | |
str q4, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v5, v17, v4 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v4, v16, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v3, v5, v4 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v4, v17, v3 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v3, v16, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v2, v4, v3 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #12 | |
mul x8, x6, x8 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v3, v17, v2 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v2, v16, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v1, v3, v2 | |
; InlineAsm End | |
str q1, [x3, x8] | |
mov w8, #14 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmul.8h v2, v17, v1 | |
; InlineAsm End | |
; InlineAsm Start | |
fmul.8h v1, v16, v0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd.8h v0, v2, v1 | |
; InlineAsm End | |
str q0, [x3, x8] | |
b LBB26_42 | |
LBB26_19: | |
; Scalar store path: spill the eight accumulators so that column j lives at | |
; sp + 16*j, then walk x1 columns by x0 rows one half-precision element at a time. | |
stp q7, q6, [sp] | |
stp q5, q4, [sp, #32] | |
stp q3, q2, [sp, #64] | |
and w10, w10, #0xff | |
stp q1, q0, [sp, #96] | |
cmp w10, #2 | |
b.eq LBB26_30 | |
cmp w10, #1 | |
b.ne LBB26_36 | |
; Mode 1, scalar path: dst += factor * acc. | |
cbz x1, LBB26_42 | |
cbz x0, LBB26_42 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB26_24: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB26_25: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x14] | |
; InlineAsm Start | |
fadd h1, h0, h2 | |
; InlineAsm End | |
str h1, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB26_25 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB26_24 | |
b LBB26_42 | |
LBB26_27: | |
; Mode 1, vector path: fused dst += v16 * acc for each column. | |
ldr q17, [x3] | |
; InlineAsm Start | |
fmla.8h v17, v16, v7 | |
; InlineAsm End | |
str q17, [x3] | |
lsl x8, x6, #1 | |
ldr q7, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v7, v16, v6 | |
; InlineAsm End | |
str q7, [x3, x8] | |
lsl x8, x6, #2 | |
ldr q6, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v6, v16, v5 | |
; InlineAsm End | |
str q6, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
ldr q5, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v5, v16, v4 | |
; InlineAsm End | |
str q5, [x3, x8] | |
lsl x8, x6, #3 | |
ldr q4, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v4, v16, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
ldr q3, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v3, v16, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #12 | |
mul x8, x6, x8 | |
ldr q2, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v2, v16, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #14 | |
mul x8, x6, x8 | |
ldr q1, [x3, x8] | |
; InlineAsm Start | |
fmla.8h v1, v16, v0 | |
; InlineAsm End | |
b LBB26_29 | |
LBB26_28: | |
; Other modes, vector path: dst = v16 * acc; the destination is not read. | |
; InlineAsm Start | |
fmul.8h v17, v16, v7 | |
; InlineAsm End | |
str q17, [x3] | |
lsl x8, x6, #1 | |
; InlineAsm Start | |
fmul.8h v7, v16, v6 | |
; InlineAsm End | |
str q7, [x3, x8] | |
lsl x8, x6, #2 | |
; InlineAsm Start | |
fmul.8h v6, v16, v5 | |
; InlineAsm End | |
str q6, [x3, x8] | |
mov w8, #6 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v5, v16, v4 | |
; InlineAsm End | |
str q5, [x3, x8] | |
lsl x8, x6, #3 | |
; InlineAsm Start | |
fmul.8h v4, v16, v3 | |
; InlineAsm End | |
str q4, [x3, x8] | |
mov w8, #10 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v3, v16, v2 | |
; InlineAsm End | |
str q3, [x3, x8] | |
mov w8, #12 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v2, v16, v1 | |
; InlineAsm End | |
str q2, [x3, x8] | |
mov w8, #14 | |
mul x8, x6, x8 | |
; InlineAsm Start | |
fmul.8h v1, v16, v0 | |
; InlineAsm End | |
LBB26_29: | |
; Shared final store of the last column for the two vector paths above. | |
str q1, [x3, x8] | |
b LBB26_42 | |
LBB26_30: | |
; Mode 2, scalar path: dst = w9-factor * dst + w8-factor * acc. | |
cbz x1, LBB26_42 | |
cbz x0, LBB26_42 | |
mov x10, #0 | |
lsl x11, x6, #1 | |
lsl x12, x7, #1 | |
mov x13, sp | |
LBB26_33: | |
mov x14, x0 | |
mov x15, x3 | |
mov x16, x13 | |
LBB26_34: | |
ldr h0, [x15] | |
fmov s1, w9 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
ldr h0, [x16], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h3, h1, h0 | |
; InlineAsm End | |
; InlineAsm Start | |
fadd h0, h2, h3 | |
; InlineAsm End | |
str h0, [x15] | |
add x15, x15, x12 | |
subs x14, x14, #1 | |
b.ne LBB26_34 | |
add x10, x10, #1 | |
add x13, x13, #16 | |
add x3, x3, x11 | |
cmp x10, x1 | |
b.ne LBB26_33 | |
b LBB26_42 | |
LBB26_36: | |
; Other modes, scalar path: dst = factor * acc. | |
cbz x1, LBB26_42 | |
cbz x0, LBB26_42 | |
mov x9, #0 | |
lsl x10, x6, #1 | |
lsl x11, x7, #1 | |
mov x12, sp | |
LBB26_39: | |
mov x13, x0 | |
mov x14, x3 | |
mov x15, x12 | |
LBB26_40: | |
ldr h0, [x15], #2 | |
fmov s1, w8 | |
; InlineAsm Start | |
fmul h2, h1, h0 | |
; InlineAsm End | |
str h2, [x14] | |
add x14, x14, x11 | |
subs x13, x13, #1 | |
b.ne LBB26_40 | |
add x9, x9, #1 | |
add x12, x12, #16 | |
add x3, x3, x10 | |
cmp x9, x1 | |
b.ne LBB26_39 | |
LBB26_42: | |
; Common exit: release the spill area. | |
add sp, sp, #128 | |
.cfi_def_cfa_offset 0 | |
ret | |
Lfunc_end26: | |
.cfi_endproc | |
.p2align 2 | |
;-----------------------------------------------------------------------
; gemm_f16::microkernel::neon::f16::x2x1
;
; Half-precision NEON micro-kernel for a 16-row x 1-column tile.
; Accumulates  acc[0..16] += lhs[0..16] * rhs  over x2 steps (two 8-lane
; .8h vectors, v1 = rows 0-7, v0 = rows 8-15), then scales the result and
; stores or merges it into the destination.
;
; Inputs as used by the code (bracketed names are guesses - TODO confirm
; against the Rust caller):
;   x0  elements stored per column; 16 enables the vector store path  [m]
;   x1  columns stored; 1 enables the vector store path               [n]
;   x2  number of accumulation steps                                  [k]
;   x3  destination pointer                                           [dst]
;   x4  pointer to the 32-byte left-hand operand of the first step    [lhs]
;   x5  pointer to the right-hand halfword of the first step          [rhs]
;   x6  destination column stride, in halfwords                       [dst_cs]
;   x7  destination element stride, in halfwords; 1 enables the
;       vector store path                                             [dst_rs]
;   [entry sp + 0]   left-hand stride per step, in halfwords
;   [entry sp + 8]   right-hand stride per step, in halfwords
;   [entry sp + 24]  halfword g: factor for the existing destination
;                    (only read in mode 2)
;   [entry sp + 26]  halfword f: factor for the accumulated product
;   [entry sp + 28]  byte mode: 1 => dst += f*acc
;                               2 => dst  = g*dst + f*acc
;                               other => dst = f*acc
;
; Leaf function (no calls). 32-byte frame holds the spilled accumulators
; for the scalar store path. Scratch: x8-x16, v0-v6, flags; x3-x5 are
; advanced in place.
;
; In the compiler listing every fmla/fmul/fadd below was wrapped in
; "InlineAsm Start/End" markers, i.e. they come from inline asm in the
; source; the markers are summarised here instead of repeated per line.
;
; Review fix: the label LBB27_5 (target of the tbnz in the k < 2 prologue)
; was missing from the listing and has been restored on the odd-step tail.
;-----------------------------------------------------------------------
gemm_f16::microkernel::neon::f16::x2x1:
Lfunc_begin27:
        .cfi_startproc
        sub     sp, sp, #32
        .cfi_def_cfa_offset 32
        movi.2d v0, #0000000000000000
        cmp     x2, #2
        b.hs    LBB27_2
        ; k < 2: no paired iterations; run the single-step tail only if k is odd
        movi.2d v1, #0000000000000000
        tbnz    w2, #0, LBB27_5
        b       LBB27_6
LBB27_2:
        ldp     x9, x10, [sp, #32]              ; stack args at entry sp + 0 / + 8
        lsr     x8, x2, #1                      ; x8 = k / 2 paired iterations
        lsl     x9, x9, #1                      ; left-hand stride in bytes
        lsl     x10, x10, #1                    ; right-hand stride in bytes
        lsl     x11, x10, #1                    ; right-hand advance per pair
        movi.2d v1, #0000000000000000
        mov     x12, x5
LBB27_3:
        ; two accumulation steps per iteration
        mov     x13, x4                         ; remember where this pair started
        ld1r.8h { v2 }, [x12], x11              ; broadcast rhs of step 0, x12 -> next pair
        ldp     q3, q4, [x4]
        fmla.8h v1, v3, v2
        fmla.8h v0, v4, v2
        add     x14, x5, x10
        ld1r.8h { v2 }, [x14]                   ; broadcast rhs of step 1
        add     x14, x4, x9
        add     x4, x14, x9
        ldp     q3, q4, [x14]
        fmla.8h v1, v3, v2
        fmla.8h v0, v4, v2
        mov     x5, x12
        subs    x8, x8, #1
        b.ne    LBB27_3
        add     x4, x13, x9, lsl #1             ; same address the loop already left in x4
        tbz     w2, #0, LBB27_6
LBB27_5:
        ; odd k: one remaining step
        ld1r.8h { v2 }, [x5]
        ldp     q3, q4, [x4]
        fmla.8h v1, v3, v2
        fmla.8h v0, v4, v2
LBB27_6:
        ldrh    w8, [sp, #58]                   ; f  (entry sp + 26)
        ldrh    w9, [sp, #56]                   ; g  (entry sp + 24)
        ldrb    w10, [sp, #60]                  ; mode (entry sp + 28)
        ; vector store path requires x0 == 16, x1 == 1 and x7 == 1
        cmp     x0, #16
        b.ne    LBB27_12
        cmp     x1, #1
        b.ne    LBB27_12
        cmp     x7, #1
        b.ne    LBB27_12
        dup.8h  v2, w8                          ; f in every lane
        and     w8, w10, #0xff
        cmp     w8, #1
        b.eq    LBB27_20
        cmp     w8, #2
        b.ne    LBB27_21
        ; mode 2, vector: dst = g*dst + f*acc
        dup.8h  v3, w9
        ldp     q4, q5, [x3]
        fmul.8h v6, v3, v4
        fmul.8h v4, v2, v1
        fadd.8h v1, v6, v4
        fmul.8h v4, v3, v5
        fmul.8h v3, v2, v0
        fadd.8h v0, v4, v3
        stp     q1, q0, [x3]
        b       LBB27_35
LBB27_12:
        ; scalar store path: spill the accumulators and walk them element-wise
        stp     q1, q0, [sp]
        and     w10, w10, #0xff
        cmp     w10, #2
        b.eq    LBB27_23
        cmp     w10, #1
        b.ne    LBB27_29
        ; mode 1, scalar: dst += f*acc
        cbz     x1, LBB27_35
        cbz     x0, LBB27_35
        mov     x9, #0                          ; column counter
        lsl     x10, x6, #1                     ; column stride in bytes
        lsl     x11, x7, #1                     ; element stride in bytes
        mov     x12, sp                         ; current spilled column
LBB27_17:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB27_18:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        ldr     h0, [x14]
        fadd    h1, h0, h2
        str     h1, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB27_18
        add     x9, x9, #1
        add     x12, x12, #32                   ; next spilled column (16 halfwords)
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB27_17
        b       LBB27_35
LBB27_20:
        ; mode 1, vector: dst += f*acc
        ldr     q3, [x3]
        fmla.8h v3, v2, v1
        str     q3, [x3]
        ldr     q1, [x3, #16]
        fmla.8h v1, v2, v0
        b       LBB27_22
LBB27_21:
        ; any other mode, vector: dst = f*acc
        fmul.8h v3, v2, v1
        fmul.8h v1, v2, v0
        str     q3, [x3]
LBB27_22:
        str     q1, [x3, #16]                   ; shared store of rows 8-15
        b       LBB27_35
LBB27_23:
        ; mode 2, scalar: dst = g*dst + f*acc
        cbz     x1, LBB27_35
        cbz     x0, LBB27_35
        mov     x10, #0
        lsl     x11, x6, #1
        lsl     x12, x7, #1
        mov     x13, sp
LBB27_26:
        mov     x14, x0
        mov     x15, x3
        mov     x16, x13
LBB27_27:
        ldr     h0, [x15]
        fmov    s1, w9
        fmul    h2, h1, h0
        ldr     h0, [x16], #2
        fmov    s1, w8
        fmul    h3, h1, h0
        fadd    h0, h2, h3
        str     h0, [x15]
        add     x15, x15, x12
        subs    x14, x14, #1
        b.ne    LBB27_27
        add     x10, x10, #1
        add     x13, x13, #32
        add     x3, x3, x11
        cmp     x10, x1
        b.ne    LBB27_26
        b       LBB27_35
LBB27_29:
        ; any other mode, scalar: dst = f*acc
        cbz     x1, LBB27_35
        cbz     x0, LBB27_35
        mov     x9, #0
        lsl     x10, x6, #1
        lsl     x11, x7, #1
        mov     x12, sp
LBB27_32:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB27_33:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        str     h2, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB27_33
        add     x9, x9, #1
        add     x12, x12, #32
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB27_32
LBB27_35:
        add     sp, sp, #32
        .cfi_def_cfa_offset 0
        ret
Lfunc_end27:
        .cfi_endproc
.p2align 2 | |
;-----------------------------------------------------------------------
; gemm_f16::microkernel::neon::f16::x2x2
;
; Half-precision NEON micro-kernel for a 16-row x 2-column tile.
; Accumulators: column 0 = v3 (rows 0-7) / v2 (rows 8-15),
;               column 1 = v1 / v0.
; After x2 accumulation steps the tile is scaled and stored or merged
; into the destination.
;
; Inputs as used by the code (bracketed names are guesses - TODO confirm
; against the Rust caller):
;   x0  elements stored per column; 16 enables the vector store path  [m]
;   x1  columns stored; 2 enables the vector store path               [n]
;   x2  number of accumulation steps                                  [k]
;   x3  destination pointer                                           [dst]
;   x4  pointer to the 32-byte left-hand operand of the first step    [lhs]
;   x5  pointer to the right-hand halfwords of the first step         [rhs]
;   x6  destination column stride, in halfwords                       [dst_cs]
;   x7  destination element stride, in halfwords; 1 enables the
;       vector store path                                             [dst_rs]
;   [entry sp + 0]   left-hand stride per step, in halfwords
;   [entry sp + 8]   right-hand stride per step, in halfwords
;   [entry sp + 16]  right-hand stride between columns, in halfwords
;   [entry sp + 24]  halfword g: factor for the existing destination
;                    (only read in mode 2)
;   [entry sp + 26]  halfword f: factor for the accumulated product
;   [entry sp + 28]  byte mode: 1 => dst += f*acc
;                               2 => dst  = g*dst + f*acc
;                               other => dst = f*acc
;
; Leaf function (no calls). 64-byte frame holds the spilled accumulators
; for the scalar store path. Scratch: x8-x16, v0-v7, v16, flags; x3-x5
; are advanced in place.
;
; In the compiler listing every fmla/fmul/fadd below was wrapped in
; "InlineAsm Start/End" markers, i.e. they come from inline asm in the
; source; the markers are summarised here instead of repeated per line.
;
; Review fix: the label LBB28_5 (target of the tbnz in the k < 2 prologue)
; was missing from the listing and has been restored on the odd-step tail.
;-----------------------------------------------------------------------
gemm_f16::microkernel::neon::f16::x2x2:
Lfunc_begin28:
        .cfi_startproc
        sub     sp, sp, #64
        .cfi_def_cfa_offset 64
        ldr     x8, [sp, #80]                   ; stack arg at entry sp + 16
        movi.2d v0, #0000000000000000
        lsl     x8, x8, #1                      ; rhs column stride in bytes
        cmp     x2, #2
        b.hs    LBB28_2
        ; k < 2: no paired iterations; run the single-step tail only if k is odd
        movi.2d v1, #0000000000000000
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        tbnz    w2, #0, LBB28_5
        b       LBB28_6
LBB28_2:
        ldp     x10, x11, [sp, #64]             ; stack args at entry sp + 0 / + 8
        lsr     x9, x2, #1                      ; x9 = k / 2 paired iterations
        lsl     x10, x10, #1                    ; left-hand stride in bytes
        lsl     x11, x11, #1                    ; right-hand step stride in bytes
        add     x12, x8, x11                    ; offset of (step 1, column 1)
        lsl     x13, x11, #1                    ; right-hand advance per pair
        movi.2d v1, #0000000000000000
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        mov     x14, x5
LBB28_3:
        ; two accumulation steps per iteration
        mov     x15, x4                         ; remember where this pair started
        ld1r.8h { v4 }, [x14], x13              ; step 0, column 0; x14 -> next pair
        ldp     q5, q6, [x4]
        fmla.8h v3, v5, v4
        fmla.8h v2, v6, v4
        add     x16, x5, x8
        ld1r.8h { v4 }, [x16]                   ; step 0, column 1
        fmla.8h v1, v5, v4
        fmla.8h v0, v6, v4
        add     x16, x5, x11
        ld1r.8h { v4 }, [x16]                   ; step 1, column 0
        add     x16, x4, x10
        add     x4, x16, x10
        ldp     q5, q6, [x16]
        fmla.8h v3, v5, v4
        fmla.8h v2, v6, v4
        add     x16, x5, x12
        ld1r.8h { v4 }, [x16]                   ; step 1, column 1
        fmla.8h v1, v5, v4
        fmla.8h v0, v6, v4
        mov     x5, x14
        subs    x9, x9, #1
        b.ne    LBB28_3
        add     x4, x15, x10, lsl #1            ; same address the loop already left in x4
        tbz     w2, #0, LBB28_6
LBB28_5:
        ; odd k: one remaining step
        ld1r.8h { v4 }, [x5], x8                ; column 0, then x5 -> column 1
        ldp     q5, q6, [x4]
        fmla.8h v3, v5, v4
        fmla.8h v2, v6, v4
        ld1r.8h { v4 }, [x5]
        fmla.8h v1, v5, v4
        fmla.8h v0, v6, v4
LBB28_6:
        ldrh    w8, [sp, #90]                   ; f  (entry sp + 26)
        ldrh    w9, [sp, #88]                   ; g  (entry sp + 24)
        ldrb    w10, [sp, #92]                  ; mode (entry sp + 28)
        ; vector store path requires x0 == 16, x1 == 2 and x7 == 1
        cmp     x0, #16
        b.ne    LBB28_12
        cmp     x1, #2
        b.ne    LBB28_12
        cmp     x7, #1
        b.ne    LBB28_12
        dup.8h  v4, w8                          ; f in every lane
        and     w8, w10, #0xff
        cmp     w8, #1
        b.eq    LBB28_20
        cmp     w8, #2
        b.ne    LBB28_21
        ; mode 2, vector: dst = g*dst + f*acc
        dup.8h  v5, w9
        ldp     q6, q7, [x3]
        fmul.8h v16, v5, v6
        fmul.8h v6, v4, v3
        fadd.8h v3, v16, v6
        fmul.8h v6, v5, v7
        fmul.8h v7, v4, v2
        fadd.8h v2, v6, v7
        stp     q3, q2, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        ldp     q2, q3, [x8]
        fmul.8h v6, v5, v2
        fmul.8h v2, v4, v1
        fadd.8h v1, v6, v2
        fmul.8h v2, v5, v3
        fmul.8h v3, v4, v0
        fadd.8h v0, v2, v3
        stp     q1, q0, [x8]
        b       LBB28_35
LBB28_12:
        ; scalar store path: spill the accumulators and walk them element-wise
        stp     q3, q2, [sp]
        stp     q1, q0, [sp, #32]
        and     w10, w10, #0xff
        cmp     w10, #2
        b.eq    LBB28_23
        cmp     w10, #1
        b.ne    LBB28_29
        ; mode 1, scalar: dst += f*acc
        cbz     x1, LBB28_35
        cbz     x0, LBB28_35
        mov     x9, #0                          ; column counter
        lsl     x10, x6, #1                     ; column stride in bytes
        lsl     x11, x7, #1                     ; element stride in bytes
        mov     x12, sp                         ; current spilled column
LBB28_17:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB28_18:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        ldr     h0, [x14]
        fadd    h1, h0, h2
        str     h1, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB28_18
        add     x9, x9, #1
        add     x12, x12, #32                   ; next spilled column (16 halfwords)
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB28_17
        b       LBB28_35
LBB28_20:
        ; mode 1, vector: dst += f*acc
        ldr     q5, [x3]
        fmla.8h v5, v4, v3
        ldr     q3, [x3, #16]
        fmla.8h v3, v4, v2
        stp     q5, q3, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        ldr     q2, [x8]
        fmla.8h v2, v4, v1
        str     q2, [x8]
        ldr     q1, [x8, #16]
        fmla.8h v1, v4, v0
        b       LBB28_22
LBB28_21:
        ; any other mode, vector: dst = f*acc
        fmul.8h v5, v4, v3
        fmul.8h v3, v4, v2
        stp     q5, q3, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        fmul.8h v2, v4, v1
        fmul.8h v1, v4, v0
        str     q2, [x8]
LBB28_22:
        str     q1, [x8, #16]                   ; shared store of column 1, rows 8-15
        b       LBB28_35
LBB28_23:
        ; mode 2, scalar: dst = g*dst + f*acc
        cbz     x1, LBB28_35
        cbz     x0, LBB28_35
        mov     x10, #0
        lsl     x11, x6, #1
        lsl     x12, x7, #1
        mov     x13, sp
LBB28_26:
        mov     x14, x0
        mov     x15, x3
        mov     x16, x13
LBB28_27:
        ldr     h0, [x15]
        fmov    s1, w9
        fmul    h2, h1, h0
        ldr     h0, [x16], #2
        fmov    s1, w8
        fmul    h3, h1, h0
        fadd    h0, h2, h3
        str     h0, [x15]
        add     x15, x15, x12
        subs    x14, x14, #1
        b.ne    LBB28_27
        add     x10, x10, #1
        add     x13, x13, #32
        add     x3, x3, x11
        cmp     x10, x1
        b.ne    LBB28_26
        b       LBB28_35
LBB28_29:
        ; any other mode, scalar: dst = f*acc
        cbz     x1, LBB28_35
        cbz     x0, LBB28_35
        mov     x9, #0
        lsl     x10, x6, #1
        lsl     x11, x7, #1
        mov     x12, sp
LBB28_32:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB28_33:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        str     h2, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB28_33
        add     x9, x9, #1
        add     x12, x12, #32
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB28_32
LBB28_35:
        add     sp, sp, #64
        .cfi_def_cfa_offset 0
        ret
Lfunc_end28:
        .cfi_endproc
.p2align 2 | |
;-----------------------------------------------------------------------
; gemm_f16::microkernel::neon::f16::x2x3
;
; Half-precision NEON micro-kernel for a 16-row x 3-column tile.
; Accumulators: column 0 = v5 (rows 0-7) / v4 (rows 8-15),
;               column 1 = v3 / v2, column 2 = v1 / v0.
; After x2 accumulation steps the tile is scaled and stored or merged
; into the destination.
;
; Inputs as used by the code (bracketed names are guesses - TODO confirm
; against the Rust caller):
;   x0  elements stored per column; 16 enables the vector store path  [m]
;   x1  columns stored; 3 enables the vector store path               [n]
;   x2  number of accumulation steps                                  [k]
;   x3  destination pointer                                           [dst]
;   x4  pointer to the 32-byte left-hand operand of the first step    [lhs]
;   x5  pointer to the right-hand halfwords of the first step         [rhs]
;   x6  destination column stride, in halfwords                       [dst_cs]
;   x7  destination element stride, in halfwords; 1 enables the
;       vector store path                                             [dst_rs]
;   [entry sp + 0]   left-hand stride per step, in halfwords
;   [entry sp + 8]   right-hand stride per step, in halfwords
;   [entry sp + 16]  right-hand stride between columns, in halfwords
;   [entry sp + 24]  halfword g: factor for the existing destination
;                    (only read in mode 2)
;   [entry sp + 26]  halfword f: factor for the accumulated product
;   [entry sp + 28]  byte mode: 1 => dst += f*acc
;                               2 => dst  = g*dst + f*acc
;                               other => dst = f*acc
;
; Leaf function (no calls). 112-byte frame: 96 bytes of accumulator
; spill for the scalar store path plus the saved x19/x20 pair.
; Scratch: x8-x17, v0-v7, v16-v18, flags; x3-x5 are advanced in place.
;
; In the compiler listing every fmla/fmul/fadd below was wrapped in
; "InlineAsm Start/End" markers, i.e. they come from inline asm in the
; source; the markers are summarised here instead of repeated per line.
;
; Review fix: the label LBB29_5 (target of the tbnz in the k < 2 prologue)
; was missing from the listing and has been restored on the odd-step tail.
;-----------------------------------------------------------------------
gemm_f16::microkernel::neon::f16::x2x3:
Lfunc_begin29:
        .cfi_startproc
        sub     sp, sp, #112
        .cfi_def_cfa_offset 112
        stp     x20, x19, [sp, #96]
        .cfi_offset w19, -8
        .cfi_offset w20, -16
        ldr     x8, [sp, #128]                  ; stack arg at entry sp + 16
        movi.2d v0, #0000000000000000
        lsl     x9, x8, #2                      ; byte offset of rhs column 2
        cmp     x2, #2
        b.hs    LBB29_2
        ; k < 2: no paired iterations; run the single-step tail only if k is odd
        movi.2d v1, #0000000000000000
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        movi.2d v4, #0000000000000000
        movi.2d v5, #0000000000000000
        tbnz    w2, #0, LBB29_5
        b       LBB29_6
LBB29_2:
        ldp     x12, x13, [sp, #112]            ; stack args at entry sp + 0 / + 8
        lsr     x10, x2, #1                     ; x10 = k / 2 paired iterations
        lsl     x11, x8, #1                     ; byte offset of rhs column 1
        lsl     x12, x12, #1                    ; left-hand stride in bytes
        lsl     x13, x13, #1                    ; right-hand step stride in bytes
        add     x14, x9, x13                    ; offset of (step 1, column 2)
        add     x15, x11, x13                   ; offset of (step 1, column 1)
        lsl     x16, x13, #1                    ; right-hand advance per pair
        movi.2d v1, #0000000000000000
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        movi.2d v4, #0000000000000000
        movi.2d v5, #0000000000000000
        mov     x17, x5
LBB29_3:
        ; two accumulation steps per iteration
        mov     x19, x4                         ; remember where this pair started
        ld1r.8h { v6 }, [x17], x16              ; step 0, column 0; x17 -> next pair
        ldp     q7, q16, [x4]
        fmla.8h v5, v7, v6
        fmla.8h v4, v16, v6
        add     x4, x5, x11
        ld1r.8h { v6 }, [x4]                    ; step 0, column 1
        fmla.8h v3, v7, v6
        fmla.8h v2, v16, v6
        add     x4, x5, x9
        ld1r.8h { v6 }, [x4]                    ; step 0, column 2
        fmla.8h v1, v7, v6
        fmla.8h v0, v16, v6
        add     x4, x5, x13
        ld1r.8h { v6 }, [x4]                    ; step 1, column 0
        add     x20, x19, x12
        add     x4, x20, x12
        ldp     q7, q16, [x20]
        fmla.8h v5, v7, v6
        fmla.8h v4, v16, v6
        add     x20, x5, x15
        ld1r.8h { v6 }, [x20]                   ; step 1, column 1
        fmla.8h v3, v7, v6
        fmla.8h v2, v16, v6
        add     x5, x5, x14
        ld1r.8h { v6 }, [x5]                    ; step 1, column 2
        fmla.8h v1, v7, v6
        fmla.8h v0, v16, v6
        mov     x5, x17
        subs    x10, x10, #1
        b.ne    LBB29_3
        add     x4, x19, x12, lsl #1            ; same address the loop already left in x4
        tbz     w2, #0, LBB29_6
LBB29_5:
        ; odd k: one remaining step
        mov     x10, x5
        ld1r.8h { v6 }, [x10], x9               ; column 0, then x10 -> column 2
        ldp     q7, q16, [x4]
        fmla.8h v5, v7, v6
        fmla.8h v4, v16, v6
        add     x8, x5, x8, lsl #1
        ld1r.8h { v6 }, [x8]                    ; column 1
        fmla.8h v3, v7, v6
        fmla.8h v2, v16, v6
        ld1r.8h { v6 }, [x10]                   ; column 2
        fmla.8h v1, v7, v6
        fmla.8h v0, v16, v6
LBB29_6:
        ldrh    w8, [sp, #138]                  ; f  (entry sp + 26)
        ldrh    w9, [sp, #136]                  ; g  (entry sp + 24)
        ldrb    w10, [sp, #140]                 ; mode (entry sp + 28)
        ; vector store path requires x0 == 16, x1 == 3 and x7 == 1
        cmp     x0, #16
        b.ne    LBB29_12
        cmp     x1, #3
        b.ne    LBB29_12
        cmp     x7, #1
        b.ne    LBB29_12
        dup.8h  v6, w8                          ; f in every lane
        and     w8, w10, #0xff
        cmp     w8, #1
        b.eq    LBB29_20
        cmp     w8, #2
        b.ne    LBB29_21
        ; mode 2, vector: dst = g*dst + f*acc
        dup.8h  v7, w9
        ldp     q16, q17, [x3]
        fmul.8h v18, v7, v16
        fmul.8h v16, v6, v5
        fadd.8h v5, v18, v16
        fmul.8h v16, v7, v17
        fmul.8h v17, v6, v4
        fadd.8h v4, v16, v17
        stp     q5, q4, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        ldp     q4, q5, [x8]
        fmul.8h v16, v7, v4
        fmul.8h v4, v6, v3
        fadd.8h v3, v16, v4
        fmul.8h v4, v7, v5
        fmul.8h v5, v6, v2
        fadd.8h v2, v4, v5
        stp     q3, q2, [x8]
        add     x8, x3, x6, lsl #2              ; column 2
        ldp     q2, q3, [x8]
        fmul.8h v4, v7, v2
        fmul.8h v2, v6, v1
        fadd.8h v1, v4, v2
        fmul.8h v2, v7, v3
        fmul.8h v3, v6, v0
        fadd.8h v0, v2, v3
        stp     q1, q0, [x8]
        b       LBB29_35
LBB29_12:
        ; scalar store path: spill the accumulators and walk them element-wise
        stp     q5, q4, [sp]
        stp     q3, q2, [sp, #32]
        and     w10, w10, #0xff
        stp     q1, q0, [sp, #64]
        cmp     w10, #2
        b.eq    LBB29_23
        cmp     w10, #1
        b.ne    LBB29_29
        ; mode 1, scalar: dst += f*acc
        cbz     x1, LBB29_35
        cbz     x0, LBB29_35
        mov     x9, #0                          ; column counter
        lsl     x10, x6, #1                     ; column stride in bytes
        lsl     x11, x7, #1                     ; element stride in bytes
        mov     x12, sp                         ; current spilled column
LBB29_17:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB29_18:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        ldr     h0, [x14]
        fadd    h1, h0, h2
        str     h1, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB29_18
        add     x9, x9, #1
        add     x12, x12, #32                   ; next spilled column (16 halfwords)
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB29_17
        b       LBB29_35
LBB29_20:
        ; mode 1, vector: dst += f*acc
        ldr     q7, [x3]
        fmla.8h v7, v6, v5
        ldr     q5, [x3, #16]
        fmla.8h v5, v6, v4
        stp     q7, q5, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        ldr     q4, [x8]
        fmla.8h v4, v6, v3
        ldr     q3, [x8, #16]
        fmla.8h v3, v6, v2
        stp     q4, q3, [x8]
        add     x8, x3, x6, lsl #2              ; column 2
        ldr     q2, [x8]
        fmla.8h v2, v6, v1
        str     q2, [x8]
        ldr     q1, [x8, #16]
        fmla.8h v1, v6, v0
        b       LBB29_22
LBB29_21:
        ; any other mode, vector: dst = f*acc
        fmul.8h v7, v6, v5
        fmul.8h v5, v6, v4
        stp     q7, q5, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        fmul.8h v4, v6, v3
        fmul.8h v3, v6, v2
        stp     q4, q3, [x8]
        add     x8, x3, x6, lsl #2              ; column 2
        fmul.8h v2, v6, v1
        fmul.8h v1, v6, v0
        str     q2, [x8]
LBB29_22:
        str     q1, [x8, #16]                   ; shared store of column 2, rows 8-15
        b       LBB29_35
LBB29_23:
        ; mode 2, scalar: dst = g*dst + f*acc
        cbz     x1, LBB29_35
        cbz     x0, LBB29_35
        mov     x10, #0
        lsl     x11, x6, #1
        lsl     x12, x7, #1
        mov     x13, sp
LBB29_26:
        mov     x14, x0
        mov     x15, x3
        mov     x16, x13
LBB29_27:
        ldr     h0, [x15]
        fmov    s1, w9
        fmul    h2, h1, h0
        ldr     h0, [x16], #2
        fmov    s1, w8
        fmul    h3, h1, h0
        fadd    h0, h2, h3
        str     h0, [x15]
        add     x15, x15, x12
        subs    x14, x14, #1
        b.ne    LBB29_27
        add     x10, x10, #1
        add     x13, x13, #32
        add     x3, x3, x11
        cmp     x10, x1
        b.ne    LBB29_26
        b       LBB29_35
LBB29_29:
        ; any other mode, scalar: dst = f*acc
        cbz     x1, LBB29_35
        cbz     x0, LBB29_35
        mov     x9, #0
        lsl     x10, x6, #1
        lsl     x11, x7, #1
        mov     x12, sp
LBB29_32:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB29_33:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        str     h2, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB29_33
        add     x9, x9, #1
        add     x12, x12, #32
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB29_32
LBB29_35:
        ldp     x20, x19, [sp, #96]
        add     sp, sp, #112
        .cfi_def_cfa_offset 0
        .cfi_restore w19
        .cfi_restore w20
        ret
Lfunc_end29:
        .cfi_endproc
.p2align 2 | |
;-----------------------------------------------------------------------
; gemm_f16::microkernel::neon::f16::x2x4
;
; Half-precision NEON micro-kernel for a 16-row x 4-column tile.
; Accumulators: column 0 = v7 (rows 0-7) / v6 (rows 8-15),
;               column 1 = v5 / v4, column 2 = v3 / v2,
;               column 3 = v1 / v0.
; Two accumulation loops exist: one for a right-hand column stride of 1
; (a single 16-byte load per step, lanes 0-3 broadcast with dup) and a
; general strided one (one ld1r per column). The tile is then scaled and
; stored or merged into the destination.
;
; Inputs as used by the code (bracketed names are guesses - TODO confirm
; against the Rust caller):
;   x0  elements stored per column; 16 enables the vector store path  [m]
;   x1  columns stored; 4 enables the vector store path               [n]
;   x2  number of accumulation steps                                  [k]
;   x3  destination pointer                                           [dst]
;   x4  pointer to the 32-byte left-hand operand of the first step    [lhs]
;   x5  pointer to the right-hand halfwords of the first step         [rhs]
;   x6  destination column stride, in halfwords                       [dst_cs]
;   x7  destination element stride, in halfwords; 1 enables the
;       vector store path                                             [dst_rs]
;   [entry sp + 0]   left-hand stride per step, in halfwords
;   [entry sp + 8]   right-hand stride per step, in halfwords
;   [entry sp + 16]  right-hand stride between columns, in halfwords
;   [entry sp + 24]  halfword g: factor for the existing destination
;                    (only read in mode 2)
;   [entry sp + 26]  halfword f: factor for the accumulated product
;   [entry sp + 28]  byte mode: 1 => dst += f*acc
;                               2 => dst  = g*dst + f*acc
;                               other => dst = f*acc
;
; Leaf function (no calls). 160-byte frame: 128 bytes of accumulator
; spill for the scalar store path plus the saved x19-x22.
; Scratch: x8-x17, v0-v7, v16-v20, flags; x3-x5 are advanced in place.
;
; In the compiler listing every fmla/fmul/fadd below was wrapped in
; "InlineAsm Start/End" markers, i.e. they come from inline asm in the
; source; the markers are summarised here instead of repeated per line.
;
; Review fixes: the labels LBB30_8 and LBB30_12 (targets of the two tbnz
; instructions in the k < 2 prologues) were missing from the listing and
; have been restored on the odd-step tails; the CFI region opened by
; .cfi_startproc is now closed with Lfunc_end30 / .cfi_endproc.
;-----------------------------------------------------------------------
gemm_f16::microkernel::neon::f16::x2x4:
Lfunc_begin30:
        .cfi_startproc
        sub     sp, sp, #160
        .cfi_def_cfa_offset 160
        stp     x22, x21, [sp, #128]
        stp     x20, x19, [sp, #144]
        .cfi_offset w19, -8
        .cfi_offset w20, -16
        .cfi_offset w21, -24
        .cfi_offset w22, -32
        ldp     x12, x9, [sp, #168]             ; entry sp + 8 (rhs step stride), + 16 (rhs column stride)
        ldr     x11, [sp, #160]                 ; entry sp + 0 (lhs stride)
        lsr     x8, x2, #1                      ; x8 = k / 2 paired iterations
        movi.2d v0, #0000000000000000
        cmp     x9, #1
        b.ne    LBB30_3
        ; rhs column stride == 1
        cmp     x2, #2
        b.hs    LBB30_5
        movi.2d v1, #0000000000000000
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        movi.2d v4, #0000000000000000
        movi.2d v5, #0000000000000000
        movi.2d v6, #0000000000000000
        movi.2d v7, #0000000000000000
        tbnz    w2, #0, LBB30_8
        b       LBB30_13
LBB30_3:
        ; rhs column stride != 1
        lsl     x10, x9, #1                     ; byte offset of rhs column 1
        cmp     x2, #2
        b.hs    LBB30_9
        movi.2d v1, #0000000000000000
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        movi.2d v4, #0000000000000000
        movi.2d v5, #0000000000000000
        movi.2d v6, #0000000000000000
        movi.2d v7, #0000000000000000
        tbnz    w2, #0, LBB30_12
        b       LBB30_13
LBB30_5:
        lsl     x9, x11, #1                     ; left-hand stride in bytes
        movi.2d v1, #0000000000000000
        lsl     x10, x12, #1                    ; right-hand step stride in bytes
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        movi.2d v4, #0000000000000000
        movi.2d v5, #0000000000000000
        movi.2d v6, #0000000000000000
        movi.2d v7, #0000000000000000
LBB30_6:
        ; unit-stride loop: two accumulation steps per iteration
        mov     x11, x4                         ; remember where this pair started
        ldr     q16, [x5]                       ; 16-byte load; lanes 0-3 are the four columns
        dup.8h  v17, v16[0]
        ldp     q18, q19, [x4]
        fmla.8h v7, v18, v17
        fmla.8h v6, v19, v17
        dup.8h  v17, v16[1]
        fmla.8h v5, v18, v17
        fmla.8h v4, v19, v17
        dup.8h  v17, v16[2]
        fmla.8h v3, v18, v17
        fmla.8h v2, v19, v17
        dup.8h  v16, v16[3]
        fmla.8h v1, v18, v16
        fmla.8h v0, v19, v16
        add     x12, x4, x9
        add     x4, x12, x9
        ldp     q16, q17, [x12]
        ldr     q18, [x5, x10]                  ; rhs of step 1
        dup.8h  v19, v18[0]
        fmla.8h v7, v16, v19
        fmla.8h v6, v17, v19
        dup.8h  v19, v18[1]
        fmla.8h v5, v16, v19
        fmla.8h v4, v17, v19
        dup.8h  v19, v18[2]
        fmla.8h v3, v16, v19
        fmla.8h v2, v17, v19
        dup.8h  v18, v18[3]
        fmla.8h v1, v16, v18
        fmla.8h v0, v17, v18
        add     x5, x5, x10, lsl #1
        subs    x8, x8, #1
        b.ne    LBB30_6
        add     x4, x11, x9, lsl #1             ; same address the loop already left in x4
        tbz     w2, #0, LBB30_13
LBB30_8:
        ; unit-stride, odd k: one remaining step
        ldr     q16, [x5]
        dup.8h  v17, v16[0]
        ldp     q18, q19, [x4]
        fmla.8h v7, v18, v17
        fmla.8h v6, v19, v17
        dup.8h  v17, v16[1]
        fmla.8h v5, v18, v17
        fmla.8h v4, v19, v17
        dup.8h  v17, v16[2]
        fmla.8h v3, v18, v17
        fmla.8h v2, v19, v17
        dup.8h  v16, v16[3]
        fmla.8h v1, v18, v16
        fmla.8h v0, v19, v16
        b       LBB30_13
LBB30_9:
        lsl     x11, x11, #1                    ; left-hand stride in bytes
        lsl     x12, x12, #1                    ; right-hand step stride in bytes
        add     x13, x10, x9
        lsl     x13, x13, #1                    ; byte offset of rhs column 3
        movi.2d v1, #0000000000000000
        add     x14, x13, x12                   ; offset of (step 1, column 3)
        lsl     x15, x9, #2                     ; byte offset of rhs column 2
        add     x16, x15, x12                   ; offset of (step 1, column 2)
        add     x17, x10, x12                   ; offset of (step 1, column 1)
        lsl     x19, x12, #1                    ; right-hand advance per pair
        movi.2d v2, #0000000000000000
        movi.2d v3, #0000000000000000
        movi.2d v4, #0000000000000000
        movi.2d v5, #0000000000000000
        movi.2d v6, #0000000000000000
        movi.2d v7, #0000000000000000
        mov     x20, x5
LBB30_10:
        ; strided loop: two accumulation steps per iteration
        mov     x21, x4                         ; remember where this pair started
        ld1r.8h { v16 }, [x20], x19             ; step 0, column 0; x20 -> next pair
        ldp     q17, q18, [x4]
        fmla.8h v7, v17, v16
        fmla.8h v6, v18, v16
        add     x4, x5, x10
        ld1r.8h { v16 }, [x4]                   ; step 0, column 1
        fmla.8h v5, v17, v16
        fmla.8h v4, v18, v16
        add     x4, x5, x15
        ld1r.8h { v16 }, [x4]                   ; step 0, column 2
        fmla.8h v3, v17, v16
        fmla.8h v2, v18, v16
        add     x4, x5, x13
        ld1r.8h { v16 }, [x4]                   ; step 0, column 3
        fmla.8h v1, v17, v16
        fmla.8h v0, v18, v16
        add     x4, x5, x12
        ld1r.8h { v16 }, [x4]                   ; step 1, column 0
        add     x22, x21, x11
        add     x4, x22, x11
        ldp     q17, q18, [x22]
        fmla.8h v7, v17, v16
        fmla.8h v6, v18, v16
        add     x22, x5, x17
        ld1r.8h { v16 }, [x22]                  ; step 1, column 1
        fmla.8h v5, v17, v16
        fmla.8h v4, v18, v16
        add     x22, x5, x16
        ld1r.8h { v16 }, [x22]                  ; step 1, column 2
        fmla.8h v3, v17, v16
        fmla.8h v2, v18, v16
        add     x5, x5, x14
        ld1r.8h { v16 }, [x5]                   ; step 1, column 3
        fmla.8h v1, v17, v16
        fmla.8h v0, v18, v16
        mov     x5, x20
        subs    x8, x8, #1
        b.ne    LBB30_10
        add     x4, x21, x11, lsl #1            ; same address the loop already left in x4
        tbz     w2, #0, LBB30_13
LBB30_12:
        ; strided, odd k: one remaining step
        add     x8, x10, x9
        lsl     x8, x8, #1                      ; byte offset of rhs column 3
        mov     x11, x5
        ld1r.8h { v16 }, [x11], x8              ; column 0, then x11 -> column 3
        ldp     q17, q18, [x4]
        fmla.8h v7, v17, v16
        fmla.8h v6, v18, v16
        add     x8, x5, x10
        ld1r.8h { v16 }, [x8]                   ; column 1
        fmla.8h v5, v17, v16
        fmla.8h v4, v18, v16
        add     x8, x5, x9, lsl #2
        ld1r.8h { v16 }, [x8]                   ; column 2
        fmla.8h v3, v17, v16
        fmla.8h v2, v18, v16
        ld1r.8h { v16 }, [x11]                  ; column 3
        fmla.8h v1, v17, v16
        fmla.8h v0, v18, v16
LBB30_13:
        ldrh    w8, [sp, #186]                  ; f  (entry sp + 26)
        ldrh    w9, [sp, #184]                  ; g  (entry sp + 24)
        ldrb    w10, [sp, #188]                 ; mode (entry sp + 28)
        ; vector store path requires x0 == 16, x1 == 4 and x7 == 1
        cmp     x0, #16
        b.ne    LBB30_19
        cmp     x1, #4
        b.ne    LBB30_19
        cmp     x7, #1
        b.ne    LBB30_19
        dup.8h  v16, w8                         ; f in every lane
        and     w8, w10, #0xff
        cmp     w8, #1
        b.eq    LBB30_27
        cmp     w8, #2
        b.ne    LBB30_28
        ; mode 2, vector: dst = g*dst + f*acc
        dup.8h  v17, w9
        ldp     q18, q19, [x3]
        fmul.8h v20, v17, v18
        fmul.8h v18, v16, v7
        fadd.8h v7, v20, v18
        fmul.8h v18, v17, v19
        fmul.8h v19, v16, v6
        fadd.8h v6, v18, v19
        stp     q7, q6, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        ldp     q6, q7, [x8]
        fmul.8h v18, v17, v6
        fmul.8h v6, v16, v5
        fadd.8h v5, v18, v6
        fmul.8h v6, v17, v7
        fmul.8h v7, v16, v4
        fadd.8h v4, v6, v7
        stp     q5, q4, [x8]
        add     x8, x3, x6, lsl #2              ; column 2
        ldp     q4, q5, [x8]
        fmul.8h v6, v17, v4
        fmul.8h v4, v16, v3
        fadd.8h v3, v6, v4
        fmul.8h v4, v17, v5
        fmul.8h v5, v16, v2
        fadd.8h v2, v4, v5
        stp     q3, q2, [x8]
        mov     w8, #6
        madd    x8, x6, x8, x3                  ; column 3 = x3 + 6*x6 bytes
        ldp     q2, q3, [x8]
        fmul.8h v4, v17, v2
        fmul.8h v2, v16, v1
        fadd.8h v1, v4, v2
        fmul.8h v2, v17, v3
        fmul.8h v3, v16, v0
        fadd.8h v0, v2, v3
        stp     q1, q0, [x8]
        b       LBB30_42
LBB30_19:
        ; scalar store path: spill the accumulators and walk them element-wise
        stp     q7, q6, [sp]
        stp     q5, q4, [sp, #32]
        stp     q3, q2, [sp, #64]
        and     w10, w10, #0xff
        stp     q1, q0, [sp, #96]
        cmp     w10, #2
        b.eq    LBB30_30
        cmp     w10, #1
        b.ne    LBB30_36
        ; mode 1, scalar: dst += f*acc
        cbz     x1, LBB30_42
        cbz     x0, LBB30_42
        mov     x9, #0                          ; column counter
        lsl     x10, x6, #1                     ; column stride in bytes
        lsl     x11, x7, #1                     ; element stride in bytes
        mov     x12, sp                         ; current spilled column
LBB30_24:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB30_25:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        ldr     h0, [x14]
        fadd    h1, h0, h2
        str     h1, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB30_25
        add     x9, x9, #1
        add     x12, x12, #32                   ; next spilled column (16 halfwords)
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB30_24
        b       LBB30_42
LBB30_27:
        ; mode 1, vector: dst += f*acc
        ldr     q17, [x3]
        fmla.8h v17, v16, v7
        ldr     q7, [x3, #16]
        fmla.8h v7, v16, v6
        stp     q17, q7, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        ldr     q6, [x8]
        fmla.8h v6, v16, v5
        ldr     q5, [x8, #16]
        fmla.8h v5, v16, v4
        stp     q6, q5, [x8]
        add     x8, x3, x6, lsl #2              ; column 2
        ldr     q4, [x8]
        fmla.8h v4, v16, v3
        ldr     q3, [x8, #16]
        fmla.8h v3, v16, v2
        stp     q4, q3, [x8]
        mov     w8, #6
        madd    x8, x6, x8, x3                  ; column 3
        ldr     q2, [x8]
        fmla.8h v2, v16, v1
        str     q2, [x8]
        ldr     q1, [x8, #16]
        fmla.8h v1, v16, v0
        b       LBB30_29
LBB30_28:
        ; any other mode, vector: dst = f*acc
        fmul.8h v17, v16, v7
        fmul.8h v7, v16, v6
        stp     q17, q7, [x3]
        add     x8, x3, x6, lsl #1              ; column 1
        fmul.8h v6, v16, v5
        fmul.8h v5, v16, v4
        stp     q6, q5, [x8]
        add     x8, x3, x6, lsl #2              ; column 2
        fmul.8h v4, v16, v3
        fmul.8h v3, v16, v2
        stp     q4, q3, [x8]
        mov     w8, #6
        madd    x8, x6, x8, x3                  ; column 3
        fmul.8h v2, v16, v1
        fmul.8h v1, v16, v0
        str     q2, [x8]
LBB30_29:
        str     q1, [x8, #16]                   ; shared store of column 3, rows 8-15
        b       LBB30_42
LBB30_30:
        ; mode 2, scalar: dst = g*dst + f*acc
        cbz     x1, LBB30_42
        cbz     x0, LBB30_42
        mov     x10, #0
        lsl     x11, x6, #1
        lsl     x12, x7, #1
        mov     x13, sp
LBB30_33:
        mov     x14, x0
        mov     x15, x3
        mov     x16, x13
LBB30_34:
        ldr     h0, [x15]
        fmov    s1, w9
        fmul    h2, h1, h0
        ldr     h0, [x16], #2
        fmov    s1, w8
        fmul    h3, h1, h0
        fadd    h0, h2, h3
        str     h0, [x15]
        add     x15, x15, x12
        subs    x14, x14, #1
        b.ne    LBB30_34
        add     x10, x10, #1
        add     x13, x13, #32
        add     x3, x3, x11
        cmp     x10, x1
        b.ne    LBB30_33
        b       LBB30_42
LBB30_36:
        ; any other mode, scalar: dst = f*acc
        cbz     x1, LBB30_42
        cbz     x0, LBB30_42
        mov     x9, #0
        lsl     x10, x6, #1
        lsl     x11, x7, #1
        mov     x12, sp
LBB30_39:
        mov     x13, x0
        mov     x14, x3
        mov     x15, x12
LBB30_40:
        ldr     h0, [x15], #2
        fmov    s1, w8
        fmul    h2, h1, h0
        str     h2, [x14]
        add     x14, x14, x11
        subs    x13, x13, #1
        b.ne    LBB30_40
        add     x9, x9, #1
        add     x12, x12, #32
        add     x3, x3, x10
        cmp     x9, x1
        b.ne    LBB30_39
LBB30_42:
        ldp     x20, x19, [sp, #144]
        ldp     x22, x21, [sp, #128]
        add     sp, sp, #160
        .cfi_def_cfa_offset 0
        .cfi_restore w19
        .cfi_restore w20
        .cfi_restore w21
        .cfi_restore w22
        ret
Lfunc_end30:
        .cfi_endproc
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment