Created
March 31, 2025 11:52
-
-
Save nimlgen/c4b4eb0ae90242a867a6d009ba315119 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* DSP boilerplate */

/*
 * Hand-written declarations for the DSP runtime used by the kernels in this file.
 * No headers are included; every type and prototype is spelled out here.
 * NOTE(review): these presumably mirror the Hexagon SDK (HAP_*) and QuRT (qurt_*)
 * headers -- confirm layouts and signatures against the SDK before changing them.
 */

/* Power/DCVS request record. The _pad* members are explicit layout padding. */
struct dcvs_v2_req {
  int type;
  int _pad;
  _Bool dcvs_enable;
  char dcvs_option;
  _Bool set_latency;
  int latency;
  _Bool set_dcvs_params;
  short _pad2;
  char target_corner;
  char min_corner;
  char max_corner;
  int _pad3[3];
};

int HAP_power_set(void*, void*);

/* Argument slot: either a (pointer, length) buffer or an (fd, offset) DMA handle. */
typedef union {
  struct { void *pv; unsigned int len; } buf;
  struct { int fd; unsigned int offset; } dma;
} remote_arg;

void* HAP_mmap(void *addr, int len, int prot, int flags, int fd, long offset);
int HAP_munmap(void *addr, int len);
unsigned long long HAP_perf_get_time_us(void);

/* Threading primitives. The structs are sized with opaque padding bytes. */
typedef unsigned long qurt_thread_t;
void qurt_thread_exit(int);

typedef struct _qurt_barrier {
  char padding[64];
} qurt_barrier_t;

int qurt_barrier_init(qurt_barrier_t*, unsigned int);
int qurt_barrier_wait(qurt_barrier_t*);

typedef struct _qurt_thread_attr {
  char name[16];
  unsigned char tcb_partition;
  unsigned char affinity;
  unsigned short priority;
  unsigned char asid;
  unsigned char bus_priority;
  unsigned short timetest_id;
  unsigned int stack_size;
  void *stack_addr;
  char padding[96];
} qurt_thread_attr_t;

int qurt_thread_join(qurt_thread_t tid, int *status);

/* Allocator prototypes declared locally; note the size parameter is unsigned int. */
void* malloc(unsigned int);
void free(void*);

int qurt_thread_create (qurt_thread_t *thread_id, qurt_thread_attr_t *attr, void (*entrypoint) (void *), void *arg);
typedef int int32 __attribute__((aligned(1),vector_size(128))); | |
typedef int int2 __attribute__((aligned(1),vector_size(8))); | |
typedef unsigned char unsigned_char128 __attribute__((aligned(1),vector_size(128))); | |
typedef unsigned char unsigned_char2 __attribute__((aligned(1),vector_size(2))); | |
typedef unsigned char unsigned_char8 __attribute__((aligned(1),vector_size(8))); | |
typedef unsigned char unsigned_char4 __attribute__((aligned(1),vector_size(4))); | |
typedef int int96 __attribute__((aligned(1),vector_size(384))); | |
typedef int int128 __attribute__((aligned(1),vector_size(512))); | |
typedef unsigned char unsigned_char32 __attribute__((aligned(1),vector_size(32))); | |
typedef int int64 __attribute__((aligned(1),vector_size(256))); | |
typedef int int24 __attribute__((aligned(1),vector_size(96))); | |
typedef int int104 __attribute__((aligned(1),vector_size(416))); | |
typedef unsigned char unsigned_char24 __attribute__((aligned(1),vector_size(24))); | |
typedef float float128 __attribute__((aligned(1),vector_size(512))); | |
typedef unsigned char unsigned_char104 __attribute__((aligned(1),vector_size(104))); | |
__attribute__((noinline)) void r_112_28_3_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|9); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:56); | |
int alu3 = (alu1?56:112); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
for (int ridx1 = 0; ridx1 < 28; ridx1++) { | |
int32 acc0 = cast0; | |
int32 acc1 = cast0; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int2 acc4 = cast1; | |
int2 acc5 = cast1; | |
int32 acc6 = cast0; | |
for (int ridx2 = 0; ridx2 < 3; ridx2++) { | |
for (int ridx3 = 0; ridx3 < 3; ridx3++) { | |
_Bool alu4 = (((ridx0+ridx3)<1)!=1); | |
int alu5 = ((ridx1*24)+(ridx0*1344)+(ridx3*672)+ridx2); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+((ridx2*384)+(ridx3<<7))))); | |
unsigned char val1 = ((alu4&((ridx1<1)!=1))?*(data1+(alu5+-675)):((unsigned char)(0u))); | |
unsigned char val2 = (alu4?*(data1+(alu5+-672)):((unsigned char)(0u))); | |
unsigned char val3 = (alu4?*(data1+(alu5+-669)):((unsigned char)(0u))); | |
unsigned char val4 = (alu4?*(data1+(alu5+-666)):((unsigned char)(0u))); | |
unsigned char val5 = (alu4?*(data1+(alu5+-663)):((unsigned char)(0u))); | |
unsigned char val6 = (alu4?*(data1+(alu5+-660)):((unsigned char)(0u))); | |
unsigned char val7 = (alu4?*(data1+(alu5+-657)):((unsigned char)(0u))); | |
unsigned char val8 = (alu4?*(data1+(alu5+-654)):((unsigned char)(0u))); | |
unsigned char val9 = (alu4?*(data1+(alu5+-651)):((unsigned char)(0u))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val0, 16843009u); | |
acc0 = __builtin_HEXAGON_V6_vrmpybusv_acc_128B(acc0, val0, (unsigned_char128){val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u)),val1,val2,val3,((unsigned char)(0u))}); | |
acc1 = __builtin_HEXAGON_V6_vrmpybusv_acc_128B(acc1, val0, (unsigned_char128){val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u)),val3,val4,val5,((unsigned char)(0u))}); | |
acc2 = __builtin_HEXAGON_V6_vrmpybusv_acc_128B(acc2, val0, (unsigned_char128){val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u)),val5,val6,val7,((unsigned char)(0u))}); | |
acc3 = __builtin_HEXAGON_V6_vrmpybusv_acc_128B(acc3, val0, (unsigned_char128){val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u)),val7,val8,val9,((unsigned char)(0u))}); | |
int2 cast2 = __builtin_convertvector((unsigned_char2){val1,val3}, int2); | |
int2 cast3 = __builtin_convertvector((unsigned_char2){val2,val4}, int2); | |
int2 cast4 = __builtin_convertvector((unsigned_char2){val3,val5}, int2); | |
int2 cast5 = __builtin_convertvector((unsigned_char2){val5,val7}, int2); | |
int2 cast6 = __builtin_convertvector((unsigned_char2){val6,val8}, int2); | |
int2 cast7 = __builtin_convertvector((unsigned_char2){val7,val9}, int2); | |
acc4 = (acc4+cast2+cast3+cast4); | |
acc5 = (acc5+cast5+cast6+cast7); | |
} | |
} | |
int32 val10 = *((int32*)((data3+0))); | |
int32 alu15 = (val10*161); | |
int32 alu16 = (acc6*-18354); | |
unsigned_char128 alu17 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*161)+alu16+(acc5[1]*-21413)+alu15+32767)/65536), (((acc2*161)+alu16+(acc5[0]*-21413)+alu15+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc1*161)+alu16+(acc4[1]*-21413)+alu15+32767)/65536), (((acc0*161)+alu16+(acc4[0]*-21413)+alu15+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*3584)+(ridx1<<7))))) = alu17; | |
} | |
} | |
} | |
__attribute__((noinline)) void r_112_28_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+0))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+128))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+256))); | |
int32 val3 = *((int32*)((data3+0))); | |
int32 alu0 = (val3*363); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:56); | |
int alu3 = (alu1?56:112); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0*3584); | |
_Bool alu5 = (ridx0<111); | |
_Bool alu6 = ((ridx0<1)!=1); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|57); | |
for (int ridx1 = 0; ridx1 < 28; ridx1++) { | |
_Bool alu8 = (ridx1<27); | |
_Bool alu9 = ((ridx1<1)!=1); | |
int alu10 = (alu4+(ridx1<<7)); | |
unsigned_char128 val4 = *((unsigned_char128*)((data1+alu10))); | |
unsigned_char128 val5 = (alu6?*((unsigned_char128*)((data1+(alu10+-3584)))):cast0); | |
unsigned_char128 val6 = (alu9?*((unsigned_char128*)((data1+(alu10+-32)))):cast0); | |
unsigned_char128 val7 = (alu8?*((unsigned_char128*)((data1+(alu10+128)))):cast0); | |
unsigned_char128 val8 = (alu5?*((unsigned_char128*)((data1+(alu10+3584)))):cast0); | |
unsigned_char128 val9 = ((alu5&alu8)?*((unsigned_char128*)((data1+(alu10+3712)))):cast0); | |
unsigned_char128 val10 = ((alu5&alu9)?*((unsigned_char128*)((data1+(alu10+3552)))):cast0); | |
unsigned_char128 val11 = ((alu6&alu8)?*((unsigned_char128*)((data1+(alu10+-3456)))):cast0); | |
unsigned_char128 val12 = ((alu6&alu9)?*((unsigned_char128*)((data1+(alu10+-3616)))):cast0); | |
unsigned_char128 alu11 = __builtin_shufflevector(val4, val4, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu12 = __builtin_shufflevector(val5, val5, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu13 = __builtin_shufflevector(val8, val8, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu14 = __builtin_shufflevector(val4, val4, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu15 = __builtin_shufflevector(val5, val5, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu16 = __builtin_shufflevector(val8, val8, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu17 = __builtin_shufflevector(val12, val5, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu18 = __builtin_shufflevector(val6, val4, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu19 = __builtin_shufflevector(val10, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu20 = __builtin_shufflevector(val4, val7, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val5, val11, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val8, val9, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu23 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu21), val1, alu20), val2, alu22)*363)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu21, 16843009u), alu20, 16843009u), alu22, 16843009u)*-54450)+alu0+2047)/4096), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu15), val1, alu14), val2, alu16)*363)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu15, 16843009u), alu14, 16843009u), alu16, 16843009u)*-54450)+alu0+2047)/4096)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu12), val1, alu11), val2, alu13)*363)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu12, 16843009u), alu11, 16843009u), alu13, 16843009u)*-54450)+alu0+2047)/4096), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu17), val1, alu18), val2, alu19)*363)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu17, 16843009u), alu18, 16843009u), alu19, 16843009u)*-54450)+alu0+2047)/4096))); | |
*((unsigned_char128*)((data0+alu10))) = alu23; | |
} | |
} | |
} | |
__attribute__((noinline)) void r_3136_4_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 val0 = *((int32*)((data3+0))); | |
int32 alu0 = (val0*173); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:1568); | |
int alu4 = (alu2?1568:3136); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu5); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu5, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu11 = (alu5+(ridx1<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu19+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu20 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu21 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu20))), val6, (*((unsigned int*)&alu24))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, (*((unsigned int*)&alu21))), val6, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, (*((unsigned int*)&alu22))), val6, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, (*((unsigned int*)&alu23))), val6, (*((unsigned int*)&alu27))); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu35 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*173)+(acc1[1]*-24566)+alu0+32767)/65536)+124), ((((acc4*173)+(acc1[0]*-24566)+alu0+32767)/65536)+124)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*173)+(acc0[1]*-24566)+alu0+32767)/65536)+124), ((((acc2*173)+(acc0[0]*-24566)+alu0+32767)/65536)+124))); | |
*((unsigned_char128*)((data0+alu5))) = alu35; | |
} | |
} | |
__attribute__((noinline)) void r_3136_4_8_32_4_3(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|24); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:1568); | |
int alu3 = (alu1?1568:3136); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu4); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu10 = (ridx1<<8); | |
int alu11 = (alu4+(ridx1<<3)); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu10))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+(alu10+128)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu10+1024)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu10+1152)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu10+2048)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu10+2176)))); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val6 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val7 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val8 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val9 = *((unsigned_char8*)((data1+alu17))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val0, 16843009u), val1, 16843009u); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val2, 16843009u), val3, 16843009u); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val4, 16843009u), val5, 16843009u); | |
unsigned_char4 alu22 = __builtin_shufflevector(val6, val6, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu22)); | |
unsigned_char4 alu23 = __builtin_shufflevector(val7, val7, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu23)); | |
unsigned_char4 alu24 = __builtin_shufflevector(val8, val8, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val9, val9, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val6, val6, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val7, val7, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val8, val8, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val9, val9, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu29)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val0, cast2), val1, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val0, cast3), val1, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val0, cast4), val1, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val0, cast5), val1, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val2, cast2), val3, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val2, cast3), val3, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val2, cast4), val3, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val2, cast5), val3, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val4, cast2), val5, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val4, cast3), val5, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val4, cast4), val5, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val4, cast5), val5, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val6)), (*((long long*)&val7))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val8)), (*((long long*)&val9))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int96 val10 = *((int96*)((data3+0))); | |
int32 alu45 = __builtin_shufflevector(val10, val10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu46 = (alu45*193); | |
int32 alu47 = __builtin_shufflevector(val10, val10, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu48 = (alu47*193); | |
int32 alu49 = __builtin_shufflevector(val10, val10, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu50 = (alu49*193); | |
int32 alu51 = (acc14*-23932); | |
int32 alu52 = (acc15*-23932); | |
int32 alu53 = (acc16*-23932); | |
unsigned_char128 alu54 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*193)+alu51+(acc1[1]*-22002)+alu46+8191)/16384), (((acc4*193)+alu51+(acc1[0]*-22002)+alu46+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*193)+alu51+(acc0[1]*-22002)+alu46+8191)/16384), (((acc2*193)+alu51+(acc0[0]*-22002)+alu46+8191)/16384))); | |
*((unsigned_char128*)((data0+alu4))) = alu54; | |
unsigned_char128 alu56 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*193)+alu52+(acc1[1]*-22002)+alu48+8191)/16384), (((acc8*193)+alu52+(acc1[0]*-22002)+alu48+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*193)+alu52+(acc0[1]*-22002)+alu48+8191)/16384), (((acc6*193)+alu52+(acc0[0]*-22002)+alu48+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+401408)))) = alu56; | |
unsigned_char128 alu58 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*193)+alu53+(acc1[1]*-22002)+alu50+8191)/16384), (((acc12*193)+alu53+(acc1[0]*-22002)+alu50+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*193)+alu53+(acc0[1]*-22002)+alu50+8191)/16384), (((acc10*193)+alu53+(acc0[0]*-22002)+alu50+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+802816)))) = alu58; | |
} | |
} | |
/*
 * r_56_3_14_3_3_32_4 -- machine-generated Hexagon HVX kernel.
 *
 * For every (ridx0, ridx1, ridx2) it loads nine 128-byte vectors from data1,
 * regroups their bytes with __builtin_shufflevector, combines them with three
 * 128-byte vectors from data2 through the vrmpy intrinsics, rescales the
 * 32-bit results with integer constants and stores one saturated 128-byte
 * vector to data0.
 *
 * data0         output bytes; one 128-byte store at
 *               (ridx2<<7) + ridx0*1792 + ridx1*100352.
 * data1         input bytes; read around ridx1*401408 + ridx0*7168 + (ridx2<<8).
 * data2         three 128-byte vectors per ridx1, at ridx1*384 + {0,128,256}.
 * data3         one 32-lane int vector per ridx1, at int index ridx1<<5.
 * global_idx_0  work split: 0 handles ridx0 in [0,28), any other value
 *               handles ridx0 in [28,56).
 * sync          not referenced anywhere in this function.
 *
 * NOTE(review): the numbers in the function name look like the loop extents
 * chosen by the generator -- confirm against the generator.
 */
__attribute__((noinline)) void r_56_3_14_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* L2 prefetch requests for data2 and data3; the second argument is the packed fetch descriptor handed to the intrinsic unchanged. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|1); | |
/* All-zero 128-byte vector, substituted for every load that is masked off below. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; it picks which half of the ridx0 range this call processes. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:28); | |
int alu4 = (alu2?28:56); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*7168); | |
/* alu6 is false only for ridx0 == 0; it guards the loads at negative offsets -3616/-3584/-3456 (presumably the row above -- TODO confirm layout). */
_Bool alu6 = ((ridx0<1)!=1); | |
for (int ridx1 = 0; ridx1 < 3; ridx1++) { | |
int alu7 = (ridx1*384); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu7))); | |
int32 val1 = *((int32*)((data3+(ridx1<<5)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu7+128)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu7+256)))); | |
/* data3 term pre-scaled by the same factor (1429) applied to the dot products in the final expression. */
int32 alu8 = (val1*1429); | |
int alu9 = (ridx1*401408); | |
__builtin_HEXAGON_Y4_l2fetch(data1+(alu9+alu5), 0x808000|57); | |
for (int ridx2 = 0; ridx2 < 14; ridx2++) { | |
/* alu11 is false only for ridx2 == 0; it guards the loads at offsets -3616, -32 and +3552. */
_Bool alu11 = ((ridx2<1)!=1); | |
int alu12 = (alu9+alu5+(ridx2<<8)); | |
/* Nine 128-byte input loads around alu12; masked-off positions read cast0 (zeros) instead of memory. */
unsigned_char128 val4 = *((unsigned_char128*)((data1+alu12))); | |
unsigned_char128 val5 = ((alu6&alu11)?*((unsigned_char128*)((data1+(alu12+-3616)))):cast0); | |
unsigned_char128 val6 = (alu6?*((unsigned_char128*)((data1+(alu12+-3584)))):cast0); | |
unsigned_char128 val7 = (alu6?*((unsigned_char128*)((data1+(alu12+-3456)))):cast0); | |
unsigned_char128 val8 = (alu11?*((unsigned_char128*)((data1+(alu12+-32)))):cast0); | |
unsigned_char128 val9 = *((unsigned_char128*)((data1+(alu12+128)))); | |
unsigned_char128 val10 = (alu11?*((unsigned_char128*)((data1+(alu12+3552)))):cast0); | |
unsigned_char128 val11 = *((unsigned_char128*)((data1+(alu12+3584)))); | |
unsigned_char128 val12 = *((unsigned_char128*)((data1+(alu12+3712)))); | |
/*
 * Byte gathers. Each result holds 32 groups of 4 bytes; the 4th byte of every
 * group uses index -1, which Clang treats as a don't-care lane.
 * alu13..alu18: group i = bytes {32+i, 64+i, 96+i} of a single source vector.
 * NOTE(review): the -1 lanes also feed the 16843009u (0x01010101) byte sums in
 * the final expression -- confirm the generator relies on a defined value there.
 */
unsigned_char128 alu13 = __builtin_shufflevector(val4, val4, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu14 = __builtin_shufflevector(val6, val6, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu15 = __builtin_shufflevector(val7, val7, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu16 = __builtin_shufflevector(val9, val9, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu17 = __builtin_shufflevector(val11, val11, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu18 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
/* alu19..alu21: group i = byte i of the first operand, then bytes i and 32+i of the second operand (indices 128+i, 160+i). */
unsigned_char128 alu19 = __builtin_shufflevector(val5, val6, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu20 = __builtin_shufflevector(val8, val4, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val10, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* alu22..alu24: group i = byte 96+i of the first operand, then bytes i and 32+i of the second operand. */
unsigned_char128 alu22 = __builtin_shufflevector(val4, val9, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val6, val7, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val11, val12, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
/*
 * Four 32-lane results, one per gathered triple:
 *   (alu15, alu16, alu18), (alu23, alu22, alu24), (alu14, alu13, alu17), (alu19, alu20, alu21).
 * Each is
 *   (vrmpybusv chain pairing val0/val2/val3 with the triple) * 1429
 *   + (vrmpybus chain over the same triple with scalar 16843009u == 0x01010101) * -208634
 *   + alu8 + 16383, divided by 32768.
 * The four results are narrowed by two vpackwh_sat and one vpackhub_sat into a single 128-byte vector.
 * NOTE(review): this reads as a fixed-point requantisation with a correction term built from
 * per-group byte sums -- confirm the intrinsic semantics in the HVX reference.
 */
unsigned_char128 alu25 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu15), val2, alu16), val3, alu18)*1429)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu15, 16843009u), alu16, 16843009u), alu18, 16843009u)*-208634)+alu8+16383)/32768), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu23), val2, alu22), val3, alu24)*1429)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu23, 16843009u), alu22, 16843009u), alu24, 16843009u)*-208634)+alu8+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu14), val2, alu13), val3, alu17)*1429)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu14, 16843009u), alu13, 16843009u), alu17, 16843009u)*-208634)+alu8+16383)/32768), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu19), val2, alu20), val3, alu21)*1429)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu19, 16843009u), alu20, 16843009u), alu21, 16843009u)*-208634)+alu8+16383)/32768))); | |
/* One 128-byte output store per (ridx0, ridx1, ridx2). */
*((unsigned_char128*)((data0+((ridx2<<7)+(ridx0*1792)+(ridx1*100352))))) = alu25; | |
} | |
} | |
} | |
} | |
/*
 * r_784_12_8_32_4 -- machine-generated Hexagon HVX kernel.
 *
 * For each ridx0 it runs 12 accumulation steps. Every step reads four 8-byte
 * groups from data1 and two 128-byte vectors from data2, and feeds them to the
 * vrmpybus_acc intrinsic. After the loop the four 32-lane accumulators are
 * rescaled with integer constants, offset by 114, saturated to bytes and
 * written as one 128-byte vector to data0.
 *
 * data0         output bytes; one 128-byte store at ridx0<<7.
 * data1         input bytes; 8-byte reads at
 *               (ridx0<<7) + (ridx1>>2)*100352 + ((ridx1&3)<<3) + {0,32,64,96}.
 * data2         two 128-byte vectors per ridx1, at (ridx1<<8) + {0,128}.
 * data3         one 32-lane int vector read at int index 0.
 * global_idx_0  work split: 0 handles ridx0 in [0,392), any other value
 *               handles ridx0 in [392,784).
 * sync          not referenced anywhere in this function.
 */
__attribute__((noinline)) void r_784_12_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 val0 = *((int32*)((data3+0))); | |
/* data3 term pre-scaled by the same factor (55) applied to the accumulators in the final expression. */
int32 alu0 = (val0*55); | |
/* Zero initialisers for the 32-lane (cast0) and 2-lane (cast1) accumulators. */
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* L2 prefetch request for data2; the second argument is the packed fetch descriptor handed to the intrinsic unchanged. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|24); | |
int2 cast1 = (int2){0,0}; | |
/* alu2 is true exactly when g0 == 0; it picks which half of the ridx0 range this call processes. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:392); | |
int alu4 = (alu2?392:784); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
/* Byte offset ridx0*128, shared by the data1 reads and the data0 store. */
int alu5 = (ridx0<<7); | |
/* Cache prefetch hints for the data1 bytes this iteration reads. */
__builtin_HEXAGON_Y2_dcfetch(data1+alu5); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu5, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+96)); | |
/* acc0/acc1: two-lane int accumulators updated by vraddub_acc; acc2..acc5: 32-lane accumulators updated by vrmpybus_acc. */
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 12; ridx1++) { | |
/* Base offset for this step: 3 blocks of 100352 bytes (ridx1>>2), 4 sub-steps of 8 bytes each (ridx1&3). */
int alu11 = (alu5+((ridx1>>2)*100352)+((ridx1&3)<<3)); | |
/* Four 8-byte loads spaced 32 bytes apart, each preceded by a prefetch hint 16 bytes ahead. */
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu19+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Split each 8-byte group into its low (bytes 0..3) and high (bytes 4..7) halves. */
unsigned_char4 alu20 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu21 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
/* Each half is reinterpreted as a 32-bit scalar operand: low half pairs with val5, high half with val6. One accumulator per 8-byte group. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu20))), val6, (*((unsigned int*)&alu24))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, (*((unsigned int*)&alu21))), val6, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, (*((unsigned int*)&alu22))), val6, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, (*((unsigned int*)&alu23))), val6, (*((unsigned int*)&alu27))); | |
/*
 * Accumulate byte sums of the 8-byte groups, reinterpreting the int2 accumulator and the groups as 64-bit values.
 * NOTE(review): confirm in the Hexagon reference which input bytes land in each 32-bit half of the
 * vraddub_acc result, since acc0[0]/acc0[1]/acc1[0]/acc1[1] are paired with acc2/acc3/acc4/acc5 below.
 */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
/* Per accumulator: (acc*55 + bytesum*-6985 + alu0 + 8191) / 16384 + 114, then saturating narrowing to bytes via vpackwh_sat and vpackhub_sat. */
unsigned_char128 alu35 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*55)+(acc1[1]*-6985)+alu0+8191)/16384)+114), ((((acc4*55)+(acc1[0]*-6985)+alu0+8191)/16384)+114)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*55)+(acc0[1]*-6985)+alu0+8191)/16384)+114), ((((acc2*55)+(acc0[0]*-6985)+alu0+8191)/16384)+114))); | |
*((unsigned_char128*)((data0+alu5))) = alu35; | |
} | |
} | |
/*
 * r_784_4_8_32_4_5 -- machine-generated Hexagon HVX kernel.
 *
 * For each ridx0 it runs 4 accumulation steps. Every step reads four 8-byte
 * groups from data1 and five pairs of 128-byte vectors from data2. It keeps
 * 20 product accumulators (4 input groups x 5 data2 pairs), 5 accumulators of
 * data2 byte sums and 2 two-lane accumulators of data1 byte sums. After the
 * loop it rescales with integer constants and stores five saturated 128-byte
 * vectors to data0.
 *
 * data0         output bytes; five 128-byte stores at (ridx0<<7) + k*100352, k = 0..4.
 * data1         input bytes; 8-byte reads at (ridx0<<7) + (ridx1<<3) + {0,32,64,96}.
 * data2         per ridx1, vector pairs at (ridx1<<8) + k*1024 + {0,128}, k = 0..4.
 * data3         int vectors read at int index 0 (type int128) and int index 128 (type int32).
 * global_idx_0  work split: 0 handles ridx0 in [0,392), any other value
 *               handles ridx0 in [392,784).
 * sync          not referenced anywhere in this function.
 */
__attribute__((noinline)) void r_784_4_8_32_4_5(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Zero initialisers for the 32-lane (cast0) and 2-lane (cast1) accumulators. */
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* L2 prefetch request for data2; the second argument is the packed fetch descriptor handed to the intrinsic unchanged. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|40); | |
int2 cast1 = (int2){0,0}; | |
/* alu1 is true exactly when g0 == 0; it picks which half of the ridx0 range this call processes. */
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:392); | |
int alu3 = (alu1?392:784); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
/* Byte offset ridx0*128, shared by the data1 reads and the data0 stores. */
int alu4 = (ridx0<<7); | |
/* Cache prefetch hints for the data1 bytes this iteration reads. */
__builtin_HEXAGON_Y2_dcfetch(data1+alu4); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+96)); | |
/* acc0/acc1: data1 byte sums (vraddub_acc). acc2..acc21: products, four per data2 pair. acc22..acc26: data2 byte sums, one per pair. */
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
int32 acc22 = cast0; | |
int32 acc23 = cast0; | |
int32 acc24 = cast0; | |
int32 acc25 = cast0; | |
int32 acc26 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu10 = (ridx1<<8); | |
int alu11 = (alu4+(ridx1<<3)); | |
/* Five pairs of data2 vectors, the pairs spaced 1024 bytes apart. */
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu10))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+(alu10+128)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu10+1024)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu10+1152)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu10+2048)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu10+2176)))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu10+3072)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu10+3200)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu10+4096)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu10+4224)))); | |
/* Four 8-byte data1 loads spaced 32 bytes apart, each preceded by a prefetch hint 16 bytes ahead. */
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val10 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val11 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val12 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val13 = *((unsigned_char8*)((data1+alu17))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Scalar 16843009u == 0x01010101 (every byte equals 1): accumulates the data2 pair with unit weights. */
acc22 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc22, val0, 16843009u), val1, 16843009u); | |
acc23 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc23, val2, 16843009u), val3, 16843009u); | |
acc24 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc24, val4, 16843009u), val5, 16843009u); | |
acc25 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc25, val6, 16843009u), val7, 16843009u); | |
acc26 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc26, val8, 16843009u), val9, 16843009u); | |
/* Split each 8-byte group into low (cast2..cast5) and high (cast6..cast9) halves, each reinterpreted as a 32-bit scalar operand. */
unsigned_char4 alu24 = __builtin_shufflevector(val10, val10, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val11, val11, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val12, val12, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val13, val13, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val10, val10, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val11, val11, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val12, val12, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val13, val13, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu31)); | |
/* 5 x 4 product accumulators: the low half pairs with the first vector of a data2 pair, the high half with the second. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val0, cast2), val1, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val0, cast3), val1, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val0, cast4), val1, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val0, cast5), val1, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val2, cast2), val3, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val2, cast3), val3, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val2, cast4), val3, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val2, cast5), val3, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val4, cast2), val5, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val4, cast3), val5, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val4, cast4), val5, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val4, cast5), val5, cast9); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val6, cast2), val7, cast6); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val6, cast3), val7, cast7); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val6, cast4), val7, cast8); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val6, cast5), val7, cast9); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val8, cast2), val9, cast6); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val8, cast3), val9, cast7); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val8, cast4), val9, cast8); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val8, cast5), val9, cast9); | |
/*
 * Accumulate byte sums of the 8-byte data1 groups, reinterpreting the int2 accumulator and the groups as 64-bit values.
 * NOTE(review): confirm in the Hexagon reference which input bytes land in each 32-bit half of the
 * vraddub_acc result, since acc0[0]/acc0[1]/acc1[0]/acc1[1] are paired with per-group accumulators below.
 */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val10)), (*((long long*)&val11))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val12)), (*((long long*)&val13))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
/* data3 terms: val14 is split into four 32-lane slices (lanes 0..31, 32..63, 64..95, 96..127) and val15 supplies a fifth; each is scaled by 149. */
/* NOTE(review): int128 is declared outside this view; presumably a 128-lane int vector -- TODO confirm. */
int128 val14 = *((int128*)((data3+0))); | |
int32 val15 = *((int32*)((data3+128))); | |
int32 alu55 = (val15*149); | |
int32 alu56 = __builtin_shufflevector(val14, val14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu57 = (alu56*149); | |
int32 alu58 = __builtin_shufflevector(val14, val14, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu59 = (alu58*149); | |
int32 alu60 = __builtin_shufflevector(val14, val14, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu61 = (alu60*149); | |
int32 alu62 = __builtin_shufflevector(val14, val14, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu63 = (alu62*149); | |
/* data2 byte-sum accumulators scaled by -16986, one per data2 pair. */
int32 alu64 = (acc22*-16986); | |
int32 alu65 = (acc23*-16986); | |
int32 alu66 = (acc24*-16986); | |
int32 alu67 = (acc25*-16986); | |
int32 alu68 = (acc26*-16986); | |
/*
 * Five outputs, one per data2 pair k = 0..4. Each lane is
 *   (acc*149 + data2_sum_k*-16986 + data1_sum*-19817 + data3_term_k + 8191) / 16384,
 * narrowed with saturation to bytes and stored at alu4 + k*100352.
 */
unsigned_char128 alu69 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*149)+alu64+(acc1[1]*-19817)+alu57+8191)/16384), (((acc4*149)+alu64+(acc1[0]*-19817)+alu57+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*149)+alu64+(acc0[1]*-19817)+alu57+8191)/16384), (((acc2*149)+alu64+(acc0[0]*-19817)+alu57+8191)/16384))); | |
*((unsigned_char128*)((data0+alu4))) = alu69; | |
unsigned_char128 alu71 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*149)+alu65+(acc1[1]*-19817)+alu59+8191)/16384), (((acc8*149)+alu65+(acc1[0]*-19817)+alu59+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*149)+alu65+(acc0[1]*-19817)+alu59+8191)/16384), (((acc6*149)+alu65+(acc0[0]*-19817)+alu59+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+100352)))) = alu71; | |
unsigned_char128 alu73 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*149)+alu66+(acc1[1]*-19817)+alu61+8191)/16384), (((acc12*149)+alu66+(acc1[0]*-19817)+alu61+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*149)+alu66+(acc0[1]*-19817)+alu61+8191)/16384), (((acc10*149)+alu66+(acc0[0]*-19817)+alu61+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+200704)))) = alu73; | |
unsigned_char128 alu75 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*149)+alu67+(acc1[1]*-19817)+alu63+8191)/16384), (((acc16*149)+alu67+(acc1[0]*-19817)+alu63+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*149)+alu67+(acc0[1]*-19817)+alu63+8191)/16384), (((acc14*149)+alu67+(acc0[0]*-19817)+alu63+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+301056)))) = alu75; | |
unsigned_char128 alu77 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*149)+alu68+(acc1[1]*-19817)+alu55+8191)/16384), (((acc20*149)+alu68+(acc1[0]*-19817)+alu55+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*149)+alu68+(acc0[1]*-19817)+alu55+8191)/16384), (((acc18*149)+alu68+(acc0[0]*-19817)+alu55+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+401408)))) = alu77; | |
} | |
} | |
__attribute__((noinline)) void r_56_5_14_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|15); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:28); | |
int alu4 = (alu2?28:56); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*14); | |
int alu6 = (ridx0*1792); | |
_Bool alu7 = ((ridx0<1)!=1); | |
_Bool alu8 = (ridx0<55); | |
for (int ridx1 = 0; ridx1 < 5; ridx1++) { | |
int alu9 = (ridx1*384); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu9))); | |
int32 val1 = *((int32*)((data3+(ridx1<<5)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu9+128)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu9+256)))); | |
int32 alu10 = (val1*87); | |
int alu11 = (ridx1*100352); | |
__builtin_HEXAGON_Y4_l2fetch(data1+(alu11+alu6), 0x808000|29); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu5+15)%784)<<7)+alu11), 0x808000|29); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu5+769)%784)<<7)+alu11), 0x808000|29); | |
for (int ridx2 = 0; ridx2 < 14; ridx2++) { | |
int alu15 = (ridx2<<7); | |
_Bool alu16 = ((ridx2<1)!=1); | |
_Bool alu17 = (ridx2<13); | |
int alu18 = (alu5+ridx2); | |
int alu19 = (alu11+alu6+alu15); | |
unsigned_char128 val4 = *((unsigned_char128*)((data1+alu19))); | |
unsigned_char128 val5 = ((alu8&alu17)?*((unsigned_char128*)((data1+((((alu18+15)%784)<<7)+alu11)))):cast0); | |
unsigned_char128 val6 = (alu7?*((unsigned_char128*)((data1+(alu19+-1792)))):cast0); | |
unsigned_char128 val7 = ((alu7&alu17)?*((unsigned_char128*)((data1+(alu19+-1664)))):cast0); | |
unsigned_char128 val8 = (alu16?*((unsigned_char128*)((data1+(alu19+-32)))):cast0); | |
unsigned_char128 val9 = (alu17?*((unsigned_char128*)((data1+(alu19+128)))):cast0); | |
unsigned_char128 val10 = ((alu8&alu16)?*((unsigned_char128*)((data1+(alu19+1760)))):cast0); | |
unsigned_char128 val11 = (alu8?*((unsigned_char128*)((data1+(alu19+1792)))):cast0); | |
unsigned_char128 val12 = ((alu7&alu16)?*((unsigned_char128*)((data1+((((alu18+769)%784)<<7)+alu11+96)))):cast0); | |
unsigned_char128 alu20 = __builtin_shufflevector(val4, val4, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val6, val6, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val11, val11, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val4, val4, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val6, val6, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val11, val11, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val8, val4, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu27 = __builtin_shufflevector(val10, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val12, val6, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val4, val9, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val6, val7, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val11, val5, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu32 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu30), val2, alu29), val3, alu31)*87)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu30, 16843009u), alu29, 16843009u), alu31, 16843009u)*-11136)+alu10+2047)/4096), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu24), val2, alu23), val3, alu25)*87)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu24, 16843009u), alu23, 16843009u), alu25, 16843009u)*-11136)+alu10+2047)/4096)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu21), val2, alu20), val3, alu22)*87)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu21, 16843009u), alu20, 16843009u), alu22, 16843009u)*-11136)+alu10+2047)/4096), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu28), val2, alu26), val3, alu27)*87)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu28, 16843009u), alu26, 16843009u), alu27, 16843009u)*-11136)+alu10+2047)/4096))); | |
*((unsigned_char128*)((data0+(alu15+alu6+alu11)))) = alu32; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_784_20_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 val0 = *((int32*)((data3+0))); | |
int32 alu0 = (val0*101); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|40); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:392); | |
int alu4 = (alu2?392:784); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu5); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu5, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 20; ridx1++) { | |
int alu11 = (alu5+((ridx1>>2)*100352)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu19+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu20 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu21 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu20))), val6, (*((unsigned int*)&alu24))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, (*((unsigned int*)&alu21))), val6, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, (*((unsigned int*)&alu22))), val6, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, (*((unsigned int*)&alu23))), val6, (*((unsigned int*)&alu27))); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu35 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*101)+(acc1[1]*-11413)+alu0+16383)/32768)+115), ((((acc4*101)+(acc1[0]*-11413)+alu0+16383)/32768)+115)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*101)+(acc0[1]*-11413)+alu0+16383)/32768)+115), ((((acc2*101)+(acc0[0]*-11413)+alu0+16383)/32768)+115))); | |
*((unsigned_char128*)((data0+alu5))) = alu35; | |
} | |
} | |
__attribute__((noinline)) void E_784_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:392); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|8); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|8); | |
int alu5 = (alu0?392:784); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*3655)+(cast6*2707)+57509)/65536)*450883)+12507)/32768)+-54), (((((((cast5*3655)+(cast4*2707)+57509)/65536)*450883)+12507)/32768)+-54)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*3655)+(cast2*2707)+57509)/65536)*450883)+12507)/32768)+-54), (((((((cast1*3655)+(cast0*2707)+57509)/65536)*450883)+12507)/32768)+-54))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void r_784_4_8_32_4_5n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|40); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:392); | |
int alu3 = (alu1?392:784); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu4); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
int32 acc22 = cast0; | |
int32 acc23 = cast0; | |
int32 acc24 = cast0; | |
int32 acc25 = cast0; | |
int32 acc26 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu10 = (ridx1<<8); | |
int alu11 = (alu4+(ridx1<<3)); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu10))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+(alu10+128)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu10+1024)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu10+1152)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu10+2048)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu10+2176)))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu10+3072)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu10+3200)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu10+4096)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu10+4224)))); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val10 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val11 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val12 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val13 = *((unsigned_char8*)((data1+alu17))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
acc22 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc22, val0, 16843009u), val1, 16843009u); | |
acc23 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc23, val2, 16843009u), val3, 16843009u); | |
acc24 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc24, val4, 16843009u), val5, 16843009u); | |
acc25 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc25, val6, 16843009u), val7, 16843009u); | |
acc26 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc26, val8, 16843009u), val9, 16843009u); | |
unsigned_char4 alu24 = __builtin_shufflevector(val10, val10, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val11, val11, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val12, val12, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val13, val13, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val10, val10, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val11, val11, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val12, val12, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val13, val13, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu31)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val0, cast2), val1, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val0, cast3), val1, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val0, cast4), val1, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val0, cast5), val1, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val2, cast2), val3, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val2, cast3), val3, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val2, cast4), val3, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val2, cast5), val3, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val4, cast2), val5, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val4, cast3), val5, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val4, cast4), val5, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val4, cast5), val5, cast9); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val6, cast2), val7, cast6); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val6, cast3), val7, cast7); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val6, cast4), val7, cast8); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val6, cast5), val7, cast9); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val8, cast2), val9, cast6); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val8, cast3), val9, cast7); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val8, cast4), val9, cast8); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val8, cast5), val9, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val10)), (*((long long*)&val11))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val12)), (*((long long*)&val13))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int128 val14 = *((int128*)((data3+0))); | |
int32 val15 = *((int32*)((data3+128))); | |
int32 alu55 = (val15*211); | |
int32 alu56 = __builtin_shufflevector(val14, val14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu57 = (alu56*211); | |
int32 alu58 = __builtin_shufflevector(val14, val14, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu59 = (alu58*211); | |
int32 alu60 = __builtin_shufflevector(val14, val14, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu61 = (alu60*211); | |
int32 alu62 = __builtin_shufflevector(val14, val14, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu63 = (alu62*211); | |
int32 alu64 = (acc22*-23421); | |
int32 alu65 = (acc23*-23421); | |
int32 alu66 = (acc24*-23421); | |
int32 alu67 = (acc25*-23421); | |
int32 alu68 = (acc26*-23421); | |
unsigned_char128 alu69 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*211)+alu64+(acc1[1]*-26797)+alu57+8191)/16384), (((acc4*211)+alu64+(acc1[0]*-26797)+alu57+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*211)+alu64+(acc0[1]*-26797)+alu57+8191)/16384), (((acc2*211)+alu64+(acc0[0]*-26797)+alu57+8191)/16384))); | |
*((unsigned_char128*)((data0+alu4))) = alu69; | |
unsigned_char128 alu71 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*211)+alu65+(acc1[1]*-26797)+alu59+8191)/16384), (((acc8*211)+alu65+(acc1[0]*-26797)+alu59+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*211)+alu65+(acc0[1]*-26797)+alu59+8191)/16384), (((acc6*211)+alu65+(acc0[0]*-26797)+alu59+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+100352)))) = alu71; | |
unsigned_char128 alu73 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*211)+alu66+(acc1[1]*-26797)+alu61+8191)/16384), (((acc12*211)+alu66+(acc1[0]*-26797)+alu61+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*211)+alu66+(acc0[1]*-26797)+alu61+8191)/16384), (((acc10*211)+alu66+(acc0[0]*-26797)+alu61+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+200704)))) = alu73; | |
unsigned_char128 alu75 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*211)+alu67+(acc1[1]*-26797)+alu63+8191)/16384), (((acc16*211)+alu67+(acc1[0]*-26797)+alu63+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*211)+alu67+(acc0[1]*-26797)+alu63+8191)/16384), (((acc14*211)+alu67+(acc0[0]*-26797)+alu63+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+301056)))) = alu75; | |
unsigned_char128 alu77 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*211)+alu68+(acc1[1]*-26797)+alu55+8191)/16384), (((acc20*211)+alu68+(acc1[0]*-26797)+alu55+8191)/16384)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*211)+alu68+(acc0[1]*-26797)+alu55+8191)/16384), (((acc18*211)+alu68+(acc0[0]*-26797)+alu55+8191)/16384))); | |
*((unsigned_char128*)((data0+(alu4+401408)))) = alu77; | |
} | |
} | |
/*
 * r_28_5_7_3_3_32_4
 *
 * Reads bytes from data1, weight bytes from data2 and 32-bit values from
 * data3, and writes saturated 8-bit results to data0, one 128-byte vector
 * per innermost iteration.
 *
 * global_idx_0 selects which half of the outer index range is processed:
 * 0 handles ridx0 in [0, 14), any other value handles ridx0 in [14, 28).
 * sync is not referenced in this function.
 *
 * NOTE(review): the nine neighbouring loads, with zeros substituted when
 * ridx0 == 0 or ridx2 == 0, look like a zero-padded 3x3 window with stride 2.
 * That is an inference from the access pattern only; confirm against the
 * generator.
 */
__attribute__((noinline)) void r_28_5_7_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Issue l2fetch on the data2 and data3 buffers before the loops start. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|15); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
/* All-zero vector used in place of the loads that are skipped below. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; alu3/alu4 are the ridx0 bounds. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:14); | |
int alu4 = (alu2?14:28); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*3584); | |
/* alu6 is true when ridx0 >= 1. */
_Bool alu6 = ((ridx0<1)!=1); | |
for (int ridx1 = 0; ridx1 < 5; ridx1++) { | |
/* Three 128-byte vectors from data2 per ridx1 (384 bytes apart). */
int alu7 = (ridx1*384); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu7))); | |
/* data3 is int*, so (ridx1<<5) counts ints: one 32-int vector per ridx1. */
int32 val1 = *((int32*)((data3+(ridx1<<5)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu7+128)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu7+256)))); | |
/* data3 term pre-scaled by the same factor (2481) applied to the products below. */
int32 alu8 = (val1*2481); | |
int alu9 = (ridx1*100352); | |
__builtin_HEXAGON_Y4_l2fetch(data1+(alu9+alu5), 0x808000|29); | |
for (int ridx2 = 0; ridx2 < 7; ridx2++) { | |
/* alu11 is true when ridx2 >= 1. */
_Bool alu11 = ((ridx2<1)!=1); | |
int alu12 = (alu9+alu5+(ridx2<<8)); | |
/*
 * Nine 128-byte loads around data1+alu12 at byte offsets
 * -1824, -1792, -1664, -32, 0, +128, +1760, +1792, +1920.
 * The three loads at -1824/-1792/-1664 are replaced by cast0 when ridx0 == 0
 * (alu6 false); the loads at -1824, -32 and +1760 are replaced by cast0 when
 * ridx2 == 0 (alu11 false). The remaining loads are unconditional.
 */
unsigned_char128 val4 = *((unsigned_char128*)((data1+alu12))); | |
unsigned_char128 val5 = ((alu6&alu11)?*((unsigned_char128*)((data1+(alu12+-1824)))):cast0); | |
unsigned_char128 val6 = (alu6?*((unsigned_char128*)((data1+(alu12+-1792)))):cast0); | |
unsigned_char128 val7 = (alu6?*((unsigned_char128*)((data1+(alu12+-1664)))):cast0); | |
unsigned_char128 val8 = (alu11?*((unsigned_char128*)((data1+(alu12+-32)))):cast0); | |
unsigned_char128 val9 = *((unsigned_char128*)((data1+(alu12+128)))); | |
unsigned_char128 val10 = (alu11?*((unsigned_char128*)((data1+(alu12+1760)))):cast0); | |
unsigned_char128 val11 = *((unsigned_char128*)((data1+(alu12+1792)))); | |
unsigned_char128 val12 = *((unsigned_char128*)((data1+(alu12+1920)))); | |
/*
 * Byte shuffles building 32 groups of 4 lanes each (i = 0..31):
 *   alu13..alu18: lanes (32+i, 64+i, 96+i) of a single source vector;
 *   alu19..alu21: lane i of the first operand, lanes (i, 32+i) of the second;
 *   alu22..alu24: lane 96+i of the first operand, lanes (i, 32+i) of the second.
 * The fourth lane index of every group is -1.
 * NOTE(review): a -1 index in __builtin_shufflevector leaves that lane
 * unspecified, and these vectors are later fed to vrmpybus with multiplier
 * 16843009u (0x01010101), which weights all four bytes of each word. Confirm
 * that the fourth lane cannot affect the result.
 */
unsigned_char128 alu13 = __builtin_shufflevector(val4, val4, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu14 = __builtin_shufflevector(val6, val6, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu15 = __builtin_shufflevector(val7, val7, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu16 = __builtin_shufflevector(val9, val9, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu17 = __builtin_shufflevector(val11, val11, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu18 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu19 = __builtin_shufflevector(val5, val6, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu20 = __builtin_shufflevector(val8, val4, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val10, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val4, val9, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val6, val7, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val11, val12, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
/*
 * Four 32-lane int results, each computed as
 *   (vrmpybusv of val0/val2/val3 with three shuffled vectors) * 2481
 * + (vrmpybus of the same three shuffled vectors with 0x01010101) * -302682
 * + alu8 + 32767, then divided by 65536.
 * vpackwh_sat followed by vpackhub_sat narrows the four results into one
 * 128-byte vector.
 */
unsigned_char128 alu25 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu15), val2, alu16), val3, alu18)*2481)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu15, 16843009u), alu16, 16843009u), alu18, 16843009u)*-302682)+alu8+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu23), val2, alu22), val3, alu24)*2481)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu23, 16843009u), alu22, 16843009u), alu24, 16843009u)*-302682)+alu8+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu14), val2, alu13), val3, alu17)*2481)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu14, 16843009u), alu13, 16843009u), alu17, 16843009u)*-302682)+alu8+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu19), val2, alu20), val3, alu21)*2481)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu19, 16843009u), alu20, 16843009u), alu21, 16843009u)*-302682)+alu8+32767)/65536))); | |
/* Output offset: 128 bytes per ridx2, 896 per ridx0, 25088 per ridx1. */
*((unsigned_char128*)((data0+((ridx2<<7)+(ridx0*896)+(ridx1*25088))))) = alu25; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_196_20_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 val0 = *((int32*)((data3+0))); | |
int32 alu0 = (val0*87); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|40); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:98); | |
int alu4 = (alu2?98:196); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu5); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu5, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 20; ridx1++) { | |
int alu11 = (alu5+((ridx1>>2)*25088)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu19+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu20 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu21 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu20))), val6, (*((unsigned int*)&alu24))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, (*((unsigned int*)&alu21))), val6, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, (*((unsigned int*)&alu22))), val6, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, (*((unsigned int*)&alu23))), val6, (*((unsigned int*)&alu27))); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu35 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*87)+(acc1[1]*-9918)+alu0+16383)/32768)+118), ((((acc4*87)+(acc1[0]*-9918)+alu0+16383)/32768)+118)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*87)+(acc0[1]*-9918)+alu0+16383)/32768)+118), ((((acc2*87)+(acc0[0]*-9918)+alu0+16383)/32768)+118))); | |
*((unsigned_char128*)((data0+alu5))) = alu35; | |
} | |
} | |
/*
 * r_196_4_8_32_4_6
 *
 * For each outer index ridx0 in the half selected by global_idx_0
 * (0 -> [0, 98), any other value -> [98, 196)), runs four steps of vrmpybus
 * accumulation between twelve 128-byte vectors from data2 and four 8-byte
 * groups from data1, then writes six saturated 128-byte vectors to data0 at
 * ridx0*128 + {0, 25088, 50176, 75264, 100352, 125440}.
 *
 * data3 supplies the per-lane additive terms (multiplied by 457 before use).
 * sync is not referenced in this function.
 * The int128 / int64 / unsigned_char8 / unsigned_char4 typedefs are declared
 * outside the part of the file visible here.
 */
__attribute__((noinline)) void r_196_4_8_32_4_6(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Zero initialisers for the 32-lane and 2-lane accumulators. */
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|48); | |
int2 cast1 = (int2){0,0}; | |
/* alu1 is true exactly when g0 == 0; alu2/alu3 are the ridx0 bounds. */
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:98); | |
int alu3 = (alu1?98:196); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
/* Byte offset of this row: 128 bytes per ridx0. */
int alu4 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu4); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+96)); | |
/*
 * Accumulators, all reset per ridx0:
 *   acc0, acc1   : 2-lane sums updated by vraddub_acc over the data1 groups;
 *   acc2..acc25  : six data2 vector pairs x four data1 byte groups;
 *   acc26..acc31 : one per data2 vector pair, accumulated with 0x01010101.
 */
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
int32 acc22 = cast0; | |
int32 acc23 = cast0; | |
int32 acc24 = cast0; | |
int32 acc25 = cast0; | |
int32 acc26 = cast0; | |
int32 acc27 = cast0; | |
int32 acc28 = cast0; | |
int32 acc29 = cast0; | |
int32 acc30 = cast0; | |
int32 acc31 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu10 = (ridx1<<8); | |
int alu11 = (alu4+(ridx1<<3)); | |
/* Six pairs of adjacent 128-byte data2 vectors; pairs start 1024 bytes apart. */
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu10))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+(alu10+128)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu10+1024)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu10+1152)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu10+2048)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu10+2176)))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu10+3072)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu10+3200)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu10+4096)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu10+4224)))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+(alu10+5120)))); | |
unsigned_char128 val11 = *((unsigned_char128*)((data2+(alu10+5248)))); | |
/* Four 8-byte groups from data1 at alu11 + {0, 32, 64, 96}; each load is preceded by a dcfetch 16 bytes ahead of it. */
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val12 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val13 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val14 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val15 = *((unsigned_char8*)((data1+alu17))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* vrmpybus of each data2 vector pair with 16843009u (0x01010101). */
acc26 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc26, val0, 16843009u), val1, 16843009u); | |
acc27 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc27, val2, 16843009u), val3, 16843009u); | |
acc28 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc28, val4, 16843009u), val5, 16843009u); | |
acc29 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc29, val6, 16843009u), val7, 16843009u); | |
acc30 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc30, val8, 16843009u), val9, 16843009u); | |
acc31 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc31, val10, 16843009u), val11, 16843009u); | |
/* Split every 8-byte group into bytes 0..3 (cast2..cast5) and bytes 4..7 (cast6..cast9), each reinterpreted as one unsigned int. */
unsigned_char4 alu25 = __builtin_shufflevector(val12, val12, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val13, val13, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val14, val14, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val15, val15, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val12, val12, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val13, val13, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val14, val14, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu31)); | |
unsigned_char4 alu32 = __builtin_shufflevector(val15, val15, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu32)); | |
/* Each accumulator pairs one data2 vector pair with one byte group: the first vector takes the group's low word, the second its high word. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val0, cast2), val1, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val0, cast3), val1, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val0, cast4), val1, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val0, cast5), val1, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val2, cast2), val3, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val2, cast3), val3, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val2, cast4), val3, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val2, cast5), val3, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val4, cast2), val5, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val4, cast3), val5, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val4, cast4), val5, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val4, cast5), val5, cast9); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val6, cast2), val7, cast6); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val6, cast3), val7, cast7); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val6, cast4), val7, cast8); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val6, cast5), val7, cast9); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val8, cast2), val9, cast6); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val8, cast3), val9, cast7); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val8, cast4), val9, cast8); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val8, cast5), val9, cast9); | |
acc22 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc22, val10, cast2), val11, cast6); | |
acc23 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc23, val10, cast3), val11, cast7); | |
acc24 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc24, val10, cast4), val11, cast8); | |
acc25 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc25, val10, cast5), val11, cast9); | |
/* acc0 accumulates over groups val12/val13, acc1 over val14/val15. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val12)), (*((long long*)&val13))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val14)), (*((long long*)&val15))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
/*
 * data3 is int*, so data3+128 is 128 ints past data3. val16 is split into
 * four 32-lane slices and val17 into two, giving one slice per output vector;
 * each slice is scaled by 457.
 * These loads do not depend on ridx0 but are issued on every iteration.
 */
int128 val16 = *((int128*)((data3+0))); | |
int64 val17 = *((int64*)((data3+128))); | |
int32 alu60 = __builtin_shufflevector(val17, val17, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu61 = (alu60*457); | |
int32 alu62 = __builtin_shufflevector(val16, val16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu63 = (alu62*457); | |
int32 alu64 = __builtin_shufflevector(val17, val17, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu65 = (alu64*457); | |
int32 alu66 = __builtin_shufflevector(val16, val16, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu67 = (alu66*457); | |
int32 alu68 = __builtin_shufflevector(val16, val16, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu69 = (alu68*457); | |
int32 alu70 = __builtin_shufflevector(val16, val16, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu71 = (alu70*457); | |
/* Per-output term derived from the data2 sums acc26..acc31. */
int32 alu72 = (acc26*-53926); | |
int32 alu73 = (acc27*-53926); | |
int32 alu74 = (acc28*-53926); | |
int32 alu75 = (acc29*-53926); | |
int32 alu76 = (acc30*-53926); | |
int32 alu77 = (acc31*-53926); | |
/*
 * Six output vectors, 25088 bytes apart. Each packs four 32-lane results of
 * the form (acc*457 + data2-sum term + acc0/acc1 lane * -57125 + data3 slice
 * + 32767) / 65536, narrowed with vpackwh_sat and then vpackhub_sat.
 */
unsigned_char128 alu78 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*457)+alu72+(acc1[1]*-57125)+alu63+32767)/65536), (((acc4*457)+alu72+(acc1[0]*-57125)+alu63+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*457)+alu72+(acc0[1]*-57125)+alu63+32767)/65536), (((acc2*457)+alu72+(acc0[0]*-57125)+alu63+32767)/65536))); | |
*((unsigned_char128*)((data0+alu4))) = alu78; | |
unsigned_char128 alu80 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*457)+alu73+(acc1[1]*-57125)+alu67+32767)/65536), (((acc8*457)+alu73+(acc1[0]*-57125)+alu67+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*457)+alu73+(acc0[1]*-57125)+alu67+32767)/65536), (((acc6*457)+alu73+(acc0[0]*-57125)+alu67+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+25088)))) = alu80; | |
unsigned_char128 alu82 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*457)+alu74+(acc1[1]*-57125)+alu69+32767)/65536), (((acc12*457)+alu74+(acc1[0]*-57125)+alu69+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*457)+alu74+(acc0[1]*-57125)+alu69+32767)/65536), (((acc10*457)+alu74+(acc0[0]*-57125)+alu69+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+50176)))) = alu82; | |
unsigned_char128 alu84 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*457)+alu75+(acc1[1]*-57125)+alu71+32767)/65536), (((acc16*457)+alu75+(acc1[0]*-57125)+alu71+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*457)+alu75+(acc0[1]*-57125)+alu71+32767)/65536), (((acc14*457)+alu75+(acc0[0]*-57125)+alu71+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+75264)))) = alu84; | |
unsigned_char128 alu86 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*457)+alu76+(acc1[1]*-57125)+alu61+32767)/65536), (((acc20*457)+alu76+(acc1[0]*-57125)+alu61+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*457)+alu76+(acc0[1]*-57125)+alu61+32767)/65536), (((acc18*457)+alu76+(acc0[0]*-57125)+alu61+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+100352)))) = alu86; | |
unsigned_char128 alu88 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc25*457)+alu77+(acc1[1]*-57125)+alu65+32767)/65536), (((acc24*457)+alu77+(acc1[0]*-57125)+alu65+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc23*457)+alu77+(acc0[1]*-57125)+alu65+32767)/65536), (((acc22*457)+alu77+(acc0[0]*-57125)+alu65+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+125440)))) = alu88; | |
} | |
} | |
__attribute__((noinline)) void r_28_6_7_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|18); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:14); | |
int alu4 = (alu2?14:28); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*7); | |
int alu6 = (ridx0*896); | |
_Bool alu7 = ((ridx0<1)!=1); | |
_Bool alu8 = (ridx0<27); | |
for (int ridx1 = 0; ridx1 < 6; ridx1++) { | |
int alu9 = (ridx1*384); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu9))); | |
int32 val1 = *((int32*)((data3+(ridx1<<5)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu9+128)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu9+256)))); | |
int32 alu10 = (val1*999); | |
int alu11 = (ridx1*25088); | |
__builtin_HEXAGON_Y4_l2fetch(data1+(alu11+alu6), 0x808000|15); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu5+8)%196)<<7)+alu11), 0x808000|15); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu5+188)%196)<<7)+alu11), 0x808000|15); | |
for (int ridx2 = 0; ridx2 < 7; ridx2++) { | |
int alu15 = (ridx2<<7); | |
_Bool alu16 = ((ridx2<1)!=1); | |
_Bool alu17 = (ridx2<6); | |
int alu18 = (alu5+ridx2); | |
int alu19 = (alu11+alu6+alu15); | |
unsigned_char128 val4 = *((unsigned_char128*)((data1+alu19))); | |
unsigned_char128 val5 = ((alu8&alu17)?*((unsigned_char128*)((data1+((((alu18+8)%196)<<7)+alu11)))):cast0); | |
unsigned_char128 val6 = (alu7?*((unsigned_char128*)((data1+(alu19+-896)))):cast0); | |
unsigned_char128 val7 = ((alu7&alu17)?*((unsigned_char128*)((data1+(alu19+-768)))):cast0); | |
unsigned_char128 val8 = (alu16?*((unsigned_char128*)((data1+(alu19+-32)))):cast0); | |
unsigned_char128 val9 = (alu17?*((unsigned_char128*)((data1+(alu19+128)))):cast0); | |
unsigned_char128 val10 = ((alu8&alu16)?*((unsigned_char128*)((data1+(alu19+864)))):cast0); | |
unsigned_char128 val11 = (alu8?*((unsigned_char128*)((data1+(alu19+896)))):cast0); | |
unsigned_char128 val12 = ((alu7&alu16)?*((unsigned_char128*)((data1+((((alu18+188)%196)<<7)+alu11+96)))):cast0); | |
unsigned_char128 alu20 = __builtin_shufflevector(val4, val4, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val6, val6, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val11, val11, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val4, val4, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val6, val6, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val11, val11, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val8, val4, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu27 = __builtin_shufflevector(val10, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val12, val6, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val4, val9, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val6, val7, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val11, val5, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu32 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu30), val2, alu29), val3, alu31)*999)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu30, 16843009u), alu29, 16843009u), alu31, 16843009u)*-122877)+alu10+16383)/32768), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu24), val2, alu23), val3, alu25)*999)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu24, 16843009u), alu23, 16843009u), alu25, 16843009u)*-122877)+alu10+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu21), val2, alu20), val3, alu22)*999)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu21, 16843009u), alu20, 16843009u), alu22, 16843009u)*-122877)+alu10+16383)/32768), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu28), val2, alu26), val3, alu27)*999)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu28, 16843009u), alu26, 16843009u), alu27, 16843009u)*-122877)+alu10+16383)/32768))); | |
*((unsigned_char128*)((data0+(alu15+alu6+alu11)))) = alu32; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_196_24_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 val0 = *((int32*)((data3+0))); | |
int32 alu0 = (val0*161); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|48); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:98); | |
int alu4 = (alu2?98:196); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu5); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu5, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 24; ridx1++) { | |
int alu11 = (alu5+((ridx1>>2)*25088)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu19+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu20 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu21 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu20))), val6, (*((unsigned int*)&alu24))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, (*((unsigned int*)&alu21))), val6, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, (*((unsigned int*)&alu22))), val6, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, (*((unsigned int*)&alu23))), val6, (*((unsigned int*)&alu27))); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu35 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*161)+(acc1[1]*-20930)+alu0+32767)/65536)+123), ((((acc4*161)+(acc1[0]*-20930)+alu0+32767)/65536)+123)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*161)+(acc0[1]*-20930)+alu0+32767)/65536)+123), ((((acc2*161)+(acc0[0]*-20930)+alu0+32767)/65536)+123))); | |
*((unsigned_char128*)((data0+alu5))) = alu35; | |
} | |
} | |
__attribute__((noinline)) void E_196_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:98); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|8); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|8); | |
int alu5 = (alu0?98:196); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*2219)+(cast6*2474)+24955)/65536)*348127)+4392)/16384)+-71), (((((((cast5*2219)+(cast4*2474)+24955)/65536)*348127)+4392)/16384)+-71)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*2219)+(cast2*2474)+24955)/65536)*348127)+4392)/16384)+-71), (((((((cast1*2219)+(cast0*2474)+24955)/65536)*348127)+4392)/16384)+-71))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void r_196_4_8_32_4_6n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|48); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:98); | |
int alu3 = (alu1?98:196); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu4); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
int32 acc22 = cast0; | |
int32 acc23 = cast0; | |
int32 acc24 = cast0; | |
int32 acc25 = cast0; | |
int32 acc26 = cast0; | |
int32 acc27 = cast0; | |
int32 acc28 = cast0; | |
int32 acc29 = cast0; | |
int32 acc30 = cast0; | |
int32 acc31 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu10 = (ridx1<<8); | |
int alu11 = (alu4+(ridx1<<3)); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu10))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+(alu10+128)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu10+1024)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu10+1152)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu10+2048)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu10+2176)))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu10+3072)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu10+3200)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu10+4096)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu10+4224)))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+(alu10+5120)))); | |
unsigned_char128 val11 = *((unsigned_char128*)((data2+(alu10+5248)))); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val12 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val13 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val14 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val15 = *((unsigned_char8*)((data1+alu17))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
acc26 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc26, val0, 16843009u), val1, 16843009u); | |
acc27 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc27, val2, 16843009u), val3, 16843009u); | |
acc28 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc28, val4, 16843009u), val5, 16843009u); | |
acc29 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc29, val6, 16843009u), val7, 16843009u); | |
acc30 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc30, val8, 16843009u), val9, 16843009u); | |
acc31 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc31, val10, 16843009u), val11, 16843009u); | |
unsigned_char4 alu25 = __builtin_shufflevector(val12, val12, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val13, val13, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val14, val14, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val15, val15, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val12, val12, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val13, val13, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val14, val14, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu31)); | |
unsigned_char4 alu32 = __builtin_shufflevector(val15, val15, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu32)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val0, cast2), val1, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val0, cast3), val1, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val0, cast4), val1, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val0, cast5), val1, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val2, cast2), val3, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val2, cast3), val3, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val2, cast4), val3, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val2, cast5), val3, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val4, cast2), val5, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val4, cast3), val5, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val4, cast4), val5, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val4, cast5), val5, cast9); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val6, cast2), val7, cast6); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val6, cast3), val7, cast7); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val6, cast4), val7, cast8); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val6, cast5), val7, cast9); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val8, cast2), val9, cast6); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val8, cast3), val9, cast7); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val8, cast4), val9, cast8); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val8, cast5), val9, cast9); | |
acc22 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc22, val10, cast2), val11, cast6); | |
acc23 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc23, val10, cast3), val11, cast7); | |
acc24 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc24, val10, cast4), val11, cast8); | |
acc25 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc25, val10, cast5), val11, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val12)), (*((long long*)&val13))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val14)), (*((long long*)&val15))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int128 val16 = *((int128*)((data3+0))); | |
int64 val17 = *((int64*)((data3+128))); | |
int32 alu60 = __builtin_shufflevector(val17, val17, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu61 = (alu60*445); | |
int32 alu62 = __builtin_shufflevector(val16, val16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu63 = (alu62*445); | |
int32 alu64 = __builtin_shufflevector(val17, val17, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu65 = (alu64*445); | |
int32 alu66 = __builtin_shufflevector(val16, val16, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu67 = (alu66*445); | |
int32 alu68 = __builtin_shufflevector(val16, val16, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu69 = (alu68*445); | |
int32 alu70 = __builtin_shufflevector(val16, val16, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu71 = (alu70*445); | |
int32 alu72 = (acc26*-53400); | |
int32 alu73 = (acc27*-53400); | |
int32 alu74 = (acc28*-53400); | |
int32 alu75 = (acc29*-53400); | |
int32 alu76 = (acc30*-53400); | |
int32 alu77 = (acc31*-53400); | |
unsigned_char128 alu78 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*445)+alu72+(acc1[1]*-51175)+alu63+32767)/65536), (((acc4*445)+alu72+(acc1[0]*-51175)+alu63+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*445)+alu72+(acc0[1]*-51175)+alu63+32767)/65536), (((acc2*445)+alu72+(acc0[0]*-51175)+alu63+32767)/65536))); | |
*((unsigned_char128*)((data0+alu4))) = alu78; | |
unsigned_char128 alu80 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*445)+alu73+(acc1[1]*-51175)+alu67+32767)/65536), (((acc8*445)+alu73+(acc1[0]*-51175)+alu67+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*445)+alu73+(acc0[1]*-51175)+alu67+32767)/65536), (((acc6*445)+alu73+(acc0[0]*-51175)+alu67+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+25088)))) = alu80; | |
unsigned_char128 alu82 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*445)+alu74+(acc1[1]*-51175)+alu69+32767)/65536), (((acc12*445)+alu74+(acc1[0]*-51175)+alu69+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*445)+alu74+(acc0[1]*-51175)+alu69+32767)/65536), (((acc10*445)+alu74+(acc0[0]*-51175)+alu69+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+50176)))) = alu82; | |
unsigned_char128 alu84 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*445)+alu75+(acc1[1]*-51175)+alu71+32767)/65536), (((acc16*445)+alu75+(acc1[0]*-51175)+alu71+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*445)+alu75+(acc0[1]*-51175)+alu71+32767)/65536), (((acc14*445)+alu75+(acc0[0]*-51175)+alu71+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+75264)))) = alu84; | |
unsigned_char128 alu86 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*445)+alu76+(acc1[1]*-51175)+alu61+32767)/65536), (((acc20*445)+alu76+(acc1[0]*-51175)+alu61+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*445)+alu76+(acc0[1]*-51175)+alu61+32767)/65536), (((acc18*445)+alu76+(acc0[0]*-51175)+alu61+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+100352)))) = alu86; | |
unsigned_char128 alu88 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc25*445)+alu77+(acc1[1]*-51175)+alu65+32767)/65536), (((acc24*445)+alu77+(acc1[0]*-51175)+alu65+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc23*445)+alu77+(acc0[1]*-51175)+alu65+32767)/65536), (((acc22*445)+alu77+(acc0[0]*-51175)+alu65+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu4+125440)))) = alu88; | |
} | |
} | |
__attribute__((noinline)) void r_28_6_7_3_3_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|18); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:14); | |
int alu4 = (alu2?14:28); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*7); | |
int alu6 = (ridx0*896); | |
_Bool alu7 = ((ridx0<1)!=1); | |
_Bool alu8 = (ridx0<27); | |
for (int ridx1 = 0; ridx1 < 6; ridx1++) { | |
int alu9 = (ridx1*384); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu9))); | |
int32 val1 = *((int32*)((data3+(ridx1<<5)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu9+128)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu9+256)))); | |
int32 alu10 = (val1*1241); | |
int alu11 = (ridx1*25088); | |
__builtin_HEXAGON_Y4_l2fetch(data1+(alu11+alu6), 0x808000|15); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu5+8)%196)<<7)+alu11), 0x808000|15); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu5+188)%196)<<7)+alu11), 0x808000|15); | |
for (int ridx2 = 0; ridx2 < 7; ridx2++) { | |
int alu15 = (ridx2<<7); | |
_Bool alu16 = ((ridx2<1)!=1); | |
_Bool alu17 = (ridx2<6); | |
int alu18 = (alu5+ridx2); | |
int alu19 = (alu11+alu6+alu15); | |
unsigned_char128 val4 = *((unsigned_char128*)((data1+alu19))); | |
unsigned_char128 val5 = ((alu8&alu17)?*((unsigned_char128*)((data1+((((alu18+8)%196)<<7)+alu11)))):cast0); | |
unsigned_char128 val6 = (alu7?*((unsigned_char128*)((data1+(alu19+-896)))):cast0); | |
unsigned_char128 val7 = ((alu7&alu17)?*((unsigned_char128*)((data1+(alu19+-768)))):cast0); | |
unsigned_char128 val8 = (alu16?*((unsigned_char128*)((data1+(alu19+-32)))):cast0); | |
unsigned_char128 val9 = (alu17?*((unsigned_char128*)((data1+(alu19+128)))):cast0); | |
unsigned_char128 val10 = ((alu8&alu16)?*((unsigned_char128*)((data1+(alu19+864)))):cast0); | |
unsigned_char128 val11 = (alu8?*((unsigned_char128*)((data1+(alu19+896)))):cast0); | |
unsigned_char128 val12 = ((alu7&alu16)?*((unsigned_char128*)((data1+((((alu18+188)%196)<<7)+alu11+96)))):cast0); | |
unsigned_char128 alu20 = __builtin_shufflevector(val4, val4, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val6, val6, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val11, val11, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val4, val4, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val6, val6, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val11, val11, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val8, val4, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu27 = __builtin_shufflevector(val10, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val12, val6, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val4, val9, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val6, val7, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val11, val5, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu32 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu30), val2, alu29), val3, alu31)*1241)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu30, 16843009u), alu29, 16843009u), alu31, 16843009u)*-168776)+alu10+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu24), val2, alu23), val3, alu25)*1241)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu24, 16843009u), alu23, 16843009u), alu25, 16843009u)*-168776)+alu10+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu21), val2, alu20), val3, alu22)*1241)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu21, 16843009u), alu20, 16843009u), alu22, 16843009u)*-168776)+alu10+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu28), val2, alu26), val3, alu27)*1241)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu28, 16843009u), alu26, 16843009u), alu27, 16843009u)*-168776)+alu10+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu15+alu6+alu11)))) = alu32; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_196_24_8_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 val0 = *((int32*)((data3+0))); | |
int32 alu0 = (val0*109); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|48); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:98); | |
int alu4 = (alu2?98:196); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu5); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu5, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu5+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 24; ridx1++) { | |
int alu11 = (alu5+((ridx1>>2)*25088)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu19+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu20 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu21 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu20))), val6, (*((unsigned int*)&alu24))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, (*((unsigned int*)&alu21))), val6, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, (*((unsigned int*)&alu22))), val6, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, (*((unsigned int*)&alu23))), val6, (*((unsigned int*)&alu27))); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu35 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*109)+(acc1[1]*-14279)+alu0+16383)/32768)+136), ((((acc4*109)+(acc1[0]*-14279)+alu0+16383)/32768)+136)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*109)+(acc0[1]*-14279)+alu0+16383)/32768)+136), ((((acc2*109)+(acc0[0]*-14279)+alu0+16383)/32768)+136))); | |
*((unsigned_char128*)((data0+alu5))) = alu35; | |
} | |
} | |
__attribute__((noinline)) void E_196_128n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:98); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|8); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|8); | |
int alu5 = (alu0?98:196); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*530)+(cast6*771)+15624)/16384)*583863)+16418)/32768)+-76), (((((((cast5*530)+(cast4*771)+15624)/16384)*583863)+16418)/32768)+-76)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*530)+(cast2*771)+15624)/16384)*583863)+16418)/32768)+-76), (((((((cast1*530)+(cast0*771)+15624)/16384)*583863)+16418)/32768)+-76))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void r_196_4_8_32_4_6n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|48); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:98); | |
int alu3 = (alu1?98:196); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu4); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu4, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu4+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
int32 acc22 = cast0; | |
int32 acc23 = cast0; | |
int32 acc24 = cast0; | |
int32 acc25 = cast0; | |
int32 acc26 = cast0; | |
int32 acc27 = cast0; | |
int32 acc28 = cast0; | |
int32 acc29 = cast0; | |
int32 acc30 = cast0; | |
int32 acc31 = cast0; | |
for (int ridx1 = 0; ridx1 < 4; ridx1++) { | |
int alu10 = (ridx1<<8); | |
int alu11 = (alu4+(ridx1<<3)); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu10))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+(alu10+128)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+(alu10+1024)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu10+1152)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu10+2048)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu10+2176)))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu10+3072)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu10+3200)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu10+4096)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu10+4224)))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+(alu10+5120)))); | |
unsigned_char128 val11 = *((unsigned_char128*)((data2+(alu10+5248)))); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu11+16)); | |
unsigned_char8 val12 = *((unsigned_char8*)((data1+alu11))); | |
int alu13 = (alu11+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu13+16)); | |
unsigned_char8 val13 = *((unsigned_char8*)((data1+alu13))); | |
int alu15 = (alu11+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val14 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu11+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val15 = *((unsigned_char8*)((data1+alu17))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
acc26 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc26, val0, 16843009u), val1, 16843009u); | |
acc27 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc27, val2, 16843009u), val3, 16843009u); | |
acc28 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc28, val4, 16843009u), val5, 16843009u); | |
acc29 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc29, val6, 16843009u), val7, 16843009u); | |
acc30 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc30, val8, 16843009u), val9, 16843009u); | |
acc31 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc31, val10, 16843009u), val11, 16843009u); | |
unsigned_char4 alu25 = __builtin_shufflevector(val12, val12, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val13, val13, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val14, val14, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val15, val15, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val12, val12, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val13, val13, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val14, val14, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu31)); | |
unsigned_char4 alu32 = __builtin_shufflevector(val15, val15, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu32)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val0, cast2), val1, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val0, cast3), val1, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val0, cast4), val1, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val0, cast5), val1, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val2, cast2), val3, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val2, cast3), val3, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val2, cast4), val3, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val2, cast5), val3, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val4, cast2), val5, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val4, cast3), val5, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val4, cast4), val5, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val4, cast5), val5, cast9); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val6, cast2), val7, cast6); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val6, cast3), val7, cast7); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val6, cast4), val7, cast8); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val6, cast5), val7, cast9); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val8, cast2), val9, cast6); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val8, cast3), val9, cast7); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val8, cast4), val9, cast8); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val8, cast5), val9, cast9); | |
acc22 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc22, val10, cast2), val11, cast6); | |
acc23 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc23, val10, cast3), val11, cast7); | |
acc24 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc24, val10, cast4), val11, cast8); | |
acc25 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc25, val10, cast5), val11, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val12)), (*((long long*)&val13))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val14)), (*((long long*)&val15))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int128 val16 = *((int128*)((data3+0))); | |
int64 val17 = *((int64*)((data3+128))); | |
int32 alu60 = __builtin_shufflevector(val17, val17, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu61 = (alu60*261); | |
int32 alu62 = __builtin_shufflevector(val16, val16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu63 = (alu62*261); | |
int32 alu64 = __builtin_shufflevector(val17, val17, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu65 = (alu64*261); | |
int32 alu66 = __builtin_shufflevector(val16, val16, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu67 = (alu66*261); | |
int32 alu68 = __builtin_shufflevector(val16, val16, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu69 = (alu68*261); | |
int32 alu70 = __builtin_shufflevector(val16, val16, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu71 = (alu70*261); | |
int32 alu72 = (acc26*-31320); | |
int32 alu73 = (acc27*-31320); | |
int32 alu74 = (acc28*-31320); | |
int32 alu75 = (acc29*-31320); | |
int32 alu76 = (acc30*-31320); | |
int32 alu77 = (acc31*-31320); | |
unsigned_char128 alu78 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*261)+alu72+(acc1[1]*-30015)+alu63+16383)/32768), (((acc4*261)+alu72+(acc1[0]*-30015)+alu63+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*261)+alu72+(acc0[1]*-30015)+alu63+16383)/32768), (((acc2*261)+alu72+(acc0[0]*-30015)+alu63+16383)/32768))); | |
*((unsigned_char128*)((data0+alu4))) = alu78; | |
unsigned_char128 alu80 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*261)+alu73+(acc1[1]*-30015)+alu67+16383)/32768), (((acc8*261)+alu73+(acc1[0]*-30015)+alu67+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*261)+alu73+(acc0[1]*-30015)+alu67+16383)/32768), (((acc6*261)+alu73+(acc0[0]*-30015)+alu67+16383)/32768))); | |
*((unsigned_char128*)((data0+(alu4+25088)))) = alu80; | |
unsigned_char128 alu82 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*261)+alu74+(acc1[1]*-30015)+alu69+16383)/32768), (((acc12*261)+alu74+(acc1[0]*-30015)+alu69+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*261)+alu74+(acc0[1]*-30015)+alu69+16383)/32768), (((acc10*261)+alu74+(acc0[0]*-30015)+alu69+16383)/32768))); | |
*((unsigned_char128*)((data0+(alu4+50176)))) = alu82; | |
unsigned_char128 alu84 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*261)+alu75+(acc1[1]*-30015)+alu71+16383)/32768), (((acc16*261)+alu75+(acc1[0]*-30015)+alu71+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*261)+alu75+(acc0[1]*-30015)+alu71+16383)/32768), (((acc14*261)+alu75+(acc0[0]*-30015)+alu71+16383)/32768))); | |
*((unsigned_char128*)((data0+(alu4+75264)))) = alu84; | |
unsigned_char128 alu86 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*261)+alu76+(acc1[1]*-30015)+alu61+16383)/32768), (((acc20*261)+alu76+(acc1[0]*-30015)+alu61+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*261)+alu76+(acc0[1]*-30015)+alu61+16383)/32768), (((acc18*261)+alu76+(acc0[0]*-30015)+alu61+16383)/32768))); | |
*((unsigned_char128*)((data0+(alu4+100352)))) = alu86; | |
unsigned_char128 alu88 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc25*261)+alu77+(acc1[1]*-30015)+alu65+16383)/32768), (((acc24*261)+alu77+(acc1[0]*-30015)+alu65+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc23*261)+alu77+(acc0[1]*-30015)+alu65+16383)/32768), (((acc22*261)+alu77+(acc0[0]*-30015)+alu65+16383)/32768))); | |
*((unsigned_char128*)((data0+(alu4+125440)))) = alu88; | |
} | |
} | |
__attribute__((noinline)) void r_14_6_4_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|18); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:7); | |
int alu4 = (alu2?7:14); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*1792); | |
_Bool alu6 = ((ridx0<1)!=1); | |
for (int ridx1 = 0; ridx1 < 6; ridx1++) { | |
int alu7 = (ridx1*384); | |
int alu8 = (alu7+128); | |
int alu9 = (alu7+256); | |
int alu10 = (ridx1<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu7))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu8))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu9))); | |
int32 val3 = *((int32*)((data3+alu10))); | |
int32 alu11 = (val3*636); | |
int alu12 = (ridx1*25088); | |
__builtin_HEXAGON_Y4_l2fetch(data1+(alu12+alu5), 0x808000|17); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
_Bool alu14 = (ridx2<3); | |
_Bool alu15 = ((ridx2<1)!=1); | |
unsigned_char128 val4 = (alu14?*((unsigned_char128*)((data2+alu7))):cast0); | |
unsigned_char128 val5 = (alu14?*((unsigned_char128*)((data2+alu8))):cast0); | |
unsigned_char128 val6 = (alu14?*((unsigned_char128*)((data2+alu9))):cast0); | |
int32 val7 = (alu14?*((int32*)((data3+alu10))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu16 = (val7*636); | |
_Bool alu17 = (alu14&alu6); | |
int alu18 = (alu12+alu5+(ridx2<<8)); | |
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu18))); | |
unsigned_char128 val9 = ((alu15&alu6)?*((unsigned_char128*)((data1+(alu18+-928)))):cast0); | |
unsigned_char128 val10 = (alu6?*((unsigned_char128*)((data1+(alu18+-896)))):cast0); | |
unsigned_char128 val11 = (alu17?*((unsigned_char128*)((data1+(alu18+-800)))):cast0); | |
unsigned_char128 val12 = (alu17?*((unsigned_char128*)((data1+(alu18+-672)))):cast0); | |
unsigned_char128 val13 = (alu15?*((unsigned_char128*)((data1+(alu18+-32)))):cast0); | |
unsigned_char128 val14 = (alu14?*((unsigned_char128*)((data1+(alu18+96)))):cast0); | |
unsigned_char128 val15 = (alu14?*((unsigned_char128*)((data1+(alu18+224)))):cast0); | |
unsigned_char128 val16 = (alu15?*((unsigned_char128*)((data1+(alu18+864)))):cast0); | |
unsigned_char128 val17 = *((unsigned_char128*)((data1+(alu18+896)))); | |
unsigned_char128 val18 = (alu14?*((unsigned_char128*)((data1+(alu18+992)))):cast0); | |
unsigned_char128 val19 = (alu14?*((unsigned_char128*)((data1+(alu18+1120)))):cast0); | |
unsigned_char128 alu19 = __builtin_shufflevector(val11, val11, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu20 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val18, val18, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val8, val8, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val10, val10, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val17, val17, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val9, val10, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu27 = __builtin_shufflevector(val16, val17, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val11, val12, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val14, val15, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val18, val19, 64, 96, 128, -1, 65, 97, 129, -1, 66, 98, 130, -1, 67, 99, 131, -1, 68, 100, 132, -1, 69, 101, 133, -1, 70, 102, 134, -1, 71, 103, 135, -1, 72, 104, 136, -1, 73, 105, 137, -1, 74, 106, 138, -1, 75, 107, 139, -1, 76, 108, 140, -1, 77, 109, 141, -1, 78, 110, 142, -1, 79, 111, 143, -1, 80, 112, 144, -1, 81, 113, 145, -1, 82, 114, 146, -1, 83, 115, 147, -1, 84, 116, 148, -1, 85, 117, 149, -1, 86, 118, 150, -1, 87, 119, 151, -1, 88, 120, 152, -1, 89, 121, 153, -1, 90, 122, 154, -1, 91, 123, 155, -1, 92, 124, 156, -1, 93, 125, 157, -1, 94, 126, 158, -1, 95, 127, 159, -1); | |
unsigned_char128 alu31 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu28), val5, alu29), val6, alu30)*636)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu28, 16843009u), alu29, 16843009u), alu30, 16843009u)*-89040)+alu16+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu19), val5, alu20), val6, alu21)*636)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu19, 16843009u), alu20, 16843009u), alu21, 16843009u)*-89040)+alu16+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu23), val1, alu22), val2, alu24)*636)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu23, 16843009u), alu22, 16843009u), alu24, 16843009u)*-89040)+alu11+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu25), val1, alu26), val2, alu27)*636)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu25, 16843009u), alu26, 16843009u), alu27, 16843009u)*-89040)+alu11+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx2<<7)+(ridx0*448)+(ridx1*6272))))) = alu31; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_49_24_8_32_4_2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int64 val0 = *((int64*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*203); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*203); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|97); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu5 = ((g0!=0)!=1); | |
int alu6 = (alu5?0:24); | |
int alu7 = (alu5?24:49); | |
for (int ridx0 = alu6; ridx0 < alu7; ridx0++) { | |
int alu8 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu8, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
for (int ridx1 = 0; ridx1 < 24; ridx1++) { | |
int alu14 = (alu8+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu14+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu14))); | |
int alu16 = (alu14+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu14+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu14+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu20))); | |
int alu22 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu22+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu22+6144)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu22+6272)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu23 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu23)); | |
unsigned_char4 alu24 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu30)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu42 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*203)+(acc1[1]*-23548)+alu1+32767)/65536)+126), ((((acc4*203)+(acc1[0]*-23548)+alu1+32767)/65536)+126)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*203)+(acc0[1]*-23548)+alu1+32767)/65536)+126), ((((acc2*203)+(acc0[0]*-23548)+alu1+32767)/65536)+126))); | |
*((unsigned_char128*)((data0+alu8))) = alu42; | |
unsigned_char128 alu44 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*203)+(acc1[1]*-23548)+alu3+32767)/65536)+126), ((((acc8*203)+(acc1[0]*-23548)+alu3+32767)/65536)+126)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*203)+(acc0[1]*-23548)+alu3+32767)/65536)+126), ((((acc6*203)+(acc0[0]*-23548)+alu3+32767)/65536)+126))); | |
*((unsigned_char128*)((data0+(alu8+6272)))) = alu44; | |
} | |
} | |
__attribute__((noinline)) void r_12_49_8_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:6); | |
int alu3 = (alu1?6:12); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<11); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|33); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
int alu6 = (ridx1<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 8; ridx2++) { | |
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
int alu13 = (alu4+(ridx2<<8)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*443); | |
int32 alu38 = (acc2*-55818); | |
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*443)+alu38+(acc1[1]*-54046)+alu37+32767)/65536), (((acc5*443)+alu38+(acc1[0]*-54046)+alu37+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*443)+alu38+(acc0[1]*-54046)+alu37+32767)/65536), (((acc3*443)+alu38+(acc0[0]*-54046)+alu37+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
__attribute__((noinline)) void r_12_14_4_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|36); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:6); | |
int alu4 = (alu2?6:12); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*1314); | |
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 14; ridx1++) { | |
int alu11 = (ridx1*7); | |
int alu12 = (ridx1*448); | |
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<13); | |
int alu15 = (alu10+alu12); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+8)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+9)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+90)%98)<<6)+alu10), 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
_Bool alu20 = (ridx2<3); | |
int alu21 = (ridx2<<7); | |
_Bool alu22 = ((ridx2<1)!=1); | |
unsigned_char128 val4 = (alu20?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu20?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu20?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu20?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu23 = (val7*1314); | |
int alu24 = (alu11+(ridx2<<1)); | |
_Bool alu25 = (alu20&alu14); | |
int alu26 = (alu10+alu12+alu21); | |
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu26))); | |
unsigned_char128 val9 = (alu25?*((unsigned_char128*)((data1+((((alu24+8)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val10 = (alu25?*((unsigned_char128*)((data1+((((alu24+9)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val11 = (alu13?*((unsigned_char128*)((data1+(alu26+-448)))):cast0); | |
unsigned_char128 val12 = ((alu20&alu13)?*((unsigned_char128*)((data1+(alu26+-416)))):cast0); | |
unsigned_char128 val13 = (alu22?*((unsigned_char128*)((data1+(alu26+-32)))):cast0); | |
unsigned_char128 val14 = (alu20?*((unsigned_char128*)((data1+(alu26+32)))):cast0); | |
unsigned_char128 val15 = ((alu22&alu14)?*((unsigned_char128*)((data1+(alu26+416)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu26+448)))):cast0); | |
unsigned_char128 val17 = (alu25?*((unsigned_char128*)((data1+(alu26+480)))):cast0); | |
unsigned_char128 val18 = ((alu22&alu13)?*((unsigned_char128*)((data1+((((alu24+90)%98)<<6)+alu10+32)))):cast0); | |
unsigned_char128 alu27 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val9, val10, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val9, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu33 = __builtin_shufflevector(val8, val14, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu34 = __builtin_shufflevector(val11, val12, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu35 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu36 = __builtin_shufflevector(val15, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu37 = __builtin_shufflevector(val17, val9, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu38 = __builtin_shufflevector(val18, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu29), val5, alu30), val6, alu31)*1314)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu30, 16843009u), alu31, 16843009u)*-136656)+alu23+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu27), val5, alu28), val6, alu37)*1314)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu37, 16843009u)*-136656)+alu23+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu34), val1, alu33), val2, alu32)*1314)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu34, 16843009u), alu33, 16843009u), alu32, 16843009u)*-136656)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu38), val1, alu35), val2, alu36)*1314)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu38, 16843009u), alu35, 16843009u), alu36, 16843009u)*-136656)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu21+alu15)))) = alu39; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_49_48_8_32_4_2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int64 val0 = *((int64*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*141); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*141); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu5 = ((g0!=0)!=1); | |
int alu6 = (alu5?0:24); | |
int alu7 = (alu5?24:49); | |
for (int ridx0 = alu6; ridx0 < alu7; ridx0++) { | |
int alu8 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu8, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
for (int ridx1 = 0; ridx1 < 48; ridx1++) { | |
int alu14 = (alu8+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu14+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu14))); | |
int alu16 = (alu14+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu14+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu14+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu20))); | |
int alu22 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu22+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu22+12288)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu22+12416)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu23 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu23)); | |
unsigned_char4 alu24 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu30)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu42 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*141)+(acc1[1]*-16356)+alu1+32767)/65536)+138), ((((acc4*141)+(acc1[0]*-16356)+alu1+32767)/65536)+138)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*141)+(acc0[1]*-16356)+alu1+32767)/65536)+138), ((((acc2*141)+(acc0[0]*-16356)+alu1+32767)/65536)+138))); | |
*((unsigned_char128*)((data0+alu8))) = alu42; | |
unsigned_char128 alu44 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*141)+(acc1[1]*-16356)+alu3+32767)/65536)+138), ((((acc8*141)+(acc1[0]*-16356)+alu3+32767)/65536)+138)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*141)+(acc0[1]*-16356)+alu3+32767)/65536)+138), ((((acc6*141)+(acc0[0]*-16356)+alu3+32767)/65536)+138))); | |
*((unsigned_char128*)((data0+(alu8+6272)))) = alu44; | |
} | |
} | |
__attribute__((noinline)) void E_98_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:49); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|99); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|99); | |
int alu5 = (alu0?49:98); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*688)+(cast6*1037)+3770)/32768)*1018263)+31966)/32768)+-91), (((((((cast5*688)+(cast4*1037)+3770)/32768)*1018263)+31966)/32768)+-91)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*688)+(cast2*1037)+3770)/32768)*1018263)+31966)/32768)+-91), (((((((cast1*688)+(cast0*1037)+3770)/32768)*1018263)+31966)/32768)+-91))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void r_12_49_8_8_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:6); | |
int alu3 = (alu1?6:12); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<11); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|33); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
int alu6 = (ridx1<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 8; ridx2++) { | |
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
int alu13 = (alu4+(ridx2<<8)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*265); | |
int32 alu38 = (acc2*-33655); | |
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*265)+alu38+(acc1[1]*-31005)+alu37+32767)/65536), (((acc5*265)+alu38+(acc1[0]*-31005)+alu37+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*265)+alu38+(acc0[1]*-31005)+alu37+32767)/65536), (((acc3*265)+alu38+(acc0[0]*-31005)+alu37+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
/*
 * r_12_14_4_3_3_32_4n1: generated reduction kernel writing 12 planes of 6272 bytes to data0.
 *
 * Loop structure (all offsets in bytes unless noted):
 *   ridx0: plane index; global_idx_0 == 0 handles [0, 6), any other value handles [6, 12).
 *   ridx1: 14 rows per plane, 448 bytes apart.
 *   ridx2: 4 steps per row, 128 bytes apart.
 * Each step loads up to eleven 128-byte vectors from data1 around the current position
 * (neighbours that fall outside the guards below are replaced by zeros), interleaves
 * them with __builtin_shufflevector, and combines vrmpybusv products against three
 * 128-byte vectors from data2 with vrmpybus sums against 16843009u (0x01010101) and
 * 32 ints from data3.  The result is divided by 65536, narrowed with vpackwh_sat /
 * vpackhub_sat and stored to data0.
 * `sync` is not used by this kernel.
 * NOTE(review): the name and the 3-vector / neighbour pattern look like a 3x3 window
 * operation on quantized data; confirm against the generator.
 */
__attribute__((noinline)) void r_12_14_4_3_3_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|36); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
/* All-zero vector substituted for every guarded load whose guard is false. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; it selects which half of the 12 planes is processed. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:6); | |
int alu4 = (alu2?6:12); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
/* Per plane: three consecutive 128-byte vectors of data2 (384 bytes per plane)... */
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
/* ...and 32 ints of data3 (data3 is int*, so alu8 is an element index). */
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*3736); | |
/* Byte offset of this plane in data0 and data1. */
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 14; ridx1++) { | |
/* alu11 is the row offset in 64-byte units (7 per row); alu12 is the same offset in bytes. */
int alu11 = (ridx1*7); | |
int alu12 = (ridx1*448); | |
/* alu13: not the first row (ridx1 >= 1).  alu14: not the last row (ridx1 < 13). */
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<13); | |
int alu15 = (alu10+alu12); | |
/* Prefetches; the %98 wraps the 64-byte-unit index inside the plane (98*64 == 6272). */
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+8)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+9)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+90)%98)<<6)+alu10), 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
/* alu20: not the last step (ridx2 < 3).  alu22: not the first step (ridx2 >= 1). */
_Bool alu20 = (ridx2<3); | |
int alu21 = (ridx2<<7); | |
_Bool alu22 = ((ridx2<1)!=1); | |
/* Same addresses as val0..val3 above, but replaced by zeros on the last step. */
unsigned_char128 val4 = (alu20?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu20?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu20?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu20?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu23 = (val7*3736); | |
/* Current position in 64-byte units (each step advances by two units). */
int alu24 = (alu11+(ridx2<<1)); | |
_Bool alu25 = (alu20&alu14); | |
/* Byte offset of the current 128-byte vector in data1. */
int alu26 = (alu10+alu12+alu21); | |
/*
 * NOTE(review): rows are 448 bytes apart, so on the last step (alu21 == 384) this
 * unguarded 128-byte load, and the matching store to data0 at the end of the loop
 * body, extend 64 bytes past the row stride.  Presumably intentional (the following
 * row rewrites those bytes, or the buffers are padded) -- confirm, in particular
 * for the last row of plane 5 / plane 11 where the next row belongs to the other
 * invocation or lies past the 12 planes.
 */
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu26))); | |
/* Guarded neighbour loads; offsets are relative to alu26 (+/-448 is one row, +/-32 is within the row). */
unsigned_char128 val9 = (alu25?*((unsigned_char128*)((data1+((((alu24+8)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val10 = (alu25?*((unsigned_char128*)((data1+((((alu24+9)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val11 = (alu13?*((unsigned_char128*)((data1+(alu26+-448)))):cast0); | |
unsigned_char128 val12 = ((alu20&alu13)?*((unsigned_char128*)((data1+(alu26+-416)))):cast0); | |
unsigned_char128 val13 = (alu22?*((unsigned_char128*)((data1+(alu26+-32)))):cast0); | |
unsigned_char128 val14 = (alu20?*((unsigned_char128*)((data1+(alu26+32)))):cast0); | |
unsigned_char128 val15 = ((alu22&alu14)?*((unsigned_char128*)((data1+(alu26+416)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu26+448)))):cast0); | |
unsigned_char128 val17 = (alu25?*((unsigned_char128*)((data1+(alu26+480)))):cast0); | |
unsigned_char128 val18 = ((alu22&alu13)?*((unsigned_char128*)((data1+((((alu24+90)%98)<<6)+alu10+32)))):cast0); | |
/*
 * Interleave three source bytes per output position into 4-byte groups; the fourth
 * lane of each group is -1, which __builtin_shufflevector treats as "don't care".
 * Indices >= 128 select from the second operand.
 * NOTE(review): these vectors are also summed with vrmpybus(..., 16843009u) below,
 * which adds all four bytes of a group -- confirm that the generator can rely on a
 * defined value in the -1 lanes.
 */
unsigned_char128 alu27 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val9, val10, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val9, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu33 = __builtin_shufflevector(val8, val14, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu34 = __builtin_shufflevector(val11, val12, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu35 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu36 = __builtin_shufflevector(val15, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu37 = __builtin_shufflevector(val17, val9, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu38 = __builtin_shufflevector(val18, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/*
 * Four 32-lane results, each of the form
 *   (vrmpybusv(data2 vectors, interleaved bytes)*3736
 *    + vrmpybus(interleaved bytes, 0x01010101)*-392280 + data3 term + 32767) / 65536
 * The two results in the first vpackwh_sat use val4..val6 / alu23 (zeroed on the
 * last step); the two in the second use the unguarded val0..val2 / alu9.
 */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu29), val5, alu30), val6, alu31)*3736)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu30, 16843009u), alu31, 16843009u)*-392280)+alu23+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu27), val5, alu28), val6, alu37)*3736)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu37, 16843009u)*-392280)+alu23+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu34), val1, alu33), val2, alu32)*3736)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu34, 16843009u), alu33, 16843009u), alu32, 16843009u)*-392280)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu38), val1, alu35), val2, alu36)*3736)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu38, 16843009u), alu35, 16843009u), alu36, 16843009u)*-392280)+alu9+32767)/65536))); | |
/* Store at the same plane/row/step offset the centre vector (val8) was read from. */
*((unsigned_char128*)((data0+(alu21+alu15)))) = alu39; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_49_48_8_32_4_2n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int64 val0 = *((int64*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*185); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*185); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu5 = ((g0!=0)!=1); | |
int alu6 = (alu5?0:24); | |
int alu7 = (alu5?24:49); | |
for (int ridx0 = alu6; ridx0 < alu7; ridx0++) { | |
int alu8 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu8, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
for (int ridx1 = 0; ridx1 < 48; ridx1++) { | |
int alu14 = (alu8+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu14+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu14))); | |
int alu16 = (alu14+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu14+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu14+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu20))); | |
int alu22 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu22+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu22+12288)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu22+12416)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu23 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu23)); | |
unsigned_char4 alu24 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu30)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu42 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*185)+(acc1[1]*-24050)+alu1+32767)/65536)+132), ((((acc4*185)+(acc1[0]*-24050)+alu1+32767)/65536)+132)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*185)+(acc0[1]*-24050)+alu1+32767)/65536)+132), ((((acc2*185)+(acc0[0]*-24050)+alu1+32767)/65536)+132))); | |
*((unsigned_char128*)((data0+alu8))) = alu42; | |
unsigned_char128 alu44 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*185)+(acc1[1]*-24050)+alu3+32767)/65536)+132), ((((acc8*185)+(acc1[0]*-24050)+alu3+32767)/65536)+132)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*185)+(acc0[1]*-24050)+alu3+32767)/65536)+132), ((((acc6*185)+(acc0[0]*-24050)+alu3+32767)/65536)+132))); | |
*((unsigned_char128*)((data0+(alu8+6272)))) = alu44; | |
} | |
} | |
__attribute__((noinline)) void E_98_128n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:49); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|99); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|99); | |
int alu5 = (alu0?49:98); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*513)+(cast6*1054)+27802)/32768)*966925)+30884)/32768)+-81), (((((((cast5*513)+(cast4*1054)+27802)/32768)*966925)+30884)/32768)+-81)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*513)+(cast2*1054)+27802)/32768)*966925)+30884)/32768)+-81), (((((((cast1*513)+(cast0*1054)+27802)/32768)*966925)+30884)/32768)+-81))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
/*
 * r_12_49_8_8_32_4n2
 *
 * Writes 128-byte result vectors to data0, computed from the byte buffers
 * data1 and data2 and the int buffer data3.
 *
 * Work split: the outer index ridx0 covers [0,6) when global_idx_0 == 0 and
 * [6,12) for any other value. `sync` is not referenced in this function.
 *
 * For each (ridx0, ridx1) pair an 8-step inner loop updates:
 *   acc0, acc1 - two ints each, via A2_vraddub_acc over the 8-byte groups
 *                loaded from data1 (presumably running byte sums; confirm
 *                against the Hexagon instruction reference).
 *   acc2       - vrmpybus_acc of the two data2 vectors with the scalar
 *                16843009u (0x01010101).
 *   acc3..acc6 - vrmpybus_acc of the same data2 vectors with 4-byte groups
 *                taken from the four data1 loads (one accumulator per load).
 * The epilogue mixes the accumulators and a data3 vector with fixed integer
 * coefficients, adds 32767, divides by 65536, packs with the two *_sat pack
 * builtins and stores 128 bytes at data0 + ridx0*6272 + ridx1*128.
 */
__attribute__((noinline)) void r_12_49_8_8_32_4n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Zero values used to reset the accumulators for every (ridx0, ridx1). */
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
/* alu1 is true exactly when g0 == 0; it selects the ridx0 range [0,6) or [6,12). */
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:6); | |
int alu3 = (alu1?6:12); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
/* Byte offset into data2 for this ridx0 (2048 bytes per step). */
int alu4 = (ridx0<<11); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|33); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
/* Byte offset of the 128-byte column block in data1 and in the output. */
int alu6 = (ridx1<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 8; ridx2++) { | |
/* data1 offset: ridx2>>2 steps by 6272 bytes, ridx2&3 steps by 8 bytes. */
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
/* data2 offset: 256 bytes (two 128-byte vectors) per ridx2. */
int alu13 = (alu4+(ridx2<<8)); | |
/* Four 8-byte loads from data1, 32 bytes apart, each preceded by a dcfetch 16 bytes ahead. */
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Split each 8-byte load into its low (bytes 0-3) and high (bytes 4-7) halves. */
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
/* The 8-byte vectors and the int2 accumulators are reinterpreted as 64-bit
   scalars through pointer casts to match the builtin's operand type. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
/* 16843009u == 0x01010101: acc2 uses an all-ones byte multiplier on val4/val5. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
/* acc3..acc6: val4 paired with the low 4 bytes, val5 with the high 4 bytes of val0..val3. */
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
/* 32 ints from data3 for this ridx0 (data3 is an int pointer, so <<5 steps by 32 ints). */
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*247); | |
int32 alu38 = (acc2*-31122); | |
/* Each of the four 32-lane results is
   (accN*247 + acc2*-31122 + scalar*-30134 + val6*247 + 32767) / 65536,
   where scalar is acc0[0], acc0[1], acc1[0], acc1[1] for acc3..acc6.
   The results are packed via vpackwh_sat then vpackhub_sat. */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*247)+alu38+(acc1[1]*-30134)+alu37+32767)/65536), (((acc5*247)+alu38+(acc1[0]*-30134)+alu37+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*247)+alu38+(acc0[1]*-30134)+alu37+32767)/65536), (((acc3*247)+alu38+(acc0[0]*-30134)+alu37+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
/*
 * r_12_14_4_3_3_32_4n2
 *
 * Writes 128-byte result vectors to data0, computed from a centre vector and
 * up to ten neighbouring vectors of data1, three 128-byte vectors of data2
 * and one 32-int vector of data3 per ridx0.
 *
 * Work split: ridx0 covers [0,6) when global_idx_0 == 0 and [6,12) otherwise.
 * `sync` is not referenced in this function.
 *
 * Loop structure: ridx0 (6 per call) x ridx1 in [0,14) x ridx2 in [0,4).
 * Neighbour loads are guarded by predicates on ridx1 and ridx2; when a guard
 * is false the load is skipped and an all-zero vector is used instead.
 * Each iteration stores 128 bytes at data0 + ridx0*6272 + ridx1*448 + ridx2*128.
 */
__attribute__((noinline)) void r_12_14_4_3_3_32_4n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|36); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
/* All-zero 128-byte vector substituted for every masked-off load below. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; it selects the ridx0 range [0,6) or [6,12). */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:6); | |
int alu4 = (alu2?6:12); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
/* Three consecutive 128-byte data2 vectors per ridx0 (384 bytes per step). */
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
/* data3 is an int pointer, so this steps by 32 ints per ridx0. */
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*869); | |
/* Base byte offset into data1 and data0 for this ridx0. */
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 14; ridx1++) { | |
int alu11 = (ridx1*7); | |
int alu12 = (ridx1*448); | |
/* alu13: ridx1 >= 1 (guards the -448/-416 loads); alu14: ridx1 < 13 (guards the +416/+448/+480 loads). */
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<13); | |
int alu15 = (alu10+alu12); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+8)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+9)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+90)%98)<<6)+alu10), 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
/* alu20: ridx2 < 3; alu22: ridx2 >= 1. */
_Bool alu20 = (ridx2<3); | |
int alu21 = (ridx2<<7); | |
_Bool alu22 = ((ridx2<1)!=1); | |
/* Same data2/data3 addresses as val0..val3, but replaced by zero when ridx2 == 3. */
unsigned_char128 val4 = (alu20?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu20?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu20?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu20?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu23 = (val7*869); | |
int alu24 = (alu11+(ridx2<<1)); | |
_Bool alu25 = (alu20&alu14); | |
/* Byte offset of the centre vector in data1. */
int alu26 = (alu10+alu12+alu21); | |
/* val8 is the only unguarded data1 load; val9..val18 are neighbours at the
   fixed or modulo-98 offsets shown, each zeroed when its guard is false. */
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu26))); | |
unsigned_char128 val9 = (alu25?*((unsigned_char128*)((data1+((((alu24+8)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val10 = (alu25?*((unsigned_char128*)((data1+((((alu24+9)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val11 = (alu13?*((unsigned_char128*)((data1+(alu26+-448)))):cast0); | |
unsigned_char128 val12 = ((alu20&alu13)?*((unsigned_char128*)((data1+(alu26+-416)))):cast0); | |
unsigned_char128 val13 = (alu22?*((unsigned_char128*)((data1+(alu26+-32)))):cast0); | |
unsigned_char128 val14 = (alu20?*((unsigned_char128*)((data1+(alu26+32)))):cast0); | |
unsigned_char128 val15 = ((alu22&alu14)?*((unsigned_char128*)((data1+(alu26+416)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu26+448)))):cast0); | |
unsigned_char128 val17 = (alu25?*((unsigned_char128*)((data1+(alu26+480)))):cast0); | |
unsigned_char128 val18 = ((alu22&alu13)?*((unsigned_char128*)((data1+((((alu24+90)%98)<<6)+alu10+32)))):cast0); | |
/* alu27..alu38: each shuffle builds 32 groups of 4 lanes: three selected bytes
   followed by index -1. Indices >= 128 select from the second operand.
   NOTE(review): Clang documents index -1 as a "don't care" lane, and the
   0x01010101 reductions in the final expression sum all four lanes of each
   group, so the result appears to depend on what the compiler places in
   those lanes. Confirm this is intended. */
unsigned_char128 alu27 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val9, val10, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val9, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu33 = __builtin_shufflevector(val8, val14, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu34 = __builtin_shufflevector(val11, val12, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu35 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu36 = __builtin_shufflevector(val15, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu37 = __builtin_shufflevector(val17, val9, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu38 = __builtin_shufflevector(val18, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* Four 32-lane results, each of the form
   (vrmpybusv chain * 869 + vrmpybus(..., 0x01010101) chain * -113839 + bias + 32767) / 65536.
   The first two (alu29/alu30/alu31 and alu27/alu28/alu37) use the masked
   val4..val6 and alu23; the last two (alu34/alu33/alu32 and alu38/alu35/alu36)
   use val0..val2 and alu9. Results are packed via vpackwh_sat then vpackhub_sat. */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu29), val5, alu30), val6, alu31)*869)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu30, 16843009u), alu31, 16843009u)*-113839)+alu23+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu27), val5, alu28), val6, alu37)*869)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu37, 16843009u)*-113839)+alu23+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu34), val1, alu33), val2, alu32)*869)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu34, 16843009u), alu33, 16843009u), alu32, 16843009u)*-113839)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu38), val1, alu35), val2, alu36)*869)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu38, 16843009u), alu35, 16843009u), alu36, 16843009u)*-113839)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu21+alu15)))) = alu39; | |
} | |
} | |
} | |
} | |
/*
 * r_49_48_8_32_4_2n2
 *
 * For each ridx0, writes two 128-byte result vectors to data0 (at ridx0*128
 * and ridx0*128 + 6272), computed from byte buffers data1 and data2 and the
 * first 64 ints of data3.
 *
 * Work split: ridx0 covers [0,24) when global_idx_0 == 0 and [24,49)
 * otherwise. `sync` is not referenced in this function.
 *
 * The 48-step inner loop updates:
 *   acc0, acc1 - two ints each, via A2_vraddub_acc over the 8-byte data1
 *                groups (presumably running byte sums; confirm against the
 *                Hexagon instruction reference).
 *   acc2..acc5 - vrmpybus_acc of data2 vectors at ridx1*256 (+128) with the
 *                4-byte groups from the four data1 loads.
 *   acc6..acc9 - the same, using data2 vectors 12288 bytes further on.
 * The epilogue scales by 115, adds scalar*-16675, the data3 term and 16383,
 * divides by 32768, adds 159 and packs with the two *_sat pack builtins.
 */
__attribute__((noinline)) void r_49_48_8_32_4_2n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Load from the start of data3, then split lanes 0..31 and 32..63; each half is scaled by 115. */
int64 val0 = *((int64*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*115); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*115); | |
/* Zero values used to reset the accumulators for every ridx0. */
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
/* alu5 is true exactly when g0 == 0; it selects the ridx0 range [0,24) or [24,49). */
_Bool alu5 = ((g0!=0)!=1); | |
int alu6 = (alu5?0:24); | |
int alu7 = (alu5?24:49); | |
for (int ridx0 = alu6; ridx0 < alu7; ridx0++) { | |
/* Byte offset of the 128-byte block in data1 and in the output. */
int alu8 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu8, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
for (int ridx1 = 0; ridx1 < 48; ridx1++) { | |
/* data1 offset: ridx1>>2 steps by 6272 bytes, ridx1&3 steps by 8 bytes. */
int alu14 = (alu8+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
/* Four 8-byte loads from data1, 32 bytes apart, each preceded by a dcfetch 16 bytes ahead. */
__builtin_HEXAGON_Y2_dcfetch(data1+(alu14+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu14))); | |
int alu16 = (alu14+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu14+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu14+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu20))); | |
/* Two pairs of 128-byte data2 vectors: at ridx1*256 and 12288 bytes further on. */
int alu22 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu22+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu22+12288)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu22+12416)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* cast2..cast5: low 4 bytes of val1..val4 as a 32-bit scalar; cast6..cast9: high 4 bytes. */
unsigned_char4 alu23 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu23)); | |
unsigned_char4 alu24 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu24)); | |
unsigned_char4 alu25 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu30)); | |
/* acc2..acc5 use val5/val6; acc6..acc9 use val7/val8 with the same scalars. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
/* The 8-byte vectors and the int2 accumulators are reinterpreted as 64-bit
   scalars through pointer casts to match the builtin's operand type. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
/* First output: acc2..acc5 with the data3 lanes 0..31 term (alu1). Each result is
   ((accN*115 + scalar*-16675 + alu1 + 16383) / 32768) + 159. */
unsigned_char128 alu42 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*115)+(acc1[1]*-16675)+alu1+16383)/32768)+159), ((((acc4*115)+(acc1[0]*-16675)+alu1+16383)/32768)+159)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*115)+(acc0[1]*-16675)+alu1+16383)/32768)+159), ((((acc2*115)+(acc0[0]*-16675)+alu1+16383)/32768)+159))); | |
*((unsigned_char128*)((data0+alu8))) = alu42; | |
/* Second output: acc6..acc9 with the data3 lanes 32..63 term (alu3), stored 6272 bytes further on. */
unsigned_char128 alu44 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*115)+(acc1[1]*-16675)+alu3+16383)/32768)+159), ((((acc8*115)+(acc1[0]*-16675)+alu3+16383)/32768)+159)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*115)+(acc0[1]*-16675)+alu3+16383)/32768)+159), ((((acc6*115)+(acc0[0]*-16675)+alu3+16383)/32768)+159))); | |
*((unsigned_char128*)((data0+(alu8+6272)))) = alu44; | |
} | |
} | |
__attribute__((noinline)) void E_98_128n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:49); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|99); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|99); | |
int alu5 = (alu0?49:98); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*2137)+(cast6*2220)+35857)/65536)*205767)+2617)/8192)+-119), (((((((cast5*2137)+(cast4*2220)+35857)/65536)*205767)+2617)/8192)+-119)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*2137)+(cast2*2220)+35857)/65536)*205767)+2617)/8192)+-119), (((((((cast1*2137)+(cast0*2220)+35857)/65536)*205767)+2617)/8192)+-119))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
/*
 * r_12_49_8_8_32_4n3
 *
 * Writes 128-byte result vectors to data0, computed from the byte buffers
 * data1 and data2 and the int buffer data3.
 *
 * Work split: the outer index ridx0 covers [0,6) when global_idx_0 == 0 and
 * [6,12) for any other value. `sync` is not referenced in this function.
 *
 * For each (ridx0, ridx1) pair an 8-step inner loop updates:
 *   acc0, acc1 - two ints each, via A2_vraddub_acc over the 8-byte groups
 *                loaded from data1 (presumably running byte sums; confirm
 *                against the Hexagon instruction reference).
 *   acc2       - vrmpybus_acc of the two data2 vectors with the scalar
 *                16843009u (0x01010101).
 *   acc3..acc6 - vrmpybus_acc of the same data2 vectors with 4-byte groups
 *                taken from the four data1 loads (one accumulator per load).
 * The epilogue mixes the accumulators and a data3 vector with the fixed
 * coefficients 377 / -49764 / -50518, adds 32767, divides by 65536, packs
 * with the two *_sat pack builtins and stores 128 bytes at
 * data0 + ridx0*6272 + ridx1*128.
 */
__attribute__((noinline)) void r_12_49_8_8_32_4n3(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Zero values used to reset the accumulators for every (ridx0, ridx1). */
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
/* alu1 is true exactly when g0 == 0; it selects the ridx0 range [0,6) or [6,12). */
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:6); | |
int alu3 = (alu1?6:12); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
/* Byte offset into data2 for this ridx0 (2048 bytes per step). */
int alu4 = (ridx0<<11); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|33); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
/* Byte offset of the 128-byte column block in data1 and in the output. */
int alu6 = (ridx1<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 8; ridx2++) { | |
/* data1 offset: ridx2>>2 steps by 6272 bytes, ridx2&3 steps by 8 bytes. */
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
/* data2 offset: 256 bytes (two 128-byte vectors) per ridx2. */
int alu13 = (alu4+(ridx2<<8)); | |
/* Four 8-byte loads from data1, 32 bytes apart, each preceded by a dcfetch 16 bytes ahead. */
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Split each 8-byte load into its low (bytes 0-3) and high (bytes 4-7) halves. */
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
/* The 8-byte vectors and the int2 accumulators are reinterpreted as 64-bit
   scalars through pointer casts to match the builtin's operand type. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
/* 16843009u == 0x01010101: acc2 uses an all-ones byte multiplier on val4/val5. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
/* acc3..acc6: val4 paired with the low 4 bytes, val5 with the high 4 bytes of val0..val3. */
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
/* 32 ints from data3 for this ridx0 (data3 is an int pointer, so <<5 steps by 32 ints). */
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*377); | |
int32 alu38 = (acc2*-49764); | |
/* Each of the four 32-lane results is
   (accN*377 + acc2*-49764 + scalar*-50518 + val6*377 + 32767) / 65536,
   where scalar is acc0[0], acc0[1], acc1[0], acc1[1] for acc3..acc6.
   The results are packed via vpackwh_sat then vpackhub_sat. */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*377)+alu38+(acc1[1]*-50518)+alu37+32767)/65536), (((acc5*377)+alu38+(acc1[0]*-50518)+alu37+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*377)+alu38+(acc0[1]*-50518)+alu37+32767)/65536), (((acc3*377)+alu38+(acc0[0]*-50518)+alu37+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
/*
 * r_12_14_4_3_3_32_4n3 - generated kernel built on Hexagon V6 128-byte vector builtins.
 *
 * data0: output bytes; one 128-byte vector is stored per (ridx0, ridx1, ridx2) at
 *        byte offset ridx0*6272 + ridx1*448 + ridx2*128.
 * data1: input bytes, read as 128-byte vectors at and around that same offset.
 * data2: byte operands; three 128-byte vectors per ridx0 (ridx0*384 + 0/128/256).
 * data3: int operands; one 32-lane int vector per ridx0 (element offset ridx0*32).
 * global_idx_0: 0 processes ridx0 in [0,6); any other value processes [6,12).
 * sync: not referenced in this function.
 *
 * NOTE(review): the name and the +/-448 and +/-32 neighbour offsets suggest a 3x3
 * window with zero substitution at the borders - confirm against the generator.
 * NOTE(review): the ridx2 == 3 store covers bytes 384..511 of a 448-byte row step, so
 * its last 64 bytes land in the following row (and, for ridx1 == 13, in the following
 * ridx0 region or past ridx0 == 11). Confirm buffer padding and that the two
 * global_idx_0 halves cannot race on the boundary region.
 */
__attribute__((noinline)) void r_12_14_4_3_3_32_4n3(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* l2fetch hints for data2 and data3. NOTE(review): the second operand presumably packs stride/width/height - confirm against the l2fetch encoding. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|36); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
/* All-zero vector substituted for every load whose guard is false below. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; it selects which half of the ridx0 range this call covers. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:6); | |
int alu4 = (alu2?6:12); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
/* Per-ridx0 operands, loaded once here and reused for every (ridx1, ridx2). */
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*1594); | |
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 14; ridx1++) { | |
int alu11 = (ridx1*7); | |
int alu12 = (ridx1*448); | |
/* alu13: ridx1 is not the first row; alu14: ridx1 is not the last row. */
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<13); | |
int alu15 = (alu10+alu12); | |
/* l2fetch hints for the data1 addresses used by the inner loop (same index expressions as val8..val18 with ridx2 == 0). */
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+8)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+9)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+90)%98)<<6)+alu10), 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
/* alu20: ridx2 is not the last step; alu22: ridx2 is not the first step. */
_Bool alu20 = (ridx2<3); | |
int alu21 = (ridx2<<7); | |
_Bool alu22 = ((ridx2<1)!=1); | |
/* Same data2/data3 addresses as val0..val3, but replaced by zeros when ridx2 == 3. */
unsigned_char128 val4 = (alu20?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu20?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu20?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu20?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu23 = (val7*1594); | |
int alu24 = (alu11+(ridx2<<1)); | |
_Bool alu25 = (alu20&alu14); | |
int alu26 = (alu10+alu12+alu21); | |
/* val8 (centre) is always loaded; each neighbour vector is loaded only when its guard holds, otherwise cast0 is used. */
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu26))); | |
unsigned_char128 val9 = (alu25?*((unsigned_char128*)((data1+((((alu24+8)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val10 = (alu25?*((unsigned_char128*)((data1+((((alu24+9)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val11 = (alu13?*((unsigned_char128*)((data1+(alu26+-448)))):cast0); | |
unsigned_char128 val12 = ((alu20&alu13)?*((unsigned_char128*)((data1+(alu26+-416)))):cast0); | |
unsigned_char128 val13 = (alu22?*((unsigned_char128*)((data1+(alu26+-32)))):cast0); | |
unsigned_char128 val14 = (alu20?*((unsigned_char128*)((data1+(alu26+32)))):cast0); | |
unsigned_char128 val15 = ((alu22&alu14)?*((unsigned_char128*)((data1+(alu26+416)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu26+448)))):cast0); | |
unsigned_char128 val17 = (alu25?*((unsigned_char128*)((data1+(alu26+480)))):cast0); | |
unsigned_char128 val18 = ((alu22&alu13)?*((unsigned_char128*)((data1+((((alu24+90)%98)<<6)+alu10+32)))):cast0); | |
/* Regroup bytes into 32 groups of four: three selected source bytes plus a fourth lane given index -1 (left unspecified by __builtin_shufflevector). */
unsigned_char128 alu27 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val9, val10, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val9, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu33 = __builtin_shufflevector(val8, val14, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu34 = __builtin_shufflevector(val11, val12, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu35 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu36 = __builtin_shufflevector(val15, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu37 = __builtin_shufflevector(val17, val9, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu38 = __builtin_shufflevector(val18, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* Four 32-lane terms, each (A*1594 + B*-226348 + bias + 32767)/65536, where A is a three-step vrmpybusv chain of data2 operands against regrouped bytes, B is the vrmpybus chain of the same regrouped bytes against scalar 16843009u (0x01010101), and bias is alu9 or alu23. The terms are packed with vpackwh_sat and then vpackhub_sat. */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu29), val5, alu30), val6, alu31)*1594)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu30, 16843009u), alu31, 16843009u)*-226348)+alu23+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu27), val5, alu28), val6, alu37)*1594)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu37, 16843009u)*-226348)+alu23+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu34), val1, alu33), val2, alu32)*1594)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu34, 16843009u), alu33, 16843009u), alu32, 16843009u)*-226348)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu38), val1, alu35), val2, alu36)*1594)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu38, 16843009u), alu35, 16843009u), alu36, 16843009u)*-226348)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu21+alu15)))) = alu39; | |
} | |
} | |
} | |
} | |
/*
 * r_49_48_8_32_4_3 - generated kernel built on Hexagon V6 128-byte vector builtins.
 *
 * data0: output bytes; three 128-byte vectors are stored per ridx0, at byte offsets
 *        ridx0*128, ridx0*128 + 6272 and ridx0*128 + 12544.
 * data1: input bytes, read 8 at a time from four positions 32 bytes apart.
 * data2: byte operands; six 128-byte vectors per inner step at
 *        ridx1*256 + {0, 128, 12288, 12416, 24576, 24704}.
 * data3: 96 ints, read once up front and split into three 32-lane vectors.
 * global_idx_0: 0 processes ridx0 in [0,24); any other value processes [24,49).
 * sync: not referenced in this function.
 */
__attribute__((noinline)) void r_49_48_8_32_4_3(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Split the 96 ints from data3 into lanes 0..31, 32..63 and 64..95; each part is pre-multiplied by 139. */
int96 val0 = *((int96*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*139); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*139); | |
int32 alu4 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu5 = (alu4*139); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
/* alu7 is true exactly when g0 == 0; it selects which part of the ridx0 range this call covers. */
_Bool alu7 = ((g0!=0)!=1); | |
int alu8 = (alu7?0:24); | |
int alu9 = (alu7?24:49); | |
for (int ridx0 = alu8; ridx0 < alu9; ridx0++) { | |
/* alu10: byte offset ridx0*128, used for both the data1 reads and the data0 stores. */
int alu10 = (ridx0<<7); | |
/* dcfetch / l2fetch prefetch hints for the data1 bytes this iteration starts from. */
__builtin_HEXAGON_Y2_dcfetch(data1+alu10); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu10, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+96)); | |
/* acc0/acc1: two-lane int accumulators updated by vraddub_acc; acc2..acc13: 32-lane accumulators updated by vrmpybus_acc. All start at zero. */
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
for (int ridx1 = 0; ridx1 < 48; ridx1++) { | |
/* alu16: data1 byte offset for this step = alu10 + (ridx1/4)*6272 + (ridx1%4)*8. */
int alu16 = (alu10+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu16+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu16+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu20))); | |
int alu22 = (alu16+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu22))); | |
int alu24 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu24))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu24+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu24+12288)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu24+12416)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu24+24576)))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+(alu24+24704)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Reinterpret bytes 0..3 (cast2..cast5) and bytes 4..7 (cast6..cast9) of each 8-byte group as a 32-bit scalar operand. */
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu31)); | |
unsigned_char4 alu32 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu32)); | |
/* acc2..acc5 use val5/val6, acc6..acc9 use val7/val8, acc10..acc13 use val9/val10; within each group the four accumulators take data1 groups val1..val4 in order. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val9, cast2), val10, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val9, cast3), val10, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val9, cast4), val10, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val9, cast5), val10, cast9); | |
/* vraddub_acc folds val1 and val2 into acc0, and val3 and val4 into acc1. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
/* Each output vector packs four terms of the form ((acc*139 + (acc0/acc1 lane)*-15012 + data3 part*139 + 32767)/65536) + 128 via vpackwh_sat then vpackhub_sat. The three stores use alu1, alu3 and alu5 respectively. */
unsigned_char128 alu48 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*139)+(acc1[1]*-15012)+alu1+32767)/65536)+128), ((((acc4*139)+(acc1[0]*-15012)+alu1+32767)/65536)+128)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*139)+(acc0[1]*-15012)+alu1+32767)/65536)+128), ((((acc2*139)+(acc0[0]*-15012)+alu1+32767)/65536)+128))); | |
*((unsigned_char128*)((data0+alu10))) = alu48; | |
unsigned_char128 alu50 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*139)+(acc1[1]*-15012)+alu3+32767)/65536)+128), ((((acc8*139)+(acc1[0]*-15012)+alu3+32767)/65536)+128)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*139)+(acc0[1]*-15012)+alu3+32767)/65536)+128), ((((acc6*139)+(acc0[0]*-15012)+alu3+32767)/65536)+128))); | |
*((unsigned_char128*)((data0+(alu10+6272)))) = alu50; | |
unsigned_char128 alu52 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc13*139)+(acc1[1]*-15012)+alu5+32767)/65536)+128), ((((acc12*139)+(acc1[0]*-15012)+alu5+32767)/65536)+128)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc11*139)+(acc0[1]*-15012)+alu5+32767)/65536)+128), ((((acc10*139)+(acc0[0]*-15012)+alu5+32767)/65536)+128))); | |
*((unsigned_char128*)((data0+(alu10+12544)))) = alu52; | |
} | |
} | |
/*
 * r_18_49_12_8_32_4 - generated kernel built on Hexagon V6 128-byte vector builtins.
 *
 * data0: output bytes; one 128-byte vector is stored per (ridx0, ridx1) at byte
 *        offset ridx0*6272 + ridx1*128.
 * data1: input bytes, read 8 at a time from four positions 32 bytes apart.
 * data2: byte operands; two 128-byte vectors per inner step at
 *        ridx0*3072 + ridx2*256 and that offset + 128.
 * data3: int operands; one 32-lane int vector at element offset ridx0*32.
 * global_idx_0: 0 processes ridx0 in [0,9); any other value processes [9,18).
 * sync: not referenced in this function.
 */
__attribute__((noinline)) void r_18_49_12_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|5); | |
/* alu1 is true exactly when g0 == 0; it selects which half of the ridx0 range this call covers. */
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:9); | |
int alu3 = (alu1?9:18); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
/* alu4: byte offset of this ridx0's operands in data2. */
int alu4 = (ridx0*3072); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|49); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
int alu6 = (ridx1<<7); | |
/* dcfetch / l2fetch prefetch hints for the data1 bytes this iteration starts from. */
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
/* acc0/acc1: two-lane int accumulators updated by vraddub_acc; acc2..acc6: 32-lane accumulators updated by vrmpybus_acc. All start at zero. */
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 12; ridx2++) { | |
/* alu12: data1 byte offset = alu6 + (ridx2/4)*6272 + (ridx2%4)*8; alu13: data2 byte offset for this step. */
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
int alu13 = (alu4+(ridx2<<8)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Bytes 0..3 (alu21..alu24) and bytes 4..7 (alu25..alu28) of each 8-byte data1 group; reinterpreted below as 32-bit scalar operands. */
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
/* vraddub_acc folds val0 and val1 into acc0, and val2 and val3 into acc1. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
/* acc2 accumulates val4/val5 against the constant scalar 16843009u (0x01010101); acc3..acc6 accumulate them against data1 groups val0..val3. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
/* NOTE(review): val6 depends only on ridx0 but is reloaded for every ridx1. */
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*323); | |
int32 alu38 = (acc2*-41344); | |
/* Four terms of the form (acc*323 + acc2*-41344 + (acc0/acc1 lane)*-42959 + val6*323 + 32767)/65536, packed with vpackwh_sat then vpackhub_sat. */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*323)+alu38+(acc1[1]*-42959)+alu37+32767)/65536), (((acc5*323)+alu38+(acc1[0]*-42959)+alu37+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*323)+alu38+(acc0[1]*-42959)+alu37+32767)/65536), (((acc3*323)+alu38+(acc0[0]*-42959)+alu37+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
/*
 * r_18_14_4_3_3_32_4 - generated kernel built on Hexagon V6 128-byte vector builtins.
 *
 * data0: output bytes; one 128-byte vector is stored per (ridx0, ridx1, ridx2) at
 *        byte offset ridx0*6272 + ridx1*448 + ridx2*128.
 * data1: input bytes, read as 128-byte vectors at and around that same offset.
 * data2: byte operands; three 128-byte vectors per ridx0 (ridx0*384 + 0/128/256).
 * data3: int operands; one 32-lane int vector per ridx0 (element offset ridx0*32).
 * global_idx_0: 0 processes ridx0 in [0,9); any other value processes [9,18).
 * sync: not referenced in this function.
 *
 * NOTE(review): the name and the +/-448 and +/-32 neighbour offsets suggest a 3x3
 * window with zero substitution at the borders - confirm against the generator.
 * NOTE(review): the ridx2 == 3 store covers bytes 384..511 of a 448-byte row step, so
 * its last 64 bytes land in the following row (and, for ridx1 == 13, in the following
 * ridx0 region or past ridx0 == 17). Confirm buffer padding and that the two
 * global_idx_0 halves cannot race on the boundary region.
 */
__attribute__((noinline)) void r_18_14_4_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* l2fetch hints for data2 and data3. NOTE(review): the second operand presumably packs stride/width/height - confirm against the l2fetch encoding. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|54); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|5); | |
/* All-zero vector substituted for every load whose guard is false below. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; it selects which half of the ridx0 range this call covers. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:9); | |
int alu4 = (alu2?9:18); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
/* Per-ridx0 operands, loaded once here and reused for every (ridx1, ridx2). */
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*1349); | |
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 14; ridx1++) { | |
int alu11 = (ridx1*7); | |
int alu12 = (ridx1*448); | |
/* alu13: ridx1 is not the first row; alu14: ridx1 is not the last row. */
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<13); | |
int alu15 = (alu10+alu12); | |
/* l2fetch hints for the data1 addresses used by the inner loop (same index expressions as val8..val18 with ridx2 == 0). */
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+8)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+9)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+90)%98)<<6)+alu10), 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
/* alu20: ridx2 is not the last step; alu22: ridx2 is not the first step. */
_Bool alu20 = (ridx2<3); | |
int alu21 = (ridx2<<7); | |
_Bool alu22 = ((ridx2<1)!=1); | |
/* Same data2/data3 addresses as val0..val3, but replaced by zeros when ridx2 == 3. */
unsigned_char128 val4 = (alu20?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu20?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu20?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu20?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu23 = (val7*1349); | |
int alu24 = (alu11+(ridx2<<1)); | |
_Bool alu25 = (alu20&alu14); | |
int alu26 = (alu10+alu12+alu21); | |
/* val8 (centre) is always loaded; each neighbour vector is loaded only when its guard holds, otherwise cast0 is used. */
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu26))); | |
unsigned_char128 val9 = (alu25?*((unsigned_char128*)((data1+((((alu24+8)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val10 = (alu25?*((unsigned_char128*)((data1+((((alu24+9)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val11 = (alu13?*((unsigned_char128*)((data1+(alu26+-448)))):cast0); | |
unsigned_char128 val12 = ((alu20&alu13)?*((unsigned_char128*)((data1+(alu26+-416)))):cast0); | |
unsigned_char128 val13 = (alu22?*((unsigned_char128*)((data1+(alu26+-32)))):cast0); | |
unsigned_char128 val14 = (alu20?*((unsigned_char128*)((data1+(alu26+32)))):cast0); | |
unsigned_char128 val15 = ((alu22&alu14)?*((unsigned_char128*)((data1+(alu26+416)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu26+448)))):cast0); | |
unsigned_char128 val17 = (alu25?*((unsigned_char128*)((data1+(alu26+480)))):cast0); | |
unsigned_char128 val18 = ((alu22&alu13)?*((unsigned_char128*)((data1+((((alu24+90)%98)<<6)+alu10+32)))):cast0); | |
/* Regroup bytes into 32 groups of four: three selected source bytes plus a fourth lane given index -1 (left unspecified by __builtin_shufflevector). */
unsigned_char128 alu27 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val9, val10, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val9, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu33 = __builtin_shufflevector(val8, val14, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu34 = __builtin_shufflevector(val11, val12, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu35 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu36 = __builtin_shufflevector(val15, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu37 = __builtin_shufflevector(val17, val9, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu38 = __builtin_shufflevector(val18, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* Four 32-lane terms, each (A*1349 + B*-190209 + bias + 32767)/65536, where A is a three-step vrmpybusv chain of data2 operands against regrouped bytes, B is the vrmpybus chain of the same regrouped bytes against scalar 16843009u (0x01010101), and bias is alu9 or alu23. The terms are packed with vpackwh_sat and then vpackhub_sat. */
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu29), val5, alu30), val6, alu31)*1349)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu30, 16843009u), alu31, 16843009u)*-190209)+alu23+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu27), val5, alu28), val6, alu37)*1349)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu37, 16843009u)*-190209)+alu23+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu34), val1, alu33), val2, alu32)*1349)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu34, 16843009u), alu33, 16843009u), alu32, 16843009u)*-190209)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu38), val1, alu35), val2, alu36)*1349)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu38, 16843009u), alu35, 16843009u), alu36, 16843009u)*-190209)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu21+alu15)))) = alu39; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_49_72_8_32_4_3(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int96 val0 = *((int96*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*47); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*47); | |
int32 alu4 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu5 = (alu4*47); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu7 = ((g0!=0)!=1); | |
int alu8 = (alu7?0:24); | |
int alu9 = (alu7?24:49); | |
for (int ridx0 = alu8; ridx0 < alu9; ridx0++) { | |
int alu10 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu10); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu10, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
for (int ridx1 = 0; ridx1 < 72; ridx1++) { | |
int alu16 = (alu10+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu16+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu16+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu20))); | |
int alu22 = (alu16+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu22))); | |
int alu24 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu24))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu24+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu24+18432)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu24+18560)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu24+36864)))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+(alu24+36992)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu31)); | |
unsigned_char4 alu32 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu32)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val9, cast2), val10, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val9, cast3), val10, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val9, cast4), val10, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val9, cast5), val10, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu48 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*47)+(acc1[1]*-6439)+alu1+8191)/16384)+113), ((((acc4*47)+(acc1[0]*-6439)+alu1+8191)/16384)+113)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*47)+(acc0[1]*-6439)+alu1+8191)/16384)+113), ((((acc2*47)+(acc0[0]*-6439)+alu1+8191)/16384)+113))); | |
*((unsigned_char128*)((data0+alu10))) = alu48; | |
unsigned_char128 alu50 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*47)+(acc1[1]*-6439)+alu3+8191)/16384)+113), ((((acc8*47)+(acc1[0]*-6439)+alu3+8191)/16384)+113)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*47)+(acc0[1]*-6439)+alu3+8191)/16384)+113), ((((acc6*47)+(acc0[0]*-6439)+alu3+8191)/16384)+113))); | |
*((unsigned_char128*)((data0+(alu10+6272)))) = alu50; | |
unsigned_char128 alu52 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc13*47)+(acc1[1]*-6439)+alu5+8191)/16384)+113), ((((acc12*47)+(acc1[0]*-6439)+alu5+8191)/16384)+113)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc11*47)+(acc0[1]*-6439)+alu5+8191)/16384)+113), ((((acc10*47)+(acc0[0]*-6439)+alu5+8191)/16384)+113))); | |
*((unsigned_char128*)((data0+(alu10+12544)))) = alu52; | |
} | |
} | |
__attribute__((noinline)) void E_147_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:73); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|8); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|8); | |
int alu5 = (alu0?73:147); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*982)+(cast6*893)+4106)/32768)*920265)+29824)/32768)+-74), (((((((cast5*982)+(cast4*893)+4106)/32768)*920265)+29824)/32768)+-74)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*982)+(cast2*893)+4106)/32768)*920265)+29824)/32768)+-74), (((((((cast1*982)+(cast0*893)+4106)/32768)*920265)+29824)/32768)+-74))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void r_18_49_12_8_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|5); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:9); | |
int alu3 = (alu1?9:18); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0*3072); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|49); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
int alu6 = (ridx1<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 12; ridx2++) { | |
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
int alu13 = (alu4+(ridx2<<8)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*173); | |
int32 alu38 = (acc2*-21279); | |
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*173)+alu38+(acc1[1]*-17992)+alu37+32767)/65536), (((acc5*173)+alu38+(acc1[0]*-17992)+alu37+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*173)+alu38+(acc0[1]*-17992)+alu37+32767)/65536), (((acc3*173)+alu38+(acc0[0]*-17992)+alu37+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
__attribute__((noinline)) void r_18_14_4_3_3_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|54); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|5); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:9); | |
int alu4 = (alu2?9:18); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*3022); | |
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 14; ridx1++) { | |
int alu11 = (ridx1*7); | |
int alu12 = (ridx1*448); | |
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<13); | |
int alu15 = (alu10+alu12); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+8)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+9)%98)<<6)+alu10), 0x808000|9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+((((alu11+90)%98)<<6)+alu10), 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 4; ridx2++) { | |
_Bool alu20 = (ridx2<3); | |
int alu21 = (ridx2<<7); | |
_Bool alu22 = ((ridx2<1)!=1); | |
unsigned_char128 val4 = (alu20?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu20?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu20?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu20?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu23 = (val7*3022); | |
int alu24 = (alu11+(ridx2<<1)); | |
_Bool alu25 = (alu20&alu14); | |
int alu26 = (alu10+alu12+alu21); | |
unsigned_char128 val8 = *((unsigned_char128*)((data1+alu26))); | |
unsigned_char128 val9 = (alu25?*((unsigned_char128*)((data1+((((alu24+8)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val10 = (alu25?*((unsigned_char128*)((data1+((((alu24+9)%98)<<6)+alu10)))):cast0); | |
unsigned_char128 val11 = (alu13?*((unsigned_char128*)((data1+(alu26+-448)))):cast0); | |
unsigned_char128 val12 = ((alu20&alu13)?*((unsigned_char128*)((data1+(alu26+-416)))):cast0); | |
unsigned_char128 val13 = (alu22?*((unsigned_char128*)((data1+(alu26+-32)))):cast0); | |
unsigned_char128 val14 = (alu20?*((unsigned_char128*)((data1+(alu26+32)))):cast0); | |
unsigned_char128 val15 = ((alu22&alu14)?*((unsigned_char128*)((data1+(alu26+416)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu26+448)))):cast0); | |
unsigned_char128 val17 = (alu25?*((unsigned_char128*)((data1+(alu26+480)))):cast0); | |
unsigned_char128 val18 = ((alu22&alu13)?*((unsigned_char128*)((data1+((((alu24+90)%98)<<6)+alu10+32)))):cast0); | |
unsigned_char128 alu27 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val12, val12, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val9, val10, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val9, 0, 32, 128, -1, 1, 33, 129, -1, 2, 34, 130, -1, 3, 35, 131, -1, 4, 36, 132, -1, 5, 37, 133, -1, 6, 38, 134, -1, 7, 39, 135, -1, 8, 40, 136, -1, 9, 41, 137, -1, 10, 42, 138, -1, 11, 43, 139, -1, 12, 44, 140, -1, 13, 45, 141, -1, 14, 46, 142, -1, 15, 47, 143, -1, 16, 48, 144, -1, 17, 49, 145, -1, 18, 50, 146, -1, 19, 51, 147, -1, 20, 52, 148, -1, 21, 53, 149, -1, 22, 54, 150, -1, 23, 55, 151, -1, 24, 56, 152, -1, 25, 57, 153, -1, 26, 58, 154, -1, 27, 59, 155, -1, 28, 60, 156, -1, 29, 61, 157, -1, 30, 62, 158, -1, 31, 63, 159, -1); | |
unsigned_char128 alu33 = __builtin_shufflevector(val8, val14, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu34 = __builtin_shufflevector(val11, val12, 0, 32, 160, -1, 1, 33, 161, -1, 2, 34, 162, -1, 3, 35, 163, -1, 4, 36, 164, -1, 5, 37, 165, -1, 6, 38, 166, -1, 7, 39, 167, -1, 8, 40, 168, -1, 9, 41, 169, -1, 10, 42, 170, -1, 11, 43, 171, -1, 12, 44, 172, -1, 13, 45, 173, -1, 14, 46, 174, -1, 15, 47, 175, -1, 16, 48, 176, -1, 17, 49, 177, -1, 18, 50, 178, -1, 19, 51, 179, -1, 20, 52, 180, -1, 21, 53, 181, -1, 22, 54, 182, -1, 23, 55, 183, -1, 24, 56, 184, -1, 25, 57, 185, -1, 26, 58, 186, -1, 27, 59, 187, -1, 28, 60, 188, -1, 29, 61, 189, -1, 30, 62, 190, -1, 31, 63, 191, -1); | |
unsigned_char128 alu35 = __builtin_shufflevector(val13, val8, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu36 = __builtin_shufflevector(val15, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu37 = __builtin_shufflevector(val17, val9, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu38 = __builtin_shufflevector(val18, val11, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu29), val5, alu30), val6, alu31)*3022)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu30, 16843009u), alu31, 16843009u)*-462366)+alu23+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu27), val5, alu28), val6, alu37)*3022)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu37, 16843009u)*-462366)+alu23+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu34), val1, alu33), val2, alu32)*3022)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu34, 16843009u), alu33, 16843009u), alu32, 16843009u)*-462366)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu38), val1, alu35), val2, alu36)*3022)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu38, 16843009u), alu35, 16843009u), alu36, 16843009u)*-462366)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu21+alu15)))) = alu39; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_49_72_8_32_4_3n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int96 val0 = *((int96*)((data3+0))); | |
int32 alu0 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu1 = (alu0*109); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = (alu2*109); | |
int32 alu4 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu5 = (alu4*109); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
int2 cast1 = (int2){0,0}; | |
_Bool alu7 = ((g0!=0)!=1); | |
int alu8 = (alu7?0:24); | |
int alu9 = (alu7?24:49); | |
for (int ridx0 = alu8; ridx0 < alu9; ridx0++) { | |
int alu10 = (ridx0<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu10); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu10, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu10+96)); | |
int2 acc0 = cast1; | |
int2 acc1 = cast1; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
for (int ridx1 = 0; ridx1 < 72; ridx1++) { | |
int alu16 = (alu10+((ridx1>>2)*6272)+((ridx1&3)<<3)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu16+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu16))); | |
int alu18 = (alu16+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu18))); | |
int alu20 = (alu16+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu20))); | |
int alu22 = (alu16+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val4 = *((unsigned_char8*)((data1+alu22))); | |
int alu24 = (ridx1<<8); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu24))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+(alu24+128)))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+(alu24+18432)))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+(alu24+18560)))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+(alu24+36864)))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+(alu24+36992)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned int cast2 = (*((unsigned int*)&alu25)); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast3 = (*((unsigned int*)&alu26)); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu27)); | |
unsigned_char4 alu28 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu28)); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned int cast6 = (*((unsigned int*)&alu29)); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast7 = (*((unsigned int*)&alu30)); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu31)); | |
unsigned_char4 alu32 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu32)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, cast2), val6, cast6); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val5, cast3), val6, cast7); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val5, cast4), val6, cast8); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val5, cast5), val6, cast9); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val7, cast2), val8, cast6); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val7, cast3), val8, cast7); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val7, cast4), val8, cast8); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val7, cast5), val8, cast9); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val9, cast2), val10, cast6); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val9, cast3), val10, cast7); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val9, cast4), val10, cast8); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val9, cast5), val10, cast9); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
unsigned_char128 alu48 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc5*109)+(acc1[1]*-14933)+alu1+16383)/32768)+142), ((((acc4*109)+(acc1[0]*-14933)+alu1+16383)/32768)+142)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc3*109)+(acc0[1]*-14933)+alu1+16383)/32768)+142), ((((acc2*109)+(acc0[0]*-14933)+alu1+16383)/32768)+142))); | |
*((unsigned_char128*)((data0+alu10))) = alu48; | |
unsigned_char128 alu50 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc9*109)+(acc1[1]*-14933)+alu3+16383)/32768)+142), ((((acc8*109)+(acc1[0]*-14933)+alu3+16383)/32768)+142)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc7*109)+(acc0[1]*-14933)+alu3+16383)/32768)+142), ((((acc6*109)+(acc0[0]*-14933)+alu3+16383)/32768)+142))); | |
*((unsigned_char128*)((data0+(alu10+6272)))) = alu50; | |
unsigned_char128 alu52 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc13*109)+(acc1[1]*-14933)+alu5+16383)/32768)+142), ((((acc12*109)+(acc1[0]*-14933)+alu5+16383)/32768)+142)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((acc11*109)+(acc0[1]*-14933)+alu5+16383)/32768)+142), ((((acc10*109)+(acc0[0]*-14933)+alu5+16383)/32768)+142))); | |
*((unsigned_char128*)((data0+(alu10+12544)))) = alu52; | |
} | |
} | |
__attribute__((noinline)) void E_147_128n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:73); | |
int alu2 = (alu1<<7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu2, 0x808000|8); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu2, 0x808000|8); | |
int alu5 = (alu0?73:147); | |
for (int ridx0 = alu1; ridx0 < alu5; ridx0++) { | |
int alu6 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu6))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char32 alu7 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast0 = __builtin_convertvector(alu7, int32); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val1, val1, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val1, val1, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast7*4222)+(cast6*2333)+31021)/65536)*227815)+13661)/16384)+-71), (((((((cast5*4222)+(cast4*2333)+31021)/65536)*227815)+13661)/16384)+-71)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((((((cast3*4222)+(cast2*2333)+31021)/65536)*227815)+13661)/16384)+-71), (((((((cast1*4222)+(cast0*2333)+31021)/65536)*227815)+13661)/16384)+-71))); | |
*((unsigned_char128*)((data0+alu6))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void r_18_49_12_8_32_4n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
int32 cast1 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|5); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:9); | |
int alu3 = (alu1?9:18); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0*3072); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|49); | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
int alu6 = (ridx1<<7); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu6); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu6, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu6+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast1; | |
int32 acc3 = cast1; | |
int32 acc4 = cast1; | |
int32 acc5 = cast1; | |
int32 acc6 = cast1; | |
for (int ridx2 = 0; ridx2 < 12; ridx2++) { | |
int alu12 = (alu6+((ridx2>>2)*6272)+((ridx2&3)<<3)); | |
int alu13 = (alu4+(ridx2<<8)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu12+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu12))); | |
int alu15 = (alu12+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu15))); | |
int alu17 = (alu12+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu19 = (alu12+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = *((unsigned_char8*)((data1+alu19))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu13))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu13+128)))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu21 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu22 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu23 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu24 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu26 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu27 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu28 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu21))), val5, (*((unsigned int*)&alu25))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val4, (*((unsigned int*)&alu22))), val5, (*((unsigned int*)&alu26))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val4, (*((unsigned int*)&alu23))), val5, (*((unsigned int*)&alu27))); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
} | |
int32 val6 = *((int32*)((data3+(ridx0<<5)))); | |
int32 alu37 = (val6*215); | |
int32 alu38 = (acc2*-26660); | |
unsigned_char128 alu39 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc6*215)+alu38+(acc1[1]*-26015)+alu37+16383)/32768), (((acc5*215)+alu38+(acc1[0]*-26015)+alu37+16383)/32768)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*215)+alu38+(acc0[1]*-26015)+alu37+16383)/32768), (((acc3*215)+alu38+(acc0[0]*-26015)+alu37+16383)/32768))); | |
*((unsigned_char128*)((data0+((ridx0*6272)+alu6)))) = alu39; | |
} | |
} | |
} | |
/*
 * r_18_7_2_3_3_32_4 - computes one 128-byte output vector per (ridx0, ridx1, ridx2)
 * for ridx0 in half of [0, 18), ridx1 in [0, 7) and ridx2 in [0, 2).
 *
 * global_idx_0 == 0 handles ridx0 in [0, 9); any other value handles [9, 18).
 * Each output vector is built from four 32-lane int results.  Every result is
 *     (vrmpy(three data2 vectors, three gathered data1 vectors) * 671
 *      + vrmpy(the same gathered vectors, 0x01010101) * -75823
 *      + (data3 vector * 671) + 32767) / 65536
 * and the four results are narrowed with the saturating pack intrinsics.
 *
 * data0: destination buffer.
 * data1: source gathered with byte shuffles at several offsets around alu13/alu18.
 * data2: three 128-byte vectors per ridx0 (at ridx0*384, +128, +256).
 * data3: int source, one 32-lane vector per ridx0.
 * sync: not referenced by this kernel.
 */
__attribute__((noinline)) void r_18_7_2_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* L2 prefetch hints for data2 and data3 from their start. */
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|54); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|5); | |
/* All-zero 128-byte vector substituted wherever a guarded load is skipped. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
/* alu2 is true exactly when g0 == 0; it picks the ridx0 range [0, 9) or [9, 18). */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:9); | |
int alu4 = (alu2?9:18); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
/* Byte offsets of the three data2 vectors for this ridx0, and the int offset into data3. */
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*671); | |
/* Base byte offset into data1 for this ridx0. */
int alu10 = (ridx0*6272); | |
for (int ridx1 = 0; ridx1 < 7; ridx1++) { | |
int alu11 = (ridx1*896); | |
/* alu12 is true when ridx1 >= 1; it guards every data1 load at a negative offset below. */
_Bool alu12 = ((ridx1<1)!=1); | |
int alu13 = (alu10+alu11); | |
/* Two prefetch hints on the same address; only the low byte of the control word differs (8 vs 9). */
__builtin_HEXAGON_Y4_l2fetch(data1+alu13, 0x808000|8); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu13, 0x808000|9); | |
for (int ridx2 = 0; ridx2 < 2; ridx2++) { | |
/* alu16: ridx2 == 0.  alu17: ridx2 == 1. */
_Bool alu16 = (ridx2<1); | |
_Bool alu17 = (alu16!=1); | |
/* Same addresses as val0..val3, but replaced by zeros when ridx2 == 1. */
unsigned_char128 val4 = (alu16?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu16?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu16?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu16?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
/* data1 loads relative to alu13; each is taken for only one value of ridx2, otherwise zeros. */
unsigned_char128 val8 = ((alu16&alu12)?*((unsigned_char128*)((data1+(alu13+-288)))):cast0); | |
unsigned_char128 val9 = ((alu17&alu12)?*((unsigned_char128*)((data1+(alu13+-224)))):cast0); | |
unsigned_char128 val10 = (alu16?*((unsigned_char128*)((data1+(alu13+160)))):cast0); | |
unsigned_char128 val11 = (alu17?*((unsigned_char128*)((data1+(alu13+224)))):cast0); | |
unsigned_char128 val12 = (alu16?*((unsigned_char128*)((data1+(alu13+608)))):cast0); | |
unsigned_char128 val13 = (alu17?*((unsigned_char128*)((data1+(alu13+672)))):cast0); | |
/* data1 loads relative to alu18 = alu13 + ridx2*256; only the negative offsets are guarded. */
int alu18 = (alu10+alu11+(ridx2<<8)); | |
unsigned_char128 val14 = *((unsigned_char128*)((data1+alu18))); | |
unsigned_char128 val15 = (alu12?*((unsigned_char128*)((data1+(alu18+-448)))):cast0); | |
unsigned_char128 val16 = (alu12?*((unsigned_char128*)((data1+(alu18+-320)))):cast0); | |
unsigned_char128 val17 = *((unsigned_char128*)((data1+(alu18+128)))); | |
unsigned_char128 val18 = *((unsigned_char128*)((data1+(alu18+448)))); | |
unsigned_char128 val19 = *((unsigned_char128*)((data1+(alu18+576)))); | |
/*
 * Gather shuffles.  Every mask emits 32 groups of four lanes: three selected
 * bytes followed by index -1, which __builtin_shufflevector treats as a
 * "don't care" lane.
 * NOTE(review): the -1 lanes also feed the vrmpy-by-0x01010101 sums below, so
 * the result appears to depend on what the compiler puts in those lanes unless
 * the matching data2 bytes cancel them - confirm against the generator.
 */
/* alu19..alu21: group i = bytes i, 32+i, 64+i of a single source. */
unsigned_char128 alu19 = __builtin_shufflevector(val8, val8, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu20 = __builtin_shufflevector(val10, val10, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu21 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
/* alu22..alu24: group i = bytes 32+i, 64+i, 96+i of a single source. */
unsigned_char128 alu22 = __builtin_shufflevector(val14, val14, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val15, val15, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val18, val18, 32, 64, 96, -1, 33, 65, 97, -1, 34, 66, 98, -1, 35, 67, 99, -1, 36, 68, 100, -1, 37, 69, 101, -1, 38, 70, 102, -1, 39, 71, 103, -1, 40, 72, 104, -1, 41, 73, 105, -1, 42, 74, 106, -1, 43, 75, 107, -1, 44, 76, 108, -1, 45, 77, 109, -1, 46, 78, 110, -1, 47, 79, 111, -1, 48, 80, 112, -1, 49, 81, 113, -1, 50, 82, 114, -1, 51, 83, 115, -1, 52, 84, 116, -1, 53, 85, 117, -1, 54, 86, 118, -1, 55, 87, 119, -1, 56, 88, 120, -1, 57, 89, 121, -1, 58, 90, 122, -1, 59, 91, 123, -1, 60, 92, 124, -1, 61, 93, 125, -1, 62, 94, 126, -1, 63, 95, 127, -1); | |
/* alu25..alu27: group i = byte i of the first operand, then bytes i and 32+i of the second (indices >= 128). */
unsigned_char128 alu25 = __builtin_shufflevector(val9, val15, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val11, val14, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu27 = __builtin_shufflevector(val13, val18, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* alu28..alu30: group i = byte 96+i of the first operand, then bytes i and 32+i of the second. */
unsigned_char128 alu28 = __builtin_shufflevector(val14, val17, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val15, val16, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val18, val19, 96, 128, 160, -1, 97, 129, 161, -1, 98, 130, 162, -1, 99, 131, 163, -1, 100, 132, 164, -1, 101, 133, 165, -1, 102, 134, 166, -1, 103, 135, 167, -1, 104, 136, 168, -1, 105, 137, 169, -1, 106, 138, 170, -1, 107, 139, 171, -1, 108, 140, 172, -1, 109, 141, 173, -1, 110, 142, 174, -1, 111, 143, 175, -1, 112, 144, 176, -1, 113, 145, 177, -1, 114, 146, 178, -1, 115, 147, 179, -1, 116, 148, 180, -1, 117, 149, 181, -1, 118, 150, 182, -1, 119, 151, 183, -1, 120, 152, 184, -1, 121, 153, 185, -1, 122, 154, 186, -1, 123, 155, 187, -1, 124, 156, 188, -1, 125, 157, 189, -1, 126, 158, 190, -1, 127, 159, 191, -1); | |
/*
 * Four 32-lane results, in pack-argument order:
 *   1. val4/val5/val6 with alu19/alu20/alu21, bias term val7*671 (all zero inputs when ridx2 == 1);
 *   2. val0/val1/val2 with alu29/alu28/alu30, bias term alu9;
 *   3. val0/val1/val2 with alu23/alu22/alu24, bias term alu9;
 *   4. val0/val1/val2 with alu25/alu26/alu27, bias term alu9.
 * 16843009u is 0x01010101, i.e. the scalar-byte operand of vrmpybus is 1 in every byte.
 */
unsigned_char128 alu31 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu19), val5, alu20), val6, alu21)*671)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu19, 16843009u), alu20, 16843009u), alu21, 16843009u)*-75823)+(val7*671)+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu29), val1, alu28), val2, alu30)*671)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu29, 16843009u), alu28, 16843009u), alu30, 16843009u)*-75823)+alu9+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu23), val1, alu22), val2, alu24)*671)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu23, 16843009u), alu22, 16843009u), alu24, 16843009u)*-75823)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu25), val1, alu26), val2, alu27)*671)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu25, 16843009u), alu26, 16843009u), alu27, 16843009u)*-75823)+alu9+32767)/65536))); | |
/*
 * Store offset is ridx0*1568 + ridx1*224 + ridx2*128.
 * NOTE(review): the two ridx2 stores cover 256 bytes while ridx1 advances by
 * only 224, so the last 32 bytes of the ridx2 == 1 store overlap the next
 * ridx1 block; for ridx1 == 6 they extend 32 bytes past this ridx0's
 * 1568-byte span.  Confirm data0 has room for that spill and that the other
 * global_idx_0 caller tolerates it.
 */
*((unsigned_char128*)((data0+((ridx2<<7)+(ridx0*1568)+(ridx1*224))))) = alu31; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_13_72_8_32_4_5(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int128 val0 = *((int128*)((data3+0))); | |
int32 val1 = *((int32*)((data3+128))); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
int32 alu1 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu4 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
unsigned_char128 cast1 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char8 cast2 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int2 cast3 = (int2){0,0}; | |
_Bool alu6 = ((g0!=0)!=1); | |
int alu7 = (alu6?0:6); | |
int alu8 = (alu6?6:13); | |
for (int ridx0 = alu7; ridx0 < alu8; ridx0++) { | |
int alu9 = (ridx0<<7); | |
_Bool alu10 = (ridx0<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu9, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+96)); | |
int2 acc0 = cast3; | |
int2 acc1 = cast3; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
for (int ridx1 = 0; ridx1 < 72; ridx1++) { | |
int alu16 = (ridx1<<8); | |
int alu17 = (alu9+((ridx1>>2)*1568)+((ridx1&3)<<3)); | |
int alu18 = (alu16+18432); | |
int alu19 = (alu16+36864); | |
int alu20 = (alu16+55296); | |
int alu21 = (alu16+73728); | |
int alu22 = (alu16+128); | |
int alu23 = (alu16+18560); | |
int alu24 = (alu16+36992); | |
int alu25 = (alu16+55424); | |
int alu26 = (alu16+73856); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu28 = (alu17+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu28+16)); | |
unsigned_char8 val3 = (alu10?*((unsigned_char8*)((data1+alu28))):cast2); | |
int alu30 = (alu17+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu30+16)); | |
unsigned_char8 val4 = (alu10?*((unsigned_char8*)((data1+alu30))):cast2); | |
int alu32 = (alu17+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu32+16)); | |
unsigned_char8 val5 = (alu10?*((unsigned_char8*)((data1+alu32))):cast2); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+alu16))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+alu18))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+alu23))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val11 = *((unsigned_char128*)((data2+alu24))); | |
unsigned_char128 val12 = *((unsigned_char128*)((data2+alu20))); | |
unsigned_char128 val13 = *((unsigned_char128*)((data2+alu25))); | |
unsigned_char128 val14 = *((unsigned_char128*)((data2+alu21))); | |
unsigned_char128 val15 = *((unsigned_char128*)((data2+alu26))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char128 val16 = (alu10?*((unsigned_char128*)((data2+alu16))):cast1); | |
unsigned_char128 val17 = (alu10?*((unsigned_char128*)((data2+alu22))):cast1); | |
unsigned_char128 val18 = (alu10?*((unsigned_char128*)((data2+alu18))):cast1); | |
unsigned_char128 val19 = (alu10?*((unsigned_char128*)((data2+alu23))):cast1); | |
unsigned_char128 val20 = (alu10?*((unsigned_char128*)((data2+alu19))):cast1); | |
unsigned_char128 val21 = (alu10?*((unsigned_char128*)((data2+alu24))):cast1); | |
unsigned_char128 val22 = (alu10?*((unsigned_char128*)((data2+alu20))):cast1); | |
unsigned_char128 val23 = (alu10?*((unsigned_char128*)((data2+alu25))):cast1); | |
unsigned_char128 val24 = (alu10?*((unsigned_char128*)((data2+alu21))):cast1); | |
unsigned_char128 val25 = (alu10?*((unsigned_char128*)((data2+alu26))):cast1); | |
unsigned_char4 alu34 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu34)); | |
unsigned_char4 alu35 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu35)); | |
unsigned_char4 alu36 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast6 = (*((unsigned int*)&alu36)); | |
unsigned_char4 alu37 = __builtin_shufflevector(val5, val5, 0, 1, 2, 3); | |
unsigned int cast7 = (*((unsigned int*)&alu37)); | |
unsigned_char4 alu38 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu38)); | |
unsigned_char4 alu39 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu39)); | |
unsigned_char4 alu40 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast10 = (*((unsigned int*)&alu40)); | |
unsigned_char4 alu41 = __builtin_shufflevector(val5, val5, 4, 5, 6, 7); | |
unsigned int cast11 = (*((unsigned int*)&alu41)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val6, cast4), val7, cast8); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val16, cast5), val17, cast9); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val16, cast6), val17, cast10); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val16, cast7), val17, cast11); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val8, cast4), val9, cast8); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val18, cast5), val19, cast9); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val18, cast6), val19, cast10); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val18, cast7), val19, cast11); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val10, cast4), val11, cast8); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val20, cast5), val21, cast9); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val20, cast6), val21, cast10); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val20, cast7), val21, cast11); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val12, cast4), val13, cast8); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val22, cast5), val23, cast9); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val22, cast6), val23, cast10); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val22, cast7), val23, cast11); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val14, cast4), val15, cast8); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val24, cast5), val25, cast9); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val24, cast6), val25, cast10); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val24, cast7), val25, cast11); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val4)), (*((long long*)&val5))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int128 val26 = (alu10?*((int128*)((data3+0))):(int128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 val27 = (alu10?*((int32*)((data3+128))):cast0); | |
int32 alu65 = (val27*53); | |
int32 alu66 = __builtin_shufflevector(val26, val26, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu67 = (alu66*53); | |
int32 alu68 = __builtin_shufflevector(val26, val26, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu69 = (alu68*53); | |
int32 alu70 = __builtin_shufflevector(val26, val26, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu71 = (alu70*53); | |
int32 alu72 = __builtin_shufflevector(val26, val26, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu73 = (alu72*53); | |
unsigned_char128 alu74 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*53)+(acc1[1]*-6625)+alu67+32767)/65536), (((acc4*53)+(acc1[0]*-6625)+alu67+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*53)+(acc0[1]*-6625)+alu67+32767)/65536), (((acc2*53)+(acc0[0]*-6625)+(alu1*53)+32767)/65536))); | |
*((unsigned_char128*)((data0+alu9))) = alu74; | |
unsigned_char128 alu76 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*53)+(acc1[1]*-6625)+alu69+32767)/65536), (((acc8*53)+(acc1[0]*-6625)+alu69+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*53)+(acc0[1]*-6625)+alu69+32767)/65536), (((acc6*53)+(acc0[0]*-6625)+(alu2*53)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+1568)))) = alu76; | |
unsigned_char128 alu78 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*53)+(acc1[1]*-6625)+alu71+32767)/65536), (((acc12*53)+(acc1[0]*-6625)+alu71+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*53)+(acc0[1]*-6625)+alu71+32767)/65536), (((acc10*53)+(acc0[0]*-6625)+(alu3*53)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+3136)))) = alu78; | |
unsigned_char128 alu80 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*53)+(acc1[1]*-6625)+alu73+32767)/65536), (((acc16*53)+(acc1[0]*-6625)+alu73+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*53)+(acc0[1]*-6625)+alu73+32767)/65536), (((acc14*53)+(acc0[0]*-6625)+(alu4*53)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+4704)))) = alu80; | |
unsigned_char128 alu82 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*53)+(acc1[1]*-6625)+alu65+32767)/65536), (((acc20*53)+(acc1[0]*-6625)+alu65+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*53)+(acc0[1]*-6625)+alu65+32767)/65536), (((acc18*53)+(acc0[0]*-6625)+(val1*53)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+6272)))) = alu82; | |
} | |
} | |
/*
 * r_30_13_20_8_32_4 -- generated byte-wise reduction kernel built from
 * Hexagon/HVX builtins.
 *
 * data0         output bytes; one 128-byte vector is stored per (ridx0, ridx1)
 *               at byte offset ridx0*1568 + ridx1*128.
 * data1         input bytes, read 8 at a time; each half (4 bytes) is passed
 *               as the 32-bit scalar operand of a vrmpybus builtin.
 * data2         input bytes, read as two 128-byte vectors per ridx2 at byte
 *               offsets ridx0*5120 + ridx2*256 and +128.
 * data3         ints, read as one 32-int vector per ridx0 (element index
 *               ridx0<<5) and added into the result before scaling.
 * global_idx_0  selects the slice of the outer loop: 0 -> ridx0 in [0,15),
 *               any other value -> ridx0 in [15,30).
 * sync          not referenced in this body.
 *
 * NOTE(review): for ridx1 == 12 the store begins 1536 bytes into a row whose
 * stride is 1568, so the 128-byte store extends 96 bytes beyond that stride.
 * The alu9 guard zeroes the data1 reads at +32/+64/+96 (and val6/val7/val9)
 * on that iteration, but the store itself is still full width. Confirm the
 * output buffer has slack and that the two global_idx_0 slices cannot collide
 * at the ridx0 14/15 boundary if they run concurrently.
 */
__attribute__((noinline)) void r_30_13_20_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Zero constants used both as accumulator seeds and as the "masked-off" value of guarded loads. */
int2 cast0 = (int2){0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|62); | |
unsigned_char8 cast1 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int32 cast2 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char128 cast3 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|8); | |
/* alu2 is true exactly when g0 == 0; it picks the [0,15) or [15,30) slice of ridx0. */
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:15); | |
int alu4 = (alu2?15:30); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
/* alu5: byte offset of this ridx0's 5120-byte slab in data2 (20 iterations * 256 bytes). */
int alu5 = (ridx0*5120); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|81); | |
/* alu7: element index of this ridx0's 32-int vector in data3. */
int alu7 = (ridx0<<5); | |
for (int ridx1 = 0; ridx1 < 13; ridx1++) { | |
/* alu8: 128-byte column offset; alu9 is false only on the last (13th) column tile. */
int alu8 = (ridx1<<7); | |
_Bool alu9 = (ridx1<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
/* acc0/acc1: two-int accumulators updated by A2_vraddub_acc; acc2..acc7: 32-int accumulators updated by vrmpybus_acc. */
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast2; | |
int32 acc3 = cast2; | |
int32 acc4 = cast2; | |
int32 acc5 = cast2; | |
int32 acc6 = cast2; | |
int32 acc7 = cast2; | |
for (int ridx2 = 0; ridx2 < 20; ridx2++) { | |
/* alu14/alu16: the two consecutive 128-byte data2 vectors for this step. */
/* alu15: data1 offset = column tile + (ridx2/4)*1568 + (ridx2%4)*8. */
int alu14 = (alu5+(ridx2<<8)); | |
int alu15 = (alu8+((ridx2>>2)*1568)+((ridx2&3)<<3)); | |
int alu16 = (alu14+128); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
/* val0 is always loaded; val1..val3 (data1 at +32/+64/+96) are replaced by zeros when alu9 is false. */
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu15))); | |
int alu18 = (alu15+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val1 = (alu9?*((unsigned_char8*)((data1+alu18))):cast1); | |
int alu20 = (alu15+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val2 = (alu9?*((unsigned_char8*)((data1+alu20))):cast1); | |
int alu22 = (alu15+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val3 = (alu9?*((unsigned_char8*)((data1+alu22))):cast1); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu14))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu16))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Split each 8-byte data1 value into its low (lanes 0..3) and high (lanes 4..7) 4-byte halves. */
unsigned_char4 alu24 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu28 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
/* The int2 accumulators and 8-byte inputs are reinterpreted as long long for A2_vraddub_acc */
/* (presumably a byte-sum with accumulate -- confirm against the Hexagon reference). */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
/* 16843009u == 0x01010101: all four scalar bytes are 1, so acc2 (and acc5 below) */
/* presumably accumulate plain byte sums of the data2 vectors -- confirm vrmpybus semantics. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
/* val6/val7 re-read the same data2 addresses as val4/val5, but yield zeros when alu9 is false. */
unsigned_char128 val6 = (alu9?*((unsigned_char128*)((data2+alu14))):cast3); | |
unsigned_char128 val7 = (alu9?*((unsigned_char128*)((data2+alu16))):cast3); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val6, (*((unsigned int*)&alu25))), val7, (*((unsigned int*)&alu29))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val6, 16843009u), val7, 16843009u); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val6, (*((unsigned int*)&alu26))), val7, (*((unsigned int*)&alu30))); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val6, (*((unsigned int*)&alu27))), val7, (*((unsigned int*)&alu31))); | |
} | |
/* val8 is the unguarded data3 vector (used with acc3/acc2/acc0[0]); val9 is the alu9-guarded copy for the other three terms. */
int32 val8 = *((int32*)((data3+alu7))); | |
int32 val9 = (alu9?*((int32*)((data3+alu7))):cast2); | |
int32 alu41 = (val9*517); | |
int32 alu42 = (acc5*-76516); | |
/* Four 32-int vectors of the form (acc*517 + sum*-76516 + acc0/acc1 lane*-80652 + data3*517 + 32767)/65536 */
/* are narrowed by vpackwh_sat and then vpackhub_sat into one 128-byte vector. */
/* NOTE(review): this looks like a fixed-point rescale with offset corrections -- confirm with the generator. */
unsigned_char128 alu43 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*517)+alu42+(acc1[1]*-80652)+alu41+32767)/65536), (((acc6*517)+alu42+(acc1[0]*-80652)+alu41+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*517)+alu42+(acc0[1]*-80652)+alu41+32767)/65536), (((acc3*517)+(acc2*-76516)+(acc0[0]*-80652)+(val8*517)+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*1568)+alu8)))) = alu43; | |
} | |
} | |
} | |
/*
 * r_30_7_2_3_3_32_4 -- generated byte-wise kernel built from Hexagon/HVX
 * builtins (vrmpybusv / vrmpybus reductions followed by saturating packs).
 *
 * data0         output bytes; one 128-byte vector is stored per
 *               (ridx0, ridx1, ridx2) at ridx0*1568 + ridx1*224 + ridx2*128.
 * data1         input bytes, read as 128-byte vectors at the store offset and
 *               at -224 / +224 from it, plus ridx2-selected loads at
 *               -160/-128, +64/+96 and +288/+320 relative to ridx0*1568 + ridx1*224.
 * data2         three 128-byte vectors per ridx0 at ridx0*384 (+0, +128, +256),
 *               used as the first operand of the vrmpybusv builtins.
 * data3         ints, read as one 32-int vector per ridx0 (element index ridx0<<5).
 * global_idx_0  selects the slice of the outer loop: 0 -> ridx0 in [0,15),
 *               any other value -> ridx0 in [15,30).
 * sync          not referenced in this body.
 *
 * NOTE(review): the -224 / 0 / +224 loads combined with the three-lane gathers
 * look like a 3x3 neighbourhood with zero padding at ridx1 == 0 and ridx1 == 6
 * -- TODO confirm against the generator.
 * NOTE(review): every gather leaves its fourth lane undefined (index -1), yet
 * the 0x01010101 vrmpybus reductions below take all four bytes of each group
 * as input; confirm the backend materialises that lane as zero.
 * NOTE(review): ridx2 advances by 128 bytes while ridx1 advances by 224, so
 * the ridx2 == 1 store overlaps the next ridx1 segment by 32 bytes, and at
 * ridx1 == 6 it extends 32 bytes beyond the 1568-byte row stride (as does the
 * unguarded val14 load). Confirm buffer slack and write ordering.
 */
__attribute__((noinline)) void r_30_7_2_3_3_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* alu0 is true exactly when g0 == 0; alu1/alu4 are the resulting ridx0 bounds. */
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:15); | |
__builtin_HEXAGON_Y4_l2fetch(data2+(alu1*384), 0x808000|91); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|8); | |
/* cast0: all-zero 128-byte vector substituted for every guarded load whose condition is false. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
int alu4 = (alu0?15:30); | |
for (int ridx0 = alu1; ridx0 < alu4; ridx0++) { | |
/* alu5/alu6/alu7: byte offsets of the three data2 vectors for this ridx0; alu8: data3 element index. */
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
/* alu9: the data3 vector pre-multiplied by 4411, reused by three of the four result terms. */
int32 alu9 = (val3*4411); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|8); | |
/* alu11: byte offset of this ridx0's 1568-byte row in data1 and data0. */
int alu11 = (ridx0*1568); | |
for (int ridx1 = 0; ridx1 < 7; ridx1++) { | |
/* alu13 is true for ridx1 >= 1 (guards the loads at negative offsets); alu14 is true for ridx1 < 6 (guards the loads at +224 and beyond). */
int alu12 = (ridx1*224); | |
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<6); | |
int alu15 = (alu11+alu12); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 2; ridx2++) { | |
/* alu17 is true for ridx2 == 0 and alu19 for ridx2 == 1; alu18 is the 128-byte step within the segment. */
_Bool alu17 = (ridx2<1); | |
int alu18 = (ridx2<<7); | |
_Bool alu19 = (alu17!=1); | |
/* val4..val7 re-read the data2/data3 values loaded above, but become zero when ridx2 == 1. */
unsigned_char128 val4 = (alu17?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu17?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu17?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu17?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
/* Each offset pair (-160/-128, +64/+96, +288/+320) is loaded for exactly one ridx2 value; the outer pairs are also guarded by alu13/alu14. */
unsigned_char128 val8 = ((alu17&alu13)?*((unsigned_char128*)((data1+(alu15+-160)))):cast0); | |
unsigned_char128 val9 = ((alu19&alu13)?*((unsigned_char128*)((data1+(alu15+-128)))):cast0); | |
unsigned_char128 val10 = (alu17?*((unsigned_char128*)((data1+(alu15+64)))):cast0); | |
unsigned_char128 val11 = (alu19?*((unsigned_char128*)((data1+(alu15+96)))):cast0); | |
unsigned_char128 val12 = ((alu17&alu14)?*((unsigned_char128*)((data1+(alu15+288)))):cast0); | |
unsigned_char128 val13 = ((alu19&alu14)?*((unsigned_char128*)((data1+(alu15+320)))):cast0); | |
/* alu20 equals the store offset; val14 is loaded unconditionally, val15/val16 are the guarded -224/+224 loads. */
int alu20 = (alu11+alu12+alu18); | |
unsigned_char128 val14 = *((unsigned_char128*)((data1+alu20))); | |
unsigned_char128 val15 = (alu13?*((unsigned_char128*)((data1+(alu20+-224)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu20+224)))):cast0); | |
/* alu21..alu26: for i in 0..31, 4-byte group i = { src[i], src[i+32], src[i+64], undefined } from a single source vector. */
unsigned_char128 alu21 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val8, val8, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val10, val10, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val15, val15, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val16, val16, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
/* alu27..alu29: group i = { a[i], b[i], b[i+32], undefined } with a = first and b = second operand (indices >= 128 select from b). */
unsigned_char128 alu27 = __builtin_shufflevector(val9, val15, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val11, val14, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val13, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* alu30..alu32: group i = { a[i+32], a[i+64], b[i+32], undefined }. */
unsigned_char128 alu30 = __builtin_shufflevector(val14, val10, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val15, val8, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val12, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
/* Four 32-int terms, each (vrmpybusv chain over three gathered vectors)*4411 + (vrmpybus 0x01010101 chain over the same vectors)*-599896 + data3*4411 + 32767, divided by 65536, */
/* then narrowed with vpackwh_sat and vpackhub_sat into one 128-byte vector. The first term uses the ridx2-guarded val4..val7; the other three use val0..val2 and alu9. */
unsigned_char128 alu33 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu22), val5, alu23), val6, alu24)*4411)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu22, 16843009u), alu23, 16843009u), alu24, 16843009u)*-599896)+(val7*4411)+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu31), val1, alu30), val2, alu32)*4411)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu31, 16843009u), alu30, 16843009u), alu32, 16843009u)*-599896)+alu9+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu25), val1, alu21), val2, alu26)*4411)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu25, 16843009u), alu21, 16843009u), alu26, 16843009u)*-599896)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu27), val1, alu28), val2, alu29)*4411)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu29, 16843009u)*-599896)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu18+alu15)))) = alu33; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_13_120_8_32_4_5(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int128 val0 = *((int128*)((data3+0))); | |
int32 val1 = *((int32*)((data3+128))); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
int32 alu1 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu4 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
unsigned_char128 cast1 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char8 cast2 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int2 cast3 = (int2){0,0}; | |
_Bool alu6 = ((g0!=0)!=1); | |
int alu7 = (alu6?0:6); | |
int alu8 = (alu6?6:13); | |
for (int ridx0 = alu7; ridx0 < alu8; ridx0++) { | |
int alu9 = (ridx0<<7); | |
_Bool alu10 = (ridx0<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu9, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+96)); | |
int2 acc0 = cast3; | |
int2 acc1 = cast3; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
for (int ridx1 = 0; ridx1 < 120; ridx1++) { | |
int alu16 = (ridx1<<8); | |
int alu17 = (alu9+((ridx1>>2)*1568)+((ridx1&3)<<3)); | |
int alu18 = (alu16+30720); | |
int alu19 = (alu16+61440); | |
int alu20 = (alu16+92160); | |
int alu21 = (alu16+122880); | |
int alu22 = (alu16+128); | |
int alu23 = (alu16+30848); | |
int alu24 = (alu16+61568); | |
int alu25 = (alu16+92288); | |
int alu26 = (alu16+123008); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu28 = (alu17+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu28+16)); | |
unsigned_char8 val3 = (alu10?*((unsigned_char8*)((data1+alu28))):cast2); | |
int alu30 = (alu17+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu30+16)); | |
unsigned_char8 val4 = (alu10?*((unsigned_char8*)((data1+alu30))):cast2); | |
int alu32 = (alu17+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu32+16)); | |
unsigned_char8 val5 = (alu10?*((unsigned_char8*)((data1+alu32))):cast2); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+alu16))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+alu18))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+alu23))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val11 = *((unsigned_char128*)((data2+alu24))); | |
unsigned_char128 val12 = *((unsigned_char128*)((data2+alu20))); | |
unsigned_char128 val13 = *((unsigned_char128*)((data2+alu25))); | |
unsigned_char128 val14 = *((unsigned_char128*)((data2+alu21))); | |
unsigned_char128 val15 = *((unsigned_char128*)((data2+alu26))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char128 val16 = (alu10?*((unsigned_char128*)((data2+alu16))):cast1); | |
unsigned_char128 val17 = (alu10?*((unsigned_char128*)((data2+alu22))):cast1); | |
unsigned_char128 val18 = (alu10?*((unsigned_char128*)((data2+alu18))):cast1); | |
unsigned_char128 val19 = (alu10?*((unsigned_char128*)((data2+alu23))):cast1); | |
unsigned_char128 val20 = (alu10?*((unsigned_char128*)((data2+alu19))):cast1); | |
unsigned_char128 val21 = (alu10?*((unsigned_char128*)((data2+alu24))):cast1); | |
unsigned_char128 val22 = (alu10?*((unsigned_char128*)((data2+alu20))):cast1); | |
unsigned_char128 val23 = (alu10?*((unsigned_char128*)((data2+alu25))):cast1); | |
unsigned_char128 val24 = (alu10?*((unsigned_char128*)((data2+alu21))):cast1); | |
unsigned_char128 val25 = (alu10?*((unsigned_char128*)((data2+alu26))):cast1); | |
unsigned_char4 alu34 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu34)); | |
unsigned_char4 alu35 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu35)); | |
unsigned_char4 alu36 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast6 = (*((unsigned int*)&alu36)); | |
unsigned_char4 alu37 = __builtin_shufflevector(val5, val5, 0, 1, 2, 3); | |
unsigned int cast7 = (*((unsigned int*)&alu37)); | |
unsigned_char4 alu38 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu38)); | |
unsigned_char4 alu39 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu39)); | |
unsigned_char4 alu40 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast10 = (*((unsigned int*)&alu40)); | |
unsigned_char4 alu41 = __builtin_shufflevector(val5, val5, 4, 5, 6, 7); | |
unsigned int cast11 = (*((unsigned int*)&alu41)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val6, cast4), val7, cast8); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val16, cast5), val17, cast9); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val16, cast6), val17, cast10); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val16, cast7), val17, cast11); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val8, cast4), val9, cast8); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val18, cast5), val19, cast9); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val18, cast6), val19, cast10); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val18, cast7), val19, cast11); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val10, cast4), val11, cast8); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val20, cast5), val21, cast9); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val20, cast6), val21, cast10); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val20, cast7), val21, cast11); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val12, cast4), val13, cast8); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val22, cast5), val23, cast9); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val22, cast6), val23, cast10); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val22, cast7), val23, cast11); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val14, cast4), val15, cast8); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val24, cast5), val25, cast9); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val24, cast6), val25, cast10); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val24, cast7), val25, cast11); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val4)), (*((long long*)&val5))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int128 val26 = (alu10?*((int128*)((data3+0))):(int128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 val27 = (alu10?*((int32*)((data3+128))):cast0); | |
int32 alu65 = (val27*57); | |
int32 alu66 = __builtin_shufflevector(val26, val26, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu67 = (alu66*57); | |
int32 alu68 = __builtin_shufflevector(val26, val26, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu69 = (alu68*57); | |
int32 alu70 = __builtin_shufflevector(val26, val26, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu71 = (alu70*57); | |
int32 alu72 = __builtin_shufflevector(val26, val26, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu73 = (alu72*57); | |
unsigned_char128 alu74 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*57)+(acc1[1]*-7638)+alu67+32767)/65536), (((acc4*57)+(acc1[0]*-7638)+alu67+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*57)+(acc0[1]*-7638)+alu67+32767)/65536), (((acc2*57)+(acc0[0]*-7638)+(alu1*57)+32767)/65536))); | |
*((unsigned_char128*)((data0+alu9))) = alu74; | |
unsigned_char128 alu76 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*57)+(acc1[1]*-7638)+alu69+32767)/65536), (((acc8*57)+(acc1[0]*-7638)+alu69+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*57)+(acc0[1]*-7638)+alu69+32767)/65536), (((acc6*57)+(acc0[0]*-7638)+(alu2*57)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+1568)))) = alu76; | |
unsigned_char128 alu78 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*57)+(acc1[1]*-7638)+alu71+32767)/65536), (((acc12*57)+(acc1[0]*-7638)+alu71+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*57)+(acc0[1]*-7638)+alu71+32767)/65536), (((acc10*57)+(acc0[0]*-7638)+(alu3*57)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+3136)))) = alu78; | |
unsigned_char128 alu80 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*57)+(acc1[1]*-7638)+alu73+32767)/65536), (((acc16*57)+(acc1[0]*-7638)+alu73+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*57)+(acc0[1]*-7638)+alu73+32767)/65536), (((acc14*57)+(acc0[0]*-7638)+(alu4*57)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+4704)))) = alu80; | |
unsigned_char128 alu82 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*57)+(acc1[1]*-7638)+alu65+32767)/65536), (((acc20*57)+(acc1[0]*-7638)+alu65+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*57)+(acc0[1]*-7638)+alu65+32767)/65536), (((acc18*57)+(acc0[0]*-7638)+(val1*57)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+6272)))) = alu82; | |
} | |
} | |
__attribute__((noinline)) void E_62_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|62); | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|62); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:31); | |
int alu4 = (alu2?31:62); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu5))); | |
int alu6 = (alu5+32); | |
_Bool alu7 = (ridx0<61); | |
unsigned_char128 val2 = (alu7?*((unsigned_char128*)((data1+alu6))):cast0); | |
unsigned_char128 val3 = (alu7?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char32 alu8 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val2, val2, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val3, val3, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val2, val2, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char32 alu15 = __builtin_shufflevector(val3, val3, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast8 = __builtin_convertvector(alu15, int32); | |
unsigned_char128 alu16 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((((cast8*2126)+(cast7*2317)+-566146)/32768)*534114)+32767)/65536), ((((((cast6*2126)+(cast5*2317)+-566146)/32768)*534114)+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((((cast4*2126)+(cast2*2317)+-566146)/32768)*534114)+32767)/65536), ((((((cast3*2126)+(cast1*2317)+-566146)/32768)*534114)+32767)/65536))); | |
*((unsigned_char128*)((data0+alu5))) = alu16; | |
} | |
} | |
__attribute__((noinline)) void r_30_13_20_8_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|62); | |
unsigned_char8 cast1 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int32 cast2 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char128 cast3 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|8); | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:15); | |
int alu4 = (alu2?15:30); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*5120); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|81); | |
int alu7 = (ridx0<<5); | |
for (int ridx1 = 0; ridx1 < 13; ridx1++) { | |
int alu8 = (ridx1<<7); | |
_Bool alu9 = (ridx1<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast2; | |
int32 acc3 = cast2; | |
int32 acc4 = cast2; | |
int32 acc5 = cast2; | |
int32 acc6 = cast2; | |
int32 acc7 = cast2; | |
for (int ridx2 = 0; ridx2 < 20; ridx2++) { | |
int alu14 = (alu5+(ridx2<<8)); | |
int alu15 = (alu8+((ridx2>>2)*1568)+((ridx2&3)<<3)); | |
int alu16 = (alu14+128); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu15))); | |
int alu18 = (alu15+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val1 = (alu9?*((unsigned_char8*)((data1+alu18))):cast1); | |
int alu20 = (alu15+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val2 = (alu9?*((unsigned_char8*)((data1+alu20))):cast1); | |
int alu22 = (alu15+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val3 = (alu9?*((unsigned_char8*)((data1+alu22))):cast1); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu14))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu16))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu24 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu28 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
unsigned_char128 val6 = (alu9?*((unsigned_char128*)((data2+alu14))):cast3); | |
unsigned_char128 val7 = (alu9?*((unsigned_char128*)((data2+alu16))):cast3); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val6, (*((unsigned int*)&alu25))), val7, (*((unsigned int*)&alu29))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val6, 16843009u), val7, 16843009u); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val6, (*((unsigned int*)&alu26))), val7, (*((unsigned int*)&alu30))); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val6, (*((unsigned int*)&alu27))), val7, (*((unsigned int*)&alu31))); | |
} | |
int32 val8 = *((int32*)((data3+alu7))); | |
int32 val9 = (alu9?*((int32*)((data3+alu7))):cast2); | |
int32 alu41 = (val9*637); | |
int32 alu42 = (acc5*-75166); | |
unsigned_char128 alu43 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*637)+alu42+(acc1[1]*-77077)+alu41+32767)/65536), (((acc6*637)+alu42+(acc1[0]*-77077)+alu41+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*637)+alu42+(acc0[1]*-77077)+alu41+32767)/65536), (((acc3*637)+(acc2*-75166)+(acc0[0]*-77077)+(val8*637)+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*1568)+alu8)))) = alu43; | |
} | |
} | |
} | |
/*
 * r_30_7_2_3_3_32_4n1 -- kernel that writes one 128-byte vector to data0 for
 * every (ridx0, ridx1, ridx2) triple: 15 outer indices per call, 7 row steps,
 * 2 column steps.
 *
 * data0         destination; written at byte offset ridx0*1568 + ridx1*224 + ridx2*128.
 * data1         byte input; read at the output offset and at neighbouring
 *               offsets (-224/+224 bytes, plus variants shifted by 32-byte multiples).
 * data2         byte input; three consecutive 128-byte vectors per outer index
 *               (byte offset ridx0*384).
 * data3         int input; 32 ints per outer index (element offset ridx0*32).
 * global_idx_0  0 selects outer indices [0, 15); any other value [15, 30).
 * sync          not referenced by this function.
 */
__attribute__((noinline)) void r_30_7_2_3_3_32_4n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* alu0 is true exactly when g0 == 0; it selects which half of the 30 outer indices this call handles. */
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:15); | |
/* Prefetch hints for the data2 slice of this half and for data3. */
__builtin_HEXAGON_Y4_l2fetch(data2+(alu1*384), 0x808000|91); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|8); | |
/* All-zero vector substituted for every load whose guard is false. */
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
int alu4 = (alu0?15:30); | |
for (int ridx0 = alu1; ridx0 < alu4; ridx0++) { | |
/* Byte offsets of the three data2 vectors for this outer index. */
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
/* Element offset into the int array data3. */
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
/* data3 term shared by the three output quarters that use the ungated inputs. */
int32 alu9 = (val3*3219); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|8); | |
/* Byte offset of this outer index within data1 / data0. */
int alu11 = (ridx0*1568); | |
for (int ridx1 = 0; ridx1 < 7; ridx1++) { | |
int alu12 = (ridx1*224); | |
/* alu13: not the first row step, so the loads at negative offsets are taken. */
_Bool alu13 = ((ridx1<1)!=1); | |
/* alu14: not the last row step, so the loads at +224 and beyond are taken. */
_Bool alu14 = (ridx1<6); | |
int alu15 = (alu11+alu12); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 2; ridx2++) { | |
/* alu17 is true on the first column step, alu19 on the second. */
_Bool alu17 = (ridx2<1); | |
int alu18 = (ridx2<<7); | |
_Bool alu19 = (alu17!=1); | |
/* Copies of the data2 / data3 inputs that are zeroed on the second column step. */
unsigned_char128 val4 = (alu17?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu17?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu17?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu17?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
/* data1 loads at fixed offsets from alu15; each is taken on only one column step and skipped (zero) at the first/last row step where guarded. */
unsigned_char128 val8 = ((alu17&alu13)?*((unsigned_char128*)((data1+(alu15+-160)))):cast0); | |
unsigned_char128 val9 = ((alu19&alu13)?*((unsigned_char128*)((data1+(alu15+-128)))):cast0); | |
unsigned_char128 val10 = (alu17?*((unsigned_char128*)((data1+(alu15+64)))):cast0); | |
unsigned_char128 val11 = (alu19?*((unsigned_char128*)((data1+(alu15+96)))):cast0); | |
unsigned_char128 val12 = ((alu17&alu14)?*((unsigned_char128*)((data1+(alu15+288)))):cast0); | |
unsigned_char128 val13 = ((alu19&alu14)?*((unsigned_char128*)((data1+(alu15+320)))):cast0); | |
/* data1 loads at the output position and 224 bytes before / after it. */
int alu20 = (alu11+alu12+alu18); | |
unsigned_char128 val14 = *((unsigned_char128*)((data1+alu20))); | |
unsigned_char128 val15 = (alu13?*((unsigned_char128*)((data1+(alu20+-224)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu20+224)))):cast0); | |
/* The shuffles below regroup source bytes into 4-byte groups (three selected bytes plus one -1 lane) for the 4-way vrmpybus* products. */
/* NOTE(review): lane index -1 leaves every fourth byte unspecified, yet those bytes still enter the vrmpybus(…, 16843009u) sums in alu33 -- confirm this is intended. */
/* alu21..alu26: single-source interleave of lanes i, i+32, i+64. */
unsigned_char128 alu21 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val8, val8, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val10, val10, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val15, val15, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val16, val16, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
/* alu27..alu29: two-source shuffle taking lane i of the first operand and lanes i, i+32 of the second (indices >= 128 address the second operand). */
unsigned_char128 alu27 = __builtin_shufflevector(val9, val15, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val11, val14, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val13, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
/* alu30..alu32: two-source shuffle taking lanes i+32, i+64 of the first operand and lane i+32 of the second. */
unsigned_char128 alu30 = __builtin_shufflevector(val14, val10, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val15, val8, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val12, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
/* Four int32 quarters, each (dot*3219 + ones_sum*-408813 + data3*3219 + 32767) / 65536, where dot is a three-term vrmpybusv chain against the data2 vectors and ones_sum is the same shuffled bytes multiplied by 16843009u (0x01010101). The quarters are then narrowed with vpackwh_sat / vpackhub_sat into one 128-byte vector. */
unsigned_char128 alu33 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu22), val5, alu23), val6, alu24)*3219)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu22, 16843009u), alu23, 16843009u), alu24, 16843009u)*-408813)+(val7*3219)+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu31), val1, alu30), val2, alu32)*3219)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu31, 16843009u), alu30, 16843009u), alu32, 16843009u)*-408813)+alu9+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu25), val1, alu21), val2, alu26)*3219)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu25, 16843009u), alu21, 16843009u), alu26, 16843009u)*-408813)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu27), val1, alu28), val2, alu29)*3219)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu29, 16843009u)*-408813)+alu9+32767)/65536))); | |
/* Store the 128 result bytes at ridx0*1568 + ridx1*224 + ridx2*128. */
*((unsigned_char128*)((data0+(alu18+alu15)))) = alu33; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_13_120_8_32_4_5n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int128 val0 = *((int128*)((data3+0))); | |
int32 val1 = *((int32*)((data3+128))); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|2); | |
int32 alu1 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu2 = __builtin_shufflevector(val0, val0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu3 = __builtin_shufflevector(val0, val0, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu4 = __builtin_shufflevector(val0, val0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data2+0, 0x808000|8); | |
unsigned_char128 cast1 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char8 cast2 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int2 cast3 = (int2){0,0}; | |
_Bool alu6 = ((g0!=0)!=1); | |
int alu7 = (alu6?0:6); | |
int alu8 = (alu6?6:13); | |
for (int ridx0 = alu7; ridx0 < alu8; ridx0++) { | |
int alu9 = (ridx0<<7); | |
_Bool alu10 = (ridx0<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu9); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu9, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu9+96)); | |
int2 acc0 = cast3; | |
int2 acc1 = cast3; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
int32 acc6 = cast0; | |
int32 acc7 = cast0; | |
int32 acc8 = cast0; | |
int32 acc9 = cast0; | |
int32 acc10 = cast0; | |
int32 acc11 = cast0; | |
int32 acc12 = cast0; | |
int32 acc13 = cast0; | |
int32 acc14 = cast0; | |
int32 acc15 = cast0; | |
int32 acc16 = cast0; | |
int32 acc17 = cast0; | |
int32 acc18 = cast0; | |
int32 acc19 = cast0; | |
int32 acc20 = cast0; | |
int32 acc21 = cast0; | |
for (int ridx1 = 0; ridx1 < 120; ridx1++) { | |
int alu16 = (ridx1<<8); | |
int alu17 = (alu9+((ridx1>>2)*1568)+((ridx1&3)<<3)); | |
int alu18 = (alu16+30720); | |
int alu19 = (alu16+61440); | |
int alu20 = (alu16+92160); | |
int alu21 = (alu16+122880); | |
int alu22 = (alu16+128); | |
int alu23 = (alu16+30848); | |
int alu24 = (alu16+61568); | |
int alu25 = (alu16+92288); | |
int alu26 = (alu16+123008); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = *((unsigned_char8*)((data1+alu17))); | |
int alu28 = (alu17+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu28+16)); | |
unsigned_char8 val3 = (alu10?*((unsigned_char8*)((data1+alu28))):cast2); | |
int alu30 = (alu17+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu30+16)); | |
unsigned_char8 val4 = (alu10?*((unsigned_char8*)((data1+alu30))):cast2); | |
int alu32 = (alu17+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu32+16)); | |
unsigned_char8 val5 = (alu10?*((unsigned_char8*)((data1+alu32))):cast2); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+alu16))); | |
unsigned_char128 val7 = *((unsigned_char128*)((data2+alu22))); | |
unsigned_char128 val8 = *((unsigned_char128*)((data2+alu18))); | |
unsigned_char128 val9 = *((unsigned_char128*)((data2+alu23))); | |
unsigned_char128 val10 = *((unsigned_char128*)((data2+alu19))); | |
unsigned_char128 val11 = *((unsigned_char128*)((data2+alu24))); | |
unsigned_char128 val12 = *((unsigned_char128*)((data2+alu20))); | |
unsigned_char128 val13 = *((unsigned_char128*)((data2+alu25))); | |
unsigned_char128 val14 = *((unsigned_char128*)((data2+alu21))); | |
unsigned_char128 val15 = *((unsigned_char128*)((data2+alu26))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char128 val16 = (alu10?*((unsigned_char128*)((data2+alu16))):cast1); | |
unsigned_char128 val17 = (alu10?*((unsigned_char128*)((data2+alu22))):cast1); | |
unsigned_char128 val18 = (alu10?*((unsigned_char128*)((data2+alu18))):cast1); | |
unsigned_char128 val19 = (alu10?*((unsigned_char128*)((data2+alu23))):cast1); | |
unsigned_char128 val20 = (alu10?*((unsigned_char128*)((data2+alu19))):cast1); | |
unsigned_char128 val21 = (alu10?*((unsigned_char128*)((data2+alu24))):cast1); | |
unsigned_char128 val22 = (alu10?*((unsigned_char128*)((data2+alu20))):cast1); | |
unsigned_char128 val23 = (alu10?*((unsigned_char128*)((data2+alu25))):cast1); | |
unsigned_char128 val24 = (alu10?*((unsigned_char128*)((data2+alu21))):cast1); | |
unsigned_char128 val25 = (alu10?*((unsigned_char128*)((data2+alu26))):cast1); | |
unsigned_char4 alu34 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned int cast4 = (*((unsigned int*)&alu34)); | |
unsigned_char4 alu35 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned int cast5 = (*((unsigned int*)&alu35)); | |
unsigned_char4 alu36 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned int cast6 = (*((unsigned int*)&alu36)); | |
unsigned_char4 alu37 = __builtin_shufflevector(val5, val5, 0, 1, 2, 3); | |
unsigned int cast7 = (*((unsigned int*)&alu37)); | |
unsigned_char4 alu38 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned int cast8 = (*((unsigned int*)&alu38)); | |
unsigned_char4 alu39 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned int cast9 = (*((unsigned int*)&alu39)); | |
unsigned_char4 alu40 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
unsigned int cast10 = (*((unsigned int*)&alu40)); | |
unsigned_char4 alu41 = __builtin_shufflevector(val5, val5, 4, 5, 6, 7); | |
unsigned int cast11 = (*((unsigned int*)&alu41)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val6, cast4), val7, cast8); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val16, cast5), val17, cast9); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val16, cast6), val17, cast10); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val16, cast7), val17, cast11); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val8, cast4), val9, cast8); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val18, cast5), val19, cast9); | |
acc8 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc8, val18, cast6), val19, cast10); | |
acc9 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc9, val18, cast7), val19, cast11); | |
acc10 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc10, val10, cast4), val11, cast8); | |
acc11 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc11, val20, cast5), val21, cast9); | |
acc12 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc12, val20, cast6), val21, cast10); | |
acc13 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc13, val20, cast7), val21, cast11); | |
acc14 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc14, val12, cast4), val13, cast8); | |
acc15 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc15, val22, cast5), val23, cast9); | |
acc16 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc16, val22, cast6), val23, cast10); | |
acc17 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc17, val22, cast7), val23, cast11); | |
acc18 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc18, val14, cast4), val15, cast8); | |
acc19 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc19, val24, cast5), val25, cast9); | |
acc20 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc20, val24, cast6), val25, cast10); | |
acc21 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc21, val24, cast7), val25, cast11); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val4)), (*((long long*)&val5))); | |
acc1 = (*((int2*)&precast3)); | |
} | |
int128 val26 = (alu10?*((int128*)((data3+0))):(int128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 val27 = (alu10?*((int32*)((data3+128))):cast0); | |
int32 alu65 = (val27*47); | |
int32 alu66 = __builtin_shufflevector(val26, val26, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu67 = (alu66*47); | |
int32 alu68 = __builtin_shufflevector(val26, val26, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu69 = (alu68*47); | |
int32 alu70 = __builtin_shufflevector(val26, val26, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 alu71 = (alu70*47); | |
int32 alu72 = __builtin_shufflevector(val26, val26, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int32 alu73 = (alu72*47); | |
unsigned_char128 alu74 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*47)+(acc1[1]*-6345)+alu67+32767)/65536), (((acc4*47)+(acc1[0]*-6345)+alu67+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*47)+(acc0[1]*-6345)+alu67+32767)/65536), (((acc2*47)+(acc0[0]*-6345)+(alu1*47)+32767)/65536))); | |
*((unsigned_char128*)((data0+alu9))) = alu74; | |
unsigned_char128 alu76 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc9*47)+(acc1[1]*-6345)+alu69+32767)/65536), (((acc8*47)+(acc1[0]*-6345)+alu69+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*47)+(acc0[1]*-6345)+alu69+32767)/65536), (((acc6*47)+(acc0[0]*-6345)+(alu2*47)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+1568)))) = alu76; | |
unsigned_char128 alu78 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc13*47)+(acc1[1]*-6345)+alu71+32767)/65536), (((acc12*47)+(acc1[0]*-6345)+alu71+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc11*47)+(acc0[1]*-6345)+alu71+32767)/65536), (((acc10*47)+(acc0[0]*-6345)+(alu3*47)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+3136)))) = alu78; | |
unsigned_char128 alu80 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc17*47)+(acc1[1]*-6345)+alu73+32767)/65536), (((acc16*47)+(acc1[0]*-6345)+alu73+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc15*47)+(acc0[1]*-6345)+alu73+32767)/65536), (((acc14*47)+(acc0[0]*-6345)+(alu4*47)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+4704)))) = alu80; | |
unsigned_char128 alu82 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc21*47)+(acc1[1]*-6345)+alu65+32767)/65536), (((acc20*47)+(acc1[0]*-6345)+alu65+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc19*47)+(acc0[1]*-6345)+alu65+32767)/65536), (((acc18*47)+(acc0[0]*-6345)+(val1*47)+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu9+6272)))) = alu82; | |
} | |
} | |
__attribute__((noinline)) void E_62_128n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|62); | |
__builtin_HEXAGON_Y4_l2fetch(data2, 0x808000|62); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:31); | |
int alu4 = (alu2?31:62); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu5))); | |
int alu6 = (alu5+32); | |
_Bool alu7 = (ridx0<61); | |
unsigned_char128 val2 = (alu7?*((unsigned_char128*)((data1+alu6))):cast0); | |
unsigned_char128 val3 = (alu7?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char32 alu8 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu8, int32); | |
unsigned_char32 alu9 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast2 = __builtin_convertvector(alu9, int32); | |
unsigned_char32 alu10 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
unsigned_char32 alu11 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast4 = __builtin_convertvector(alu11, int32); | |
unsigned_char32 alu12 = __builtin_shufflevector(val2, val2, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast5 = __builtin_convertvector(alu12, int32); | |
unsigned_char32 alu13 = __builtin_shufflevector(val3, val3, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 cast6 = __builtin_convertvector(alu13, int32); | |
unsigned_char32 alu14 = __builtin_shufflevector(val2, val2, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast7 = __builtin_convertvector(alu14, int32); | |
unsigned_char32 alu15 = __builtin_shufflevector(val3, val3, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
int32 cast8 = __builtin_convertvector(alu15, int32); | |
unsigned_char128 alu16 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B(((((((cast8*9685)+(cast7*8041)+-2217573)/65536)*271422)+32767)/65536), ((((((cast6*9685)+(cast5*8041)+-2217573)/65536)*271422)+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B(((((((cast4*9685)+(cast2*8041)+-2217573)/65536)*271422)+32767)/65536), ((((((cast3*9685)+(cast1*8041)+-2217573)/65536)*271422)+32767)/65536))); | |
*((unsigned_char128*)((data0+alu5))) = alu16; | |
} | |
} | |
__attribute__((noinline)) void r_30_13_20_8_32_4n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int2 cast0 = (int2){0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|62); | |
unsigned_char8 cast1 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int32 cast2 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char128 cast3 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|8); | |
_Bool alu2 = ((g0!=0)!=1); | |
int alu3 = (alu2?0:15); | |
int alu4 = (alu2?15:30); | |
for (int ridx0 = alu3; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*5120); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|81); | |
int alu7 = (ridx0<<5); | |
for (int ridx1 = 0; ridx1 < 13; ridx1++) { | |
int alu8 = (ridx1<<7); | |
_Bool alu9 = (ridx1<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu8+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast2; | |
int32 acc3 = cast2; | |
int32 acc4 = cast2; | |
int32 acc5 = cast2; | |
int32 acc6 = cast2; | |
int32 acc7 = cast2; | |
for (int ridx2 = 0; ridx2 < 20; ridx2++) { | |
int alu14 = (alu5+(ridx2<<8)); | |
int alu15 = (alu8+((ridx2>>2)*1568)+((ridx2&3)<<3)); | |
int alu16 = (alu14+128); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu15))); | |
int alu18 = (alu15+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val1 = (alu9?*((unsigned_char8*)((data1+alu18))):cast1); | |
int alu20 = (alu15+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val2 = (alu9?*((unsigned_char8*)((data1+alu20))):cast1); | |
int alu22 = (alu15+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val3 = (alu9?*((unsigned_char8*)((data1+alu22))):cast1); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu14))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu16))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char4 alu24 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu28 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
unsigned_char128 val6 = (alu9?*((unsigned_char128*)((data2+alu14))):cast3); | |
unsigned_char128 val7 = (alu9?*((unsigned_char128*)((data2+alu16))):cast3); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val6, (*((unsigned int*)&alu25))), val7, (*((unsigned int*)&alu29))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val6, 16843009u), val7, 16843009u); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val6, (*((unsigned int*)&alu26))), val7, (*((unsigned int*)&alu30))); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val6, (*((unsigned int*)&alu27))), val7, (*((unsigned int*)&alu31))); | |
} | |
int32 val8 = *((int32*)((data3+alu7))); | |
int32 val9 = (alu9?*((int32*)((data3+alu7))):cast2); | |
int32 alu41 = (val9*1063); | |
int32 alu42 = (acc5*-117993); | |
unsigned_char128 alu43 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*1063)+alu42+(acc1[1]*-133938)+alu41+32767)/65536), (((acc6*1063)+alu42+(acc1[0]*-133938)+alu41+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*1063)+alu42+(acc0[1]*-133938)+alu41+32767)/65536), (((acc3*1063)+(acc2*-117993)+(acc0[0]*-133938)+(val8*1063)+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*1568)+alu8)))) = alu43; | |
} | |
} | |
} | |
__attribute__((noinline)) void r_30_7_2_3_3_32_4n2(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:15); | |
__builtin_HEXAGON_Y4_l2fetch(data2+(alu1*384), 0x808000|91); | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|8); | |
unsigned_char128 cast0 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
int alu4 = (alu0?15:30); | |
for (int ridx0 = alu1; ridx0 < alu4; ridx0++) { | |
int alu5 = (ridx0*384); | |
int alu6 = (alu5+128); | |
int alu7 = (alu5+256); | |
int alu8 = (ridx0<<5); | |
unsigned_char128 val0 = *((unsigned_char128*)((data2+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data2+alu6))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu7))); | |
int32 val3 = *((int32*)((data3+alu8))); | |
int32 alu9 = (val3*3278); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|8); | |
int alu11 = (ridx0*1568); | |
for (int ridx1 = 0; ridx1 < 7; ridx1++) { | |
int alu12 = (ridx1*224); | |
_Bool alu13 = ((ridx1<1)!=1); | |
_Bool alu14 = (ridx1<6); | |
int alu15 = (alu11+alu12); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu15, 0x808000|8); | |
for (int ridx2 = 0; ridx2 < 2; ridx2++) { | |
_Bool alu17 = (ridx2<1); | |
int alu18 = (ridx2<<7); | |
_Bool alu19 = (alu17!=1); | |
unsigned_char128 val4 = (alu17?*((unsigned_char128*)((data2+alu5))):cast0); | |
unsigned_char128 val5 = (alu17?*((unsigned_char128*)((data2+alu6))):cast0); | |
unsigned_char128 val6 = (alu17?*((unsigned_char128*)((data2+alu7))):cast0); | |
int32 val7 = (alu17?*((int32*)((data3+alu8))):(int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
unsigned_char128 val8 = ((alu17&alu13)?*((unsigned_char128*)((data1+(alu15+-160)))):cast0); | |
unsigned_char128 val9 = ((alu19&alu13)?*((unsigned_char128*)((data1+(alu15+-128)))):cast0); | |
unsigned_char128 val10 = (alu17?*((unsigned_char128*)((data1+(alu15+64)))):cast0); | |
unsigned_char128 val11 = (alu19?*((unsigned_char128*)((data1+(alu15+96)))):cast0); | |
unsigned_char128 val12 = ((alu17&alu14)?*((unsigned_char128*)((data1+(alu15+288)))):cast0); | |
unsigned_char128 val13 = ((alu19&alu14)?*((unsigned_char128*)((data1+(alu15+320)))):cast0); | |
int alu20 = (alu11+alu12+alu18); | |
unsigned_char128 val14 = *((unsigned_char128*)((data1+alu20))); | |
unsigned_char128 val15 = (alu13?*((unsigned_char128*)((data1+(alu20+-224)))):cast0); | |
unsigned_char128 val16 = (alu14?*((unsigned_char128*)((data1+(alu20+224)))):cast0); | |
unsigned_char128 alu21 = __builtin_shufflevector(val14, val14, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu22 = __builtin_shufflevector(val8, val8, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu23 = __builtin_shufflevector(val10, val10, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu24 = __builtin_shufflevector(val12, val12, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu25 = __builtin_shufflevector(val15, val15, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu26 = __builtin_shufflevector(val16, val16, 0, 32, 64, -1, 1, 33, 65, -1, 2, 34, 66, -1, 3, 35, 67, -1, 4, 36, 68, -1, 5, 37, 69, -1, 6, 38, 70, -1, 7, 39, 71, -1, 8, 40, 72, -1, 9, 41, 73, -1, 10, 42, 74, -1, 11, 43, 75, -1, 12, 44, 76, -1, 13, 45, 77, -1, 14, 46, 78, -1, 15, 47, 79, -1, 16, 48, 80, -1, 17, 49, 81, -1, 18, 50, 82, -1, 19, 51, 83, -1, 20, 52, 84, -1, 21, 53, 85, -1, 22, 54, 86, -1, 23, 55, 87, -1, 24, 56, 88, -1, 25, 57, 89, -1, 26, 58, 90, -1, 27, 59, 91, -1, 28, 60, 92, -1, 29, 61, 93, -1, 30, 62, 94, -1, 31, 63, 95, -1); | |
unsigned_char128 alu27 = __builtin_shufflevector(val9, val15, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu28 = __builtin_shufflevector(val11, val14, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu29 = __builtin_shufflevector(val13, val16, 0, 128, 160, -1, 1, 129, 161, -1, 2, 130, 162, -1, 3, 131, 163, -1, 4, 132, 164, -1, 5, 133, 165, -1, 6, 134, 166, -1, 7, 135, 167, -1, 8, 136, 168, -1, 9, 137, 169, -1, 10, 138, 170, -1, 11, 139, 171, -1, 12, 140, 172, -1, 13, 141, 173, -1, 14, 142, 174, -1, 15, 143, 175, -1, 16, 144, 176, -1, 17, 145, 177, -1, 18, 146, 178, -1, 19, 147, 179, -1, 20, 148, 180, -1, 21, 149, 181, -1, 22, 150, 182, -1, 23, 151, 183, -1, 24, 152, 184, -1, 25, 153, 185, -1, 26, 154, 186, -1, 27, 155, 187, -1, 28, 156, 188, -1, 29, 157, 189, -1, 30, 158, 190, -1, 31, 159, 191, -1); | |
unsigned_char128 alu30 = __builtin_shufflevector(val14, val10, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu31 = __builtin_shufflevector(val15, val8, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu32 = __builtin_shufflevector(val16, val12, 32, 64, 160, -1, 33, 65, 161, -1, 34, 66, 162, -1, 35, 67, 163, -1, 36, 68, 164, -1, 37, 69, 165, -1, 38, 70, 166, -1, 39, 71, 167, -1, 40, 72, 168, -1, 41, 73, 169, -1, 42, 74, 170, -1, 43, 75, 171, -1, 44, 76, 172, -1, 45, 77, 173, -1, 46, 78, 174, -1, 47, 79, 175, -1, 48, 80, 176, -1, 49, 81, 177, -1, 50, 82, 178, -1, 51, 83, 179, -1, 52, 84, 180, -1, 53, 85, 181, -1, 54, 86, 182, -1, 55, 87, 183, -1, 56, 88, 184, -1, 57, 89, 185, -1, 58, 90, 186, -1, 59, 91, 187, -1, 60, 92, 188, -1, 61, 93, 189, -1, 62, 94, 190, -1, 63, 95, 191, -1); | |
unsigned_char128 alu33 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val4, alu22), val5, alu23), val6, alu24)*3278)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu22, 16843009u), alu23, 16843009u), alu24, 16843009u)*-334356)+(val7*3278)+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu31), val1, alu30), val2, alu32)*3278)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu31, 16843009u), alu30, 16843009u), alu32, 16843009u)*-334356)+alu9+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu25), val1, alu21), val2, alu26)*3278)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu25, 16843009u), alu21, 16843009u), alu26, 16843009u)*-334356)+alu9+32767)/65536), (((__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_acc_128B(__builtin_HEXAGON_V6_vrmpybusv_128B(val0, alu27), val1, alu28), val2, alu29)*3278)+(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_128B(alu27, 16843009u), alu28, 16843009u), alu29, 16843009u)*-334356)+alu9+32767)/65536))); | |
*((unsigned_char128*)((data0+(alu18+alu15)))) = alu33; | |
} | |
} | |
} | |
} | |
__attribute__((noinline)) void r_10_13_120_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|3); | |
int2 cast0 = (int2){0,0}; | |
unsigned_char8 cast1 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int32 cast2 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char128 cast3 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:5); | |
int alu3 = (alu1?5:10); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<5); | |
int alu5 = (ridx0*30720); | |
int32 val0 = *((int32*)((data3+alu4))); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|8); | |
for (int ridx1 = 0; ridx1 < 13; ridx1++) { | |
int alu7 = (ridx1<<7); | |
_Bool alu8 = (ridx1<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu7, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+96)); | |
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast2; | |
int32 acc3 = cast2; | |
int32 acc4 = cast2; | |
int32 acc5 = cast2; | |
for (int ridx2 = 0; ridx2 < 120; ridx2++) { | |
int alu14 = (alu7+((ridx2>>2)*1568)+((ridx2&3)<<3)); | |
int alu15 = (alu5+(ridx2<<8)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu14+16)); | |
unsigned_char8 val1 = *((unsigned_char8*)((data1+alu14))); | |
int alu17 = (alu14+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu17+16)); | |
unsigned_char8 val2 = (alu8?*((unsigned_char8*)((data1+alu17))):cast1); | |
int alu19 = (alu14+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu19+16)); | |
unsigned_char8 val3 = (alu8?*((unsigned_char8*)((data1+alu19))):cast1); | |
int alu21 = (alu14+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu21+16)); | |
unsigned_char8 val4 = (alu8?*((unsigned_char8*)((data1+alu21))):cast1); | |
int alu23 = (alu15+128); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu15))); | |
unsigned_char128 val6 = *((unsigned_char128*)((data2+alu23))); | |
unsigned_char4 alu24 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu26 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu27 = __builtin_shufflevector(val4, val4, 0, 1, 2, 3); | |
unsigned_char4 alu28 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu29 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu30 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
unsigned_char4 alu31 = __builtin_shufflevector(val4, val4, 4, 5, 6, 7); | |
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val1)), (*((long long*)&val2))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val3)), (*((long long*)&val4))); | |
acc1 = (*((int2*)&precast3)); | |
unsigned_char128 val7 = (alu8?*((unsigned_char128*)((data2+alu15))):cast3); | |
unsigned_char128 val8 = (alu8?*((unsigned_char128*)((data2+alu23))):cast3); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val5, (*((unsigned int*)&alu24))), val6, (*((unsigned int*)&alu28))); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val7, (*((unsigned int*)&alu25))), val8, (*((unsigned int*)&alu29))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val7, (*((unsigned int*)&alu26))), val8, (*((unsigned int*)&alu30))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val7, (*((unsigned int*)&alu27))), val8, (*((unsigned int*)&alu31))); | |
} | |
int32 val9 = (alu8?*((int32*)((data3+alu4))):cast2); | |
int32 alu39 = (val9*318); | |
unsigned_char128 alu40 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc5*318)+(acc1[1]*-42294)+alu39+32767)/65536), (((acc4*318)+(acc1[0]*-42294)+alu39+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*318)+(acc0[1]*-42294)+alu39+32767)/65536), (((acc2*318)+(acc0[0]*-42294)+(val0*318)+32767)/65536))); | |
*((unsigned_char128*)((data0+((ridx0*1568)+alu7)))) = alu40; | |
} | |
} | |
} | |
/*
 * r_40_13_40_8_32_4: integer accumulate / scale / saturating-pack kernel.
 *
 * data0        - output bytes; one 128-byte vector is stored per (ridx0, ridx1)
 *                at byte offset ridx0*1568 + ridx1*128.
 * data1        - input bytes, read in 8-byte groups (row stride 1568 bytes).
 * data2        - input bytes, read as two 128-byte vectors per inner step at
 *                byte offset ridx0*10240 + ridx2*256 (and +128).
 * data3        - 32-bit ints; one 32-lane vector is read at element offset ridx0*32.
 * global_idx_0 - 0 processes outer rows [0,20); any other value processes [20,40).
 * sync         - not referenced by this function.
 *
 * The final step multiplies the accumulators by fixed integer constants, adds
 * 32767, divides by 65536 and saturating-packs to unsigned bytes (presumably a
 * fixed-point requantization with rounding -- TODO confirm against the generator).
 */
__attribute__((noinline)) void r_40_13_40_8_32_4(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* Zero vectors: accumulator seeds and fall-back values for the gated loads below. */
int2 cast0 = (int2){0,0}; | |
unsigned_char8 cast1 = (unsigned_char8){0,0,0,0,0,0,0,0}; | |
int32 cast2 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
unsigned_char128 cast3 = (unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y4_l2fetch(data3, 0x808000|10); | |
/* alu1 is true exactly when g0 == 0; it picks outer rows [0,20), otherwise [20,40). */
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:20); | |
int alu3 = (alu1?20:40); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
/* alu4: byte offset of this row's block in data2. */
int alu4 = (ridx0*10240); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu4, 0x808000|8); | |
/* alu6: element (int) offset of this row's 32-lane vector in data3. */
int alu6 = (ridx0<<5); | |
for (int ridx1 = 0; ridx1 < 13; ridx1++) { | |
/* alu7: byte offset of this 128-byte tile within a row. */
int alu7 = (ridx1<<7); | |
/* alu8 is false only on the last tile (ridx1 == 12); it gates the loads that
   otherwise fall back to the zero vectors cast1 / cast2 / cast3. */
_Bool alu8 = (ridx1<12); | |
__builtin_HEXAGON_Y2_dcfetch(data1+alu7); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu7, 0x808000|8); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+32)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+64)); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+96)); | |
/* Accumulators are reset for every (ridx0, ridx1) tile. */
int2 acc0 = cast0; | |
int2 acc1 = cast0; | |
int32 acc2 = cast2; | |
int32 acc3 = cast2; | |
int32 acc4 = cast2; | |
int32 acc5 = cast2; | |
int32 acc6 = cast2; | |
int32 acc7 = cast2; | |
for (int ridx2 = 0; ridx2 < 40; ridx2++) { | |
/* alu14 / alu16: byte offsets of the two 128-byte data2 vectors for this step. */
int alu14 = (alu4+(ridx2<<8)); | |
/* alu15: data1 byte offset = ridx1*128 + (ridx2/4)*1568 + (ridx2%4)*8. */
int alu15 = (alu7+((ridx2>>2)*1568)+((ridx2&3)<<3)); | |
int alu16 = (alu14+128); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu15+16)); | |
/* val0 is always loaded; val1..val3 (at +32, +64, +96) are gated by alu8. */
unsigned_char8 val0 = *((unsigned_char8*)((data1+alu15))); | |
int alu18 = (alu15+32); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu18+16)); | |
unsigned_char8 val1 = (alu8?*((unsigned_char8*)((data1+alu18))):cast1); | |
int alu20 = (alu15+64); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu20+16)); | |
unsigned_char8 val2 = (alu8?*((unsigned_char8*)((data1+alu20))):cast1); | |
int alu22 = (alu15+96); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu22+16)); | |
unsigned_char8 val3 = (alu8?*((unsigned_char8*)((data1+alu22))):cast1); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+alu14))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+alu16))); | |
int2 precast0 = acc0; | |
int2 precast1 = acc1; | |
/* Split each 8-byte group into its low (lanes 0-3) and high (lanes 4-7) halves;
   each half is reinterpreted below as a 32-bit scalar operand. */
unsigned_char4 alu24 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3); | |
unsigned_char4 alu25 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3); | |
unsigned_char4 alu26 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3); | |
unsigned_char4 alu27 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3); | |
unsigned_char4 alu28 = __builtin_shufflevector(val0, val0, 4, 5, 6, 7); | |
unsigned_char4 alu29 = __builtin_shufflevector(val1, val1, 4, 5, 6, 7); | |
unsigned_char4 alu30 = __builtin_shufflevector(val2, val2, 4, 5, 6, 7); | |
unsigned_char4 alu31 = __builtin_shufflevector(val3, val3, 4, 5, 6, 7); | |
/* acc0 / acc1 are updated through the A2_vraddub_acc intrinsic on the raw 8-byte
   groups, reinterpreted as 64-bit values. */
long long precast2 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast0)), (*((long long*)&val0)), (*((long long*)&val1))); | |
acc0 = (*((int2*)&precast2)); | |
long long precast3 = __builtin_HEXAGON_A2_vraddub_acc((*((long long*)&precast1)), (*((long long*)&val2)), (*((long long*)&val3))); | |
acc1 = (*((int2*)&precast3)); | |
/* 16843009u == 0x01010101: every byte of the scalar operand is 1. */
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, 16843009u), val5, 16843009u); | |
/* val6 / val7 read the same data2 addresses as val4 / val5, but are gated by alu8. */
unsigned_char128 val6 = (alu8?*((unsigned_char128*)((data2+alu14))):cast3); | |
unsigned_char128 val7 = (alu8?*((unsigned_char128*)((data2+alu16))):cast3); | |
acc3 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc3, val4, (*((unsigned int*)&alu24))), val5, (*((unsigned int*)&alu28))); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, val6, (*((unsigned int*)&alu25))), val7, (*((unsigned int*)&alu29))); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, val6, 16843009u), val7, 16843009u); | |
acc6 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc6, val6, (*((unsigned int*)&alu26))), val7, (*((unsigned int*)&alu30))); | |
acc7 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(__builtin_HEXAGON_V6_vrmpybus_acc_128B(acc7, val6, (*((unsigned int*)&alu27))), val7, (*((unsigned int*)&alu31))); | |
} | |
/* val8 / val9 read the same data3 vector; only val9 is gated by alu8. */
int32 val8 = *((int32*)((data3+alu6))); | |
int32 val9 = (alu8?*((int32*)((data3+alu6))):cast2); | |
int32 alu41 = (val9*670); | |
int32 alu42 = (acc5*-80400); | |
/* Four 32-lane results are scaled, offset by 32767, divided by 65536 and packed
   (words -> halfwords -> unsigned bytes, saturating) into one 128-byte vector. */
unsigned_char128 alu43 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc7*670)+alu42+(acc1[1]*-87100)+alu41+32767)/65536), (((acc6*670)+alu42+(acc1[0]*-87100)+alu41+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc4*670)+alu42+(acc0[1]*-87100)+alu41+32767)/65536), (((acc3*670)+(acc2*-80400)+(acc0[0]*-87100)+(val8*670)+32767)/65536))); | |
/* NOTE(review): the store is not gated by alu8. At ridx1 == 12 (alu7 == 1536) it
   covers row bytes 1536..1663 while the row stride is 1568, so it extends 96 bytes
   into the region addressed by ridx0+1 -- confirm this is benign, in particular at
   the ridx0 19/20 boundary between the two global_idx_0 halves. */
*((unsigned_char128*)((data0+((ridx0*1568)+alu7)))) = alu43; | |
} | |
} | |
} | |
__attribute__((noinline)) void r_10_49_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:5); | |
int alu2 = (alu0?5:10); | |
for (int ridx0 = alu1; ridx0 < alu2; ridx0++) { | |
int alu3 = (ridx0*6272); | |
__builtin_HEXAGON_Y4_l2fetch(data1+alu3, 0x808000|27); | |
int32 acc0 = cast0; | |
int32 acc1 = cast0; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
for (int ridx1 = 0; ridx1 < 49; ridx1++) { | |
int alu5 = (alu3+(ridx1<<5)); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu5))); | |
unsigned_char128 val1 = *((unsigned_char128*)((data1+(alu5+1568)))); | |
unsigned_char128 val2 = *((unsigned_char128*)((data1+(alu5+3136)))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data1+(alu5+4704)))); | |
unsigned_char32 alu6 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast1 = __builtin_convertvector(alu6, int32); | |
acc0 = (acc0+cast1); | |
unsigned_char32 alu8 = __builtin_shufflevector(val1, val1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast2 = __builtin_convertvector(alu8, int32); | |
acc1 = (acc1+cast2); | |
unsigned_char32 alu10 = __builtin_shufflevector(val2, val2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast3 = __builtin_convertvector(alu10, int32); | |
acc2 = (acc2+cast3); | |
unsigned_char32 alu12 = __builtin_shufflevector(val3, val3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 cast4 = __builtin_convertvector(alu12, int32); | |
acc3 = (acc3+cast4); | |
} | |
unsigned_char128 alu15 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*197)+4095)/8192), (((acc2*197)+4095)/8192)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc1*197)+4095)/8192), (((acc0*197)+4095)/8192))); | |
*((unsigned_char128*)((data0+(ridx0<<7)))) = alu15; | |
} | |
} | |
__attribute__((noinline)) void E_10_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|10); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:5); | |
int alu3 = (alu1?5:10); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu4))); | |
unsigned_char128 alu5 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
*((unsigned_char128*)((data0+alu4))) = alu5; | |
} | |
} | |
__attribute__((noinline)) void E_10_128n1(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int128 cast0 = (int128){-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; | |
__builtin_HEXAGON_Y4_l2fetch(data1, 0x808000|10); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:5); | |
int alu3 = (alu1?5:10); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
int alu4 = (ridx0<<7); | |
unsigned_char128 val0 = *((unsigned_char128*)((data1+alu4))); | |
unsigned_char128 alu5 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127); | |
int128 cast1 = __builtin_convertvector(alu5, int128); | |
int128 alu6 = (cast1^cast0); | |
int alu7 = ((alu6[0]<-256)?-256:alu6[0]); | |
int alu8 = ((alu6[1]<-256)?-256:alu6[1]); | |
int alu9 = ((alu6[2]<-256)?-256:alu6[2]); | |
int alu10 = ((alu6[3]<-256)?-256:alu6[3]); | |
int alu11 = ((alu6[4]<-256)?-256:alu6[4]); | |
int alu12 = ((alu6[5]<-256)?-256:alu6[5]); | |
int alu13 = ((alu6[6]<-256)?-256:alu6[6]); | |
int alu14 = ((alu6[7]<-256)?-256:alu6[7]); | |
int alu15 = ((alu6[8]<-256)?-256:alu6[8]); | |
int alu16 = ((alu6[9]<-256)?-256:alu6[9]); | |
int alu17 = ((alu6[10]<-256)?-256:alu6[10]); | |
int alu18 = ((alu6[11]<-256)?-256:alu6[11]); | |
int alu19 = ((alu6[12]<-256)?-256:alu6[12]); | |
int alu20 = ((alu6[13]<-256)?-256:alu6[13]); | |
int alu21 = ((alu6[14]<-256)?-256:alu6[14]); | |
int alu22 = ((alu6[15]<-256)?-256:alu6[15]); | |
int alu23 = ((alu6[16]<-256)?-256:alu6[16]); | |
int alu24 = ((alu6[17]<-256)?-256:alu6[17]); | |
int alu25 = ((alu6[18]<-256)?-256:alu6[18]); | |
int alu26 = ((alu6[19]<-256)?-256:alu6[19]); | |
int alu27 = ((alu6[20]<-256)?-256:alu6[20]); | |
int alu28 = ((alu6[21]<-256)?-256:alu6[21]); | |
int alu29 = ((alu6[22]<-256)?-256:alu6[22]); | |
int alu30 = ((alu6[23]<-256)?-256:alu6[23]); | |
int alu31 = ((alu6[24]<-256)?-256:alu6[24]); | |
int alu32 = ((alu6[25]<-256)?-256:alu6[25]); | |
int alu33 = ((alu6[26]<-256)?-256:alu6[26]); | |
int alu34 = ((alu6[27]<-256)?-256:alu6[27]); | |
int alu35 = ((alu6[28]<-256)?-256:alu6[28]); | |
int alu36 = ((alu6[29]<-256)?-256:alu6[29]); | |
int alu37 = ((alu6[30]<-256)?-256:alu6[30]); | |
int alu38 = ((alu6[31]<-256)?-256:alu6[31]); | |
int alu39 = ((alu6[32]<-256)?-256:alu6[32]); | |
int alu40 = ((alu6[33]<-256)?-256:alu6[33]); | |
int alu41 = ((alu6[34]<-256)?-256:alu6[34]); | |
int alu42 = ((alu6[35]<-256)?-256:alu6[35]); | |
int alu43 = ((alu6[36]<-256)?-256:alu6[36]); | |
int alu44 = ((alu6[37]<-256)?-256:alu6[37]); | |
int alu45 = ((alu6[38]<-256)?-256:alu6[38]); | |
int alu46 = ((alu6[39]<-256)?-256:alu6[39]); | |
int alu47 = ((alu6[40]<-256)?-256:alu6[40]); | |
int alu48 = ((alu6[41]<-256)?-256:alu6[41]); | |
int alu49 = ((alu6[42]<-256)?-256:alu6[42]); | |
int alu50 = ((alu6[43]<-256)?-256:alu6[43]); | |
int alu51 = ((alu6[44]<-256)?-256:alu6[44]); | |
int alu52 = ((alu6[45]<-256)?-256:alu6[45]); | |
int alu53 = ((alu6[46]<-256)?-256:alu6[46]); | |
int alu54 = ((alu6[47]<-256)?-256:alu6[47]); | |
int alu55 = ((alu6[48]<-256)?-256:alu6[48]); | |
int alu56 = ((alu6[49]<-256)?-256:alu6[49]); | |
int alu57 = ((alu6[50]<-256)?-256:alu6[50]); | |
int alu58 = ((alu6[51]<-256)?-256:alu6[51]); | |
int alu59 = ((alu6[52]<-256)?-256:alu6[52]); | |
int alu60 = ((alu6[53]<-256)?-256:alu6[53]); | |
int alu61 = ((alu6[54]<-256)?-256:alu6[54]); | |
int alu62 = ((alu6[55]<-256)?-256:alu6[55]); | |
int alu63 = ((alu6[56]<-256)?-256:alu6[56]); | |
int alu64 = ((alu6[57]<-256)?-256:alu6[57]); | |
int alu65 = ((alu6[58]<-256)?-256:alu6[58]); | |
int alu66 = ((alu6[59]<-256)?-256:alu6[59]); | |
int alu67 = ((alu6[60]<-256)?-256:alu6[60]); | |
int alu68 = ((alu6[61]<-256)?-256:alu6[61]); | |
int alu69 = ((alu6[62]<-256)?-256:alu6[62]); | |
int alu70 = ((alu6[63]<-256)?-256:alu6[63]); | |
int alu71 = ((alu6[64]<-256)?-256:alu6[64]); | |
int alu72 = ((alu6[65]<-256)?-256:alu6[65]); | |
int alu73 = ((alu6[66]<-256)?-256:alu6[66]); | |
int alu74 = ((alu6[67]<-256)?-256:alu6[67]); | |
int alu75 = ((alu6[68]<-256)?-256:alu6[68]); | |
int alu76 = ((alu6[69]<-256)?-256:alu6[69]); | |
int alu77 = ((alu6[70]<-256)?-256:alu6[70]); | |
int alu78 = ((alu6[71]<-256)?-256:alu6[71]); | |
int alu79 = ((alu6[72]<-256)?-256:alu6[72]); | |
int alu80 = ((alu6[73]<-256)?-256:alu6[73]); | |
int alu81 = ((alu6[74]<-256)?-256:alu6[74]); | |
int alu82 = ((alu6[75]<-256)?-256:alu6[75]); | |
int alu83 = ((alu6[76]<-256)?-256:alu6[76]); | |
int alu84 = ((alu6[77]<-256)?-256:alu6[77]); | |
int alu85 = ((alu6[78]<-256)?-256:alu6[78]); | |
int alu86 = ((alu6[79]<-256)?-256:alu6[79]); | |
int alu87 = ((alu6[80]<-256)?-256:alu6[80]); | |
int alu88 = ((alu6[81]<-256)?-256:alu6[81]); | |
int alu89 = ((alu6[82]<-256)?-256:alu6[82]); | |
int alu90 = ((alu6[83]<-256)?-256:alu6[83]); | |
int alu91 = ((alu6[84]<-256)?-256:alu6[84]); | |
int alu92 = ((alu6[85]<-256)?-256:alu6[85]); | |
int alu93 = ((alu6[86]<-256)?-256:alu6[86]); | |
int alu94 = ((alu6[87]<-256)?-256:alu6[87]); | |
int alu95 = ((alu6[88]<-256)?-256:alu6[88]); | |
int alu96 = ((alu6[89]<-256)?-256:alu6[89]); | |
int alu97 = ((alu6[90]<-256)?-256:alu6[90]); | |
int alu98 = ((alu6[91]<-256)?-256:alu6[91]); | |
int alu99 = ((alu6[92]<-256)?-256:alu6[92]); | |
int alu100 = ((alu6[93]<-256)?-256:alu6[93]); | |
int alu101 = ((alu6[94]<-256)?-256:alu6[94]); | |
int alu102 = ((alu6[95]<-256)?-256:alu6[95]); | |
int alu103 = ((alu6[96]<-256)?-256:alu6[96]); | |
int alu104 = ((alu6[97]<-256)?-256:alu6[97]); | |
int alu105 = ((alu6[98]<-256)?-256:alu6[98]); | |
int alu106 = ((alu6[99]<-256)?-256:alu6[99]); | |
int alu107 = ((alu6[100]<-256)?-256:alu6[100]); | |
int alu108 = ((alu6[101]<-256)?-256:alu6[101]); | |
int alu109 = ((alu6[102]<-256)?-256:alu6[102]); | |
int alu110 = ((alu6[103]<-256)?-256:alu6[103]); | |
int alu111 = ((alu6[104]<-256)?-256:alu6[104]); | |
int alu112 = ((alu6[105]<-256)?-256:alu6[105]); | |
int alu113 = ((alu6[106]<-256)?-256:alu6[106]); | |
int alu114 = ((alu6[107]<-256)?-256:alu6[107]); | |
int alu115 = ((alu6[108]<-256)?-256:alu6[108]); | |
int alu116 = ((alu6[109]<-256)?-256:alu6[109]); | |
int alu117 = ((alu6[110]<-256)?-256:alu6[110]); | |
int alu118 = ((alu6[111]<-256)?-256:alu6[111]); | |
int alu119 = ((alu6[112]<-256)?-256:alu6[112]); | |
int alu120 = ((alu6[113]<-256)?-256:alu6[113]); | |
int alu121 = ((alu6[114]<-256)?-256:alu6[114]); | |
int alu122 = ((alu6[115]<-256)?-256:alu6[115]); | |
int alu123 = ((alu6[116]<-256)?-256:alu6[116]); | |
int alu124 = ((alu6[117]<-256)?-256:alu6[117]); | |
int alu125 = ((alu6[118]<-256)?-256:alu6[118]); | |
int alu126 = ((alu6[119]<-256)?-256:alu6[119]); | |
int alu127 = ((alu6[120]<-256)?-256:alu6[120]); | |
int alu128 = ((alu6[121]<-256)?-256:alu6[121]); | |
int alu129 = ((alu6[122]<-256)?-256:alu6[122]); | |
int alu130 = ((alu6[123]<-256)?-256:alu6[123]); | |
int alu131 = ((alu6[124]<-256)?-256:alu6[124]); | |
int alu132 = ((alu6[125]<-256)?-256:alu6[125]); | |
int alu133 = ((alu6[126]<-256)?-256:alu6[126]); | |
int alu134 = ((alu6[127]<-256)?-256:alu6[127]); | |
unsigned_char128 cast2 = __builtin_convertvector(((int128){alu7,alu8,alu9,alu10,alu11,alu12,alu13,alu14,alu15,alu16,alu17,alu18,alu19,alu20,alu21,alu22,alu23,alu24,alu25,alu26,alu27,alu28,alu29,alu30,alu31,alu32,alu33,alu34,alu35,alu36,alu37,alu38,alu39,alu40,alu41,alu42,alu43,alu44,alu45,alu46,alu47,alu48,alu49,alu50,alu51,alu52,alu53,alu54,alu55,alu56,alu57,alu58,alu59,alu60,alu61,alu62,alu63,alu64,alu65,alu66,alu67,alu68,alu69,alu70,alu71,alu72,alu73,alu74,alu75,alu76,alu77,alu78,alu79,alu80,alu81,alu82,alu83,alu84,alu85,alu86,alu87,alu88,alu89,alu90,alu91,alu92,alu93,alu94,alu95,alu96,alu97,alu98,alu99,alu100,alu101,alu102,alu103,alu104,alu105,alu106,alu107,alu108,alu109,alu110,alu111,alu112,alu113,alu114,alu115,alu116,alu117,alu118,alu119,alu120,alu121,alu122,alu123,alu124,alu125,alu126,alu127,alu128,alu129,alu130,alu131,alu132,alu133,alu134}^cast0), unsigned_char128); | |
*((unsigned_char128*)((data0+alu4))) = cast2; | |
} | |
} | |
__attribute__((noinline)) void r_8_320_4_128(unsigned char* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, unsigned char* restrict __attribute__((align_value(128))) data2, int* restrict __attribute__((align_value(128))) data3, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
int32 cast0 = (int32){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; | |
__builtin_HEXAGON_Y2_dcfetch(data1+0); | |
_Bool alu1 = ((g0!=0)!=1); | |
int alu2 = (alu1?0:4); | |
int alu3 = (alu1?4:8); | |
for (int ridx0 = alu2; ridx0 < alu3; ridx0++) { | |
_Bool alu4 = (ridx0<7); | |
int alu5 = (ridx0<<9); | |
__builtin_HEXAGON_Y4_l2fetch(data2+alu5, 0x808000|8); | |
int32 acc0 = cast0; | |
int32 acc1 = cast0; | |
int32 acc2 = cast0; | |
int32 acc3 = cast0; | |
int32 acc4 = cast0; | |
int32 acc5 = cast0; | |
for (int ridx1 = 0; ridx1 < 320; ridx1++) { | |
int alu7 = (ridx1<<2); | |
__builtin_HEXAGON_Y2_dcfetch(data1+(alu7+8)); | |
unsigned_char4 val0 = *((unsigned_char4*)((data1+alu7))); | |
unsigned int cast1 = (*((unsigned int*)&val0)); | |
unsigned_char128 alu9 = __builtin_shufflevector(val0, val0, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3); | |
acc5 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc5, alu9, 16843009u); | |
unsigned_char4 val1 = (alu4?*((unsigned_char4*)((data1+alu7))):(unsigned_char4){0,0,0,0}); | |
int alu11 = (alu5+(ridx1*4000)); | |
unsigned_char128 val2 = *((unsigned_char128*)((data2+alu11))); | |
unsigned_char128 val3 = *((unsigned_char128*)((data2+(alu11+128)))); | |
unsigned_char128 val4 = *((unsigned_char128*)((data2+(alu11+256)))); | |
unsigned_char128 val5 = *((unsigned_char128*)((data2+(alu11+384)))); | |
unsigned_char128 val6 = (alu4?*((unsigned_char128*)((data2+(alu11+416)))):(unsigned_char128){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
unsigned_char128 cast2 = (unsigned_char128){val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val0[0],val0[1],val0[2],val0[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3],val1[0],val1[1],val1[2],val1[3]}; | |
unsigned_char128 alu12 = __builtin_shufflevector(val5, val6, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223); | |
acc0 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc0, val2, cast1); | |
acc1 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc1, val3, cast1); | |
acc2 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc2, val4, cast1); | |
acc4 = __builtin_HEXAGON_V6_vrmpybus_acc_128B(acc4, cast2, 16843009u); | |
acc3 = __builtin_HEXAGON_V6_vrmpybusv_acc_128B(acc3, alu12, cast2); | |
} | |
int32 alu19 = (acc5*-1635); | |
int alu20 = (ridx0<<7); | |
int104 val7 = *((int104*)((data3+alu20))); | |
int24 val8 = (alu4?*((int24*)((data3+(alu20+104)))):(int24){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
int32 alu21 = __builtin_shufflevector(val7, val7, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
int32 alu22 = __builtin_shufflevector(val7, val7, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
int32 alu23 = __builtin_shufflevector(val7, val7, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95); | |
unsigned_char128 alu24 = __builtin_HEXAGON_V6_vpackhub_sat_128B(__builtin_HEXAGON_V6_vpackwh_sat_128B((((acc3*15)+(acc4*-1635)+((int32){val7[96],val7[97],val7[98],val7[99],val7[100],val7[101],val7[102],val7[103],val8[0],val8[1],val8[2],val8[3],val8[4],val8[5],val8[6],val8[7],val8[8],val8[9],val8[10],val8[11],val8[12],val8[13],val8[14],val8[15],val8[16],val8[17],val8[18],val8[19],val8[20],val8[21],val8[22],val8[23]}*15)+32767)/65536), (((acc2*15)+alu19+(alu23*15)+32767)/65536)), __builtin_HEXAGON_V6_vpackwh_sat_128B((((acc1*15)+alu19+(alu22*15)+32767)/65536), (((acc0*15)+alu19+(alu21*15)+32767)/65536))); | |
*((unsigned_char128*)((data0+alu20))) = alu24; | |
} | |
} | |
/*
 * E_8_128: converts bytes from data1 to floats in data0, 128 lanes per row:
 * out = (float)((int)byte + -70) * 0.19615031778812408f
 * (presumably a dequantization with offset 70 and a fixed scale -- TODO confirm).
 *
 * data0        - output floats; row r is written at element offset r*128.
 * data1        - input bytes; row r is read at byte offset r*128.
 * global_idx_0 - 0 processes rows [0,4); any other value processes rows [4,8).
 * sync         - not referenced by this function.
 */
__attribute__((noinline)) void E_8_128(float* restrict __attribute__((align_value(128))) data0, unsigned char* restrict __attribute__((align_value(128))) data1, int global_idx_0, void* sync) { | |
int g0 = global_idx_0; /* 0 */ | |
/* alu0 is true exactly when g0 == 0. */
_Bool alu0 = ((g0!=0)!=1); | |
int alu1 = (alu0?0:4); | |
int alu2 = (alu0?4:8); | |
for (int ridx0 = alu1; ridx0 < alu2; ridx0++) { | |
int alu3 = (ridx0<<7); | |
/* The row is read in two pieces: 104 bytes always, and the remaining 24 bytes only
   when ridx0 < 7 (zeros otherwise). */
unsigned_char104 val0 = *((unsigned_char104*)((data1+alu3))); | |
unsigned_char24 val1 = ((ridx0<7)?*((unsigned_char24*)((data1+(alu3+104)))):(unsigned_char24){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}); | |
/* Concatenate val0 (lanes 0..103) and val1 (lanes 104..127), then widen to int. */
int128 cast0 = __builtin_convertvector((unsigned_char128){val0[0],val0[1],val0[2],val0[3],val0[4],val0[5],val0[6],val0[7],val0[8],val0[9],val0[10],val0[11],val0[12],val0[13],val0[14],val0[15],val0[16],val0[17],val0[18],val0[19],val0[20],val0[21],val0[22],val0[23],val0[24],val0[25],val0[26],val0[27],val0[28],val0[29],val0[30],val0[31],val0[32],val0[33],val0[34],val0[35],val0[36],val0[37],val0[38],val0[39],val0[40],val0[41],val0[42],val0[43],val0[44],val0[45],val0[46],val0[47],val0[48],val0[49],val0[50],val0[51],val0[52],val0[53],val0[54],val0[55],val0[56],val0[57],val0[58],val0[59],val0[60],val0[61],val0[62],val0[63],val0[64],val0[65],val0[66],val0[67],val0[68],val0[69],val0[70],val0[71],val0[72],val0[73],val0[74],val0[75],val0[76],val0[77],val0[78],val0[79],val0[80],val0[81],val0[82],val0[83],val0[84],val0[85],val0[86],val0[87],val0[88],val0[89],val0[90],val0[91],val0[92],val0[93],val0[94],val0[95],val0[96],val0[97],val0[98],val0[99],val0[100],val0[101],val0[102],val0[103],val1[0],val1[1],val1[2],val1[3],val1[4],val1[5],val1[6],val1[7],val1[8],val1[9],val1[10],val1[11],val1[12],val1[13],val1[14],val1[15],val1[16],val1[17],val1[18],val1[19],val1[20],val1[21],val1[22],val1[23]}, int128); | |
float128 cast1 = __builtin_convertvector((cast0+-70), float128); | |
/* NOTE(review): the store is not gated. For ridx0 == 7 it writes floats 896..1023
   although input bytes from 1000 on are replaced by zeros -- confirm data0 has room
   for all 1024 floats. */
*((float128*)((data0+alu3))) = (cast1*0.19615031778812408f); | |
} | |
}void batched(float* arg0,signed char* cbuf0,signed char* cbuf1, int gl0, void* sync) { | |
r_112_28_3_3_3_32_4((unsigned char*)(cbuf0 + 155648),(unsigned char*)(cbuf0 + 4096),(unsigned char*)(cbuf0 + 557056),(int*)(cbuf0 + 561152), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[0])); | |
r_112_28_3_3_32_4((unsigned char*)(cbuf0 + 565248),(unsigned char*)(cbuf0 + 155648),(unsigned char*)(cbuf0 + 966656),(int*)(cbuf0 + 970752), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[1])); | |
r_3136_4_8_32_4((unsigned char*)(cbuf0 + 974848),(unsigned char*)(cbuf0 + 565248),(unsigned char*)(cbuf0 + 1376256),(int*)(cbuf0 + 1380352), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[2])); | |
r_3136_4_8_32_4_3((unsigned char*)(cbuf0 + 1384448),(unsigned char*)(cbuf0 + 974848),(unsigned char*)(cbuf0 + 2588672),(int*)(cbuf0 + 2592768), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[3])); | |
r_56_3_14_3_3_32_4((unsigned char*)(cbuf0 + 2596864),(unsigned char*)(cbuf0 + 1384448),(unsigned char*)(cbuf0 + 2899968),(int*)(cbuf0 + 2904064), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[4])); | |
r_784_12_8_32_4((unsigned char*)(cbuf0 + 2908160),(unsigned char*)(cbuf0 + 2596864),(unsigned char*)(cbuf0 + 3010560),(int*)(cbuf0 + 3014656), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[5])); | |
r_784_4_8_32_4_5((unsigned char*)(cbuf0 + 3018752),(unsigned char*)(cbuf0 + 2908160),(unsigned char*)(cbuf0 + 3522560),(int*)(cbuf0 + 3530752), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[6])); | |
r_56_5_14_3_3_32_4((unsigned char*)(cbuf0 + 3534848),(unsigned char*)(cbuf0 + 3018752),(unsigned char*)(cbuf0 + 4038656),(int*)(cbuf0 + 4042752), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[7])); | |
r_784_20_8_32_4((unsigned char*)(cbuf0 + 4046848),(unsigned char*)(cbuf0 + 3534848),(unsigned char*)(cbuf0 + 4149248),(int*)(cbuf0 + 4157440), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[8])); | |
E_784_128((unsigned char*)(cbuf0 + 4161536),(unsigned char*)(cbuf0 + 2908160),(unsigned char*)(cbuf0 + 4046848), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[9])); | |
r_784_4_8_32_4_5n1((unsigned char*)(cbuf0 + 4263936),(unsigned char*)(cbuf0 + 4161536),(unsigned char*)(cbuf0 + 4767744),(int*)(cbuf0 + 4775936), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[10])); | |
r_28_5_7_3_3_32_4((unsigned char*)(cbuf0 + 4780032),(unsigned char*)(cbuf0 + 4263936),(unsigned char*)(cbuf0 + 4907008),(int*)(cbuf0 + 4911104), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[11])); | |
r_196_20_8_32_4((unsigned char*)(cbuf0 + 4915200),(unsigned char*)(cbuf0 + 4780032),(unsigned char*)(cbuf0 + 4943872),(int*)(cbuf0 + 4952064), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[12])); | |
r_196_4_8_32_4_6((unsigned char*)(cbuf0 + 4956160),(unsigned char*)(cbuf0 + 4915200),(unsigned char*)(cbuf0 + 5107712),(int*)(cbuf0 + 5115904), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[13])); | |
r_28_6_7_3_3_32_4((unsigned char*)(cbuf0 + 5120000),(unsigned char*)(cbuf0 + 4956160),(unsigned char*)(cbuf0 + 5271552),(int*)(cbuf0 + 5275648), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[14])); | |
r_196_24_8_32_4((unsigned char*)(cbuf0 + 5279744),(unsigned char*)(cbuf0 + 5120000),(unsigned char*)(cbuf0 + 5308416),(int*)(cbuf0 + 5316608), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[15])); | |
E_196_128((unsigned char*)(cbuf0 + 5320704),(unsigned char*)(cbuf0 + 4915200),(unsigned char*)(cbuf0 + 5279744), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[16])); | |
r_196_4_8_32_4_6n1((unsigned char*)(cbuf0 + 5349376),(unsigned char*)(cbuf0 + 5320704),(unsigned char*)(cbuf0 + 5500928),(int*)(cbuf0 + 5509120), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[17])); | |
r_28_6_7_3_3_32_4n1((unsigned char*)(cbuf0 + 5513216),(unsigned char*)(cbuf0 + 5349376),(unsigned char*)(cbuf0 + 5664768),(int*)(cbuf0 + 5668864), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[18])); | |
r_196_24_8_32_4n1((unsigned char*)(cbuf0 + 5672960),(unsigned char*)(cbuf0 + 5513216),(unsigned char*)(cbuf0 + 5701632),(int*)(cbuf0 + 5709824), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[19])); | |
E_196_128n1((unsigned char*)(cbuf0 + 5713920),(unsigned char*)(cbuf0 + 5320704),(unsigned char*)(cbuf0 + 5672960), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[20])); | |
r_196_4_8_32_4_6n2((unsigned char*)(cbuf0 + 5742592),(unsigned char*)(cbuf0 + 5713920),(unsigned char*)(cbuf0 + 5894144),(int*)(cbuf0 + 5902336), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[21])); | |
r_14_6_4_3_3_32_4((unsigned char*)(cbuf0 + 5906432),(unsigned char*)(cbuf0 + 5742592),(unsigned char*)(cbuf0 + 5947392),(int*)(cbuf0 + 5951488), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[22])); | |
r_49_24_8_32_4_2((unsigned char*)(cbuf0 + 5955584),(unsigned char*)(cbuf0 + 5906432),(unsigned char*)(cbuf0 + 5971968),(int*)(cbuf0 + 5984256), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[23])); | |
r_12_49_8_8_32_4((unsigned char*)(cbuf0 + 5988352),(unsigned char*)(cbuf0 + 5955584),(unsigned char*)(cbuf0 + 6066176),(int*)(cbuf0 + 6090752), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[24])); | |
r_12_14_4_3_3_32_4((unsigned char*)(cbuf0 + 6094848),(unsigned char*)(cbuf0 + 5988352),(unsigned char*)(cbuf0 + 6172672),(int*)(cbuf0 + 6180864), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[25])); | |
r_49_48_8_32_4_2((unsigned char*)(cbuf0 + 6184960),(unsigned char*)(cbuf0 + 6094848),(unsigned char*)(cbuf0 + 6201344),(int*)(cbuf0 + 6225920), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[26])); | |
E_98_128((unsigned char*)(cbuf0 + 6230016),(unsigned char*)(cbuf0 + 5955584),(unsigned char*)(cbuf0 + 6184960), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[27])); | |
r_12_49_8_8_32_4n1((unsigned char*)(cbuf0 + 6246400),(unsigned char*)(cbuf0 + 6230016),(unsigned char*)(cbuf0 + 6324224),(int*)(cbuf0 + 6348800), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[28])); | |
r_12_14_4_3_3_32_4n1((unsigned char*)(cbuf0 + 6352896),(unsigned char*)(cbuf0 + 6246400),(unsigned char*)(cbuf0 + 6430720),(int*)(cbuf0 + 6438912), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[29])); | |
r_49_48_8_32_4_2n1((unsigned char*)(cbuf0 + 6443008),(unsigned char*)(cbuf0 + 6352896),(unsigned char*)(cbuf0 + 6459392),(int*)(cbuf0 + 6483968), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[30])); | |
E_98_128n1((unsigned char*)(cbuf0 + 6488064),(unsigned char*)(cbuf0 + 6230016),(unsigned char*)(cbuf0 + 6443008), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[31])); | |
r_12_49_8_8_32_4n2((unsigned char*)(cbuf0 + 6504448),(unsigned char*)(cbuf0 + 6488064),(unsigned char*)(cbuf0 + 6582272),(int*)(cbuf0 + 6606848), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[32])); | |
r_12_14_4_3_3_32_4n2((unsigned char*)(cbuf0 + 6610944),(unsigned char*)(cbuf0 + 6504448),(unsigned char*)(cbuf0 + 6688768),(int*)(cbuf0 + 6696960), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[33])); | |
r_49_48_8_32_4_2n2((unsigned char*)(cbuf0 + 6701056),(unsigned char*)(cbuf0 + 6610944),(unsigned char*)(cbuf0 + 6717440),(int*)(cbuf0 + 6742016), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[34])); | |
E_98_128n2((unsigned char*)(cbuf0 + 6746112),(unsigned char*)(cbuf0 + 6488064),(unsigned char*)(cbuf0 + 6701056), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[35])); | |
r_12_49_8_8_32_4n3((unsigned char*)(cbuf0 + 6762496),(unsigned char*)(cbuf0 + 6746112),(unsigned char*)(cbuf0 + 6840320),(int*)(cbuf0 + 6864896), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[36])); | |
r_12_14_4_3_3_32_4n3((unsigned char*)(cbuf0 + 6868992),(unsigned char*)(cbuf0 + 6762496),(unsigned char*)(cbuf0 + 6946816),(int*)(cbuf0 + 6955008), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[37])); | |
r_49_48_8_32_4_3((unsigned char*)(cbuf0 + 6959104),(unsigned char*)(cbuf0 + 6868992),(unsigned char*)(cbuf0 + 6979584),(int*)(cbuf0 + 7016448), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[38])); | |
r_18_49_12_8_32_4((unsigned char*)(cbuf0 + 7020544),(unsigned char*)(cbuf0 + 6959104),(unsigned char*)(cbuf0 + 7135232),(int*)(cbuf0 + 7192576), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[39])); | |
r_18_14_4_3_3_32_4((unsigned char*)(cbuf0 + 7196672),(unsigned char*)(cbuf0 + 7020544),(unsigned char*)(cbuf0 + 7311360),(int*)(cbuf0 + 7319552), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[40])); | |
r_49_72_8_32_4_3((unsigned char*)(cbuf0 + 7323648),(unsigned char*)(cbuf0 + 7196672),(unsigned char*)(cbuf0 + 7344128),(int*)(cbuf0 + 7401472), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[41])); | |
E_147_128((unsigned char*)(cbuf0 + 7405568),(unsigned char*)(cbuf0 + 6959104),(unsigned char*)(cbuf0 + 7323648), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[42])); | |
r_18_49_12_8_32_4n1((unsigned char*)(cbuf0 + 7426048),(unsigned char*)(cbuf0 + 7405568),(unsigned char*)(cbuf0 + 7540736),(int*)(cbuf0 + 7598080), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[43])); | |
r_18_14_4_3_3_32_4n1((unsigned char*)(cbuf0 + 7602176),(unsigned char*)(cbuf0 + 7426048),(unsigned char*)(cbuf0 + 7716864),(int*)(cbuf0 + 7725056), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[44])); | |
r_49_72_8_32_4_3n1((unsigned char*)(cbuf0 + 7729152),(unsigned char*)(cbuf0 + 7602176),(unsigned char*)(cbuf0 + 7749632),(int*)(cbuf0 + 7806976), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[45])); | |
E_147_128n1((unsigned char*)(cbuf0 + 7811072),(unsigned char*)(cbuf0 + 7405568),(unsigned char*)(cbuf0 + 7729152), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[46])); | |
r_18_49_12_8_32_4n2((unsigned char*)(cbuf0 + 7831552),(unsigned char*)(cbuf0 + 7811072),(unsigned char*)(cbuf0 + 7946240),(int*)(cbuf0 + 8003584), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[47])); | |
r_18_7_2_3_3_32_4((unsigned char*)(cbuf0 + 8007680),(unsigned char*)(cbuf0 + 7831552),(unsigned char*)(cbuf0 + 8036352),(int*)(cbuf0 + 8044544), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[48])); | |
r_13_72_8_32_4_5((unsigned char*)(cbuf0 + 8048640),(unsigned char*)(cbuf0 + 8007680),(unsigned char*)(cbuf0 + 8056832),(int*)(cbuf0 + 8151040), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[49])); | |
r_30_13_20_8_32_4((unsigned char*)(cbuf0 + 8155136),(unsigned char*)(cbuf0 + 8048640),(unsigned char*)(cbuf0 + 8204288),(int*)(cbuf0 + 8359936), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[50])); | |
r_30_7_2_3_3_32_4((unsigned char*)(cbuf0 + 8364032),(unsigned char*)(cbuf0 + 8155136),(unsigned char*)(cbuf0 + 8413184),(int*)(cbuf0 + 8425472), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[51])); | |
r_13_120_8_32_4_5((unsigned char*)(cbuf0 + 8429568),(unsigned char*)(cbuf0 + 8364032),(unsigned char*)(cbuf0 + 8437760),(int*)(cbuf0 + 8593408), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[52])); | |
E_62_128((unsigned char*)(cbuf0 + 8597504),(unsigned char*)(cbuf0 + 8048640),(unsigned char*)(cbuf0 + 8429568), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[53])); | |
r_30_13_20_8_32_4n1((unsigned char*)(cbuf0 + 8605696),(unsigned char*)(cbuf0 + 8597504),(unsigned char*)(cbuf0 + 8654848),(int*)(cbuf0 + 8810496), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[54])); | |
r_30_7_2_3_3_32_4n1((unsigned char*)(cbuf0 + 8814592),(unsigned char*)(cbuf0 + 8605696),(unsigned char*)(cbuf0 + 8863744),(int*)(cbuf0 + 8876032), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[55])); | |
r_13_120_8_32_4_5n1((unsigned char*)(cbuf0 + 8880128),(unsigned char*)(cbuf0 + 8814592),(unsigned char*)(cbuf0 + 8888320),(int*)(cbuf0 + 9043968), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[56])); | |
E_62_128n1((unsigned char*)(cbuf0 + 9048064),(unsigned char*)(cbuf0 + 8597504),(unsigned char*)(cbuf0 + 8880128), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[57])); | |
r_30_13_20_8_32_4n2((unsigned char*)(cbuf0 + 9056256),(unsigned char*)(cbuf0 + 9048064),(unsigned char*)(cbuf0 + 9105408),(int*)(cbuf0 + 9261056), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[58])); | |
r_30_7_2_3_3_32_4n2((unsigned char*)(cbuf0 + 9265152),(unsigned char*)(cbuf0 + 9056256),(unsigned char*)(cbuf0 + 9314304),(int*)(cbuf0 + 9326592), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[59])); | |
r_10_13_120_8_32_4((unsigned char*)(cbuf0 + 9330688),(unsigned char*)(cbuf0 + 9265152),(unsigned char*)(cbuf0 + 9347072),(int*)(cbuf0 + 9654272), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[60])); | |
r_40_13_40_8_32_4((unsigned char*)(cbuf0 + 9658368),(unsigned char*)(cbuf0 + 9330688),(unsigned char*)(cbuf0 + 9723904),(int*)(cbuf0 + 10133504), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[61])); | |
r_10_49_128((unsigned char*)(cbuf0 + 10141696),(unsigned char*)(cbuf0 + 9658368), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[62])); | |
E_10_128((unsigned char*)(cbuf0 + 10145792),(unsigned char*)(cbuf0 + 10141696), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[63])); | |
E_10_128n1((unsigned char*)(cbuf0 + 10149888),(unsigned char*)(cbuf0 + 10145792), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[64])); | |
r_8_320_4_128((unsigned char*)(cbuf0 + 10153984),(unsigned char*)(cbuf0 + 10149888),(unsigned char*)(cbuf0 + 10158080),(int*)(cbuf0 + 11440128), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[65])); | |
E_8_128((float*)(cbuf1 + 0),(unsigned char*)(cbuf0 + 10153984), gl0, 0x0); | |
qurt_barrier_wait(&(((qurt_barrier_t*)sync)[66])); | |
} | |
/*
 * Argument bundle shared by entry() and the helper thread (threader()).
 *
 * For each of the three buffers N = 0..2:
 *   sz_or_val_N  value read from the first FastRPC argument; entry() passes
 *                it as the length to HAP_mmap()/HAP_munmap()
 *   offN         byte offset read from the second FastRPC argument and added
 *                to the mapped address
 *   buf_N        pointer handed to batched() (mapped address + offN)
 *
 * sync points at the array of qurt_barrier_t that both threads wait on
 * inside batched().
 */
typedef struct all_args {
  int sz_or_val_0;
  int off0;
  void *buf_0;

  int sz_or_val_1;
  int off1;
  void *buf_1;

  int sz_or_val_2;
  int off2;
  void *buf_2;

  void *sync;
} all_args_t;
/*
 * Body of the helper thread started by entry().
 *
 * Runs batched() over the three buffers in *args with 1 as the fourth
 * argument (entry() runs the same function with 0 on its own thread), then
 * terminates the thread with exit status 0.
 */
void threader(all_args_t* args) {
  batched(args->buf_0, args->buf_1, args->buf_2, 1, args->sync);
  qurt_thread_exit(0);
}

/*
 * Returns non-zero when a HAP_mmap() result must not be used.
 * NOTE(review): both a null pointer and (void*)-1 are treated as failure
 * because the failure sentinel is not visible in this file -- confirm
 * against the HAP_mem documentation.
 */
static int entry_map_failed(const void *addr) {
  return addr == 0 || addr == (void*)-1;
}

/*
 * FastRPC-style entry point.
 *
 * Always issues the power request first. If the top byte of `sc` is not 2
 * nothing else happens and 0 is returned. Otherwise:
 *   pra[0]    holds three ints: the lengths used to map the three buffers
 *   pra[1]    holds three ints: byte offsets applied to each mapping
 *   pra[2]    receives the elapsed time in microseconds (unsigned long long)
 *   pra[3..5] supply the dma file descriptors that are mapped
 *
 * batched() is then run concurrently on this thread (index 0) and on a
 * freshly created helper thread (index 1), synchronised through an array of
 * two-party barriers.
 *
 * Returns 0 on success (and on the early-out above), -1 if an allocation,
 * a mapping or the thread creation fails. On failure pra[2] is not written.
 */
int entry(unsigned long long handle, unsigned int sc, remote_arg* pra) {
  /* batched() waits on sync[0] .. sync[66]; the helper thread gets a 64 KiB stack. */
  enum { NUM_BARRIERS = 67, WORKER_STACK_BYTES = 64 << 10 };

  struct dcvs_v2_req req = {.type=7, .dcvs_enable=0, .set_latency=1, .latency=100, .set_dcvs_params=1, .target_corner = 6 /* TURBO */};
  /* Result intentionally ignored, as in the original: the request is best effort. */
  HAP_power_set((void*)handle, (void*)&req);
  if ((sc>>24) != 2) return 0;

  int rc = -1;
  int status;
  unsigned long long start;
  qurt_thread_t thread_ = 0;
  all_args_t args = { 0 };
  qurt_thread_attr_t attr = { 0 };
  /* Mapped base addresses are kept apart from buf_N so that HAP_munmap()
     receives exactly what HAP_mmap() returned, even when offN != 0. */
  void *base_0 = (void*)-1;
  void *base_1 = (void*)-1;
  void *base_2 = (void*)-1;
  const int *sizes = (const int*)pra[0].buf.pv;
  const int *offsets = (const int*)pra[1].buf.pv;

  qurt_barrier_t* sync = malloc(NUM_BARRIERS * sizeof(qurt_barrier_t));
  attr.stack_addr = malloc(WORKER_STACK_BYTES);
  if (!sync || !attr.stack_addr) goto cleanup;

  /* Every barrier has exactly two participants: this thread and the helper. */
  for (int i = 0; i < NUM_BARRIERS; i++) {
    qurt_barrier_init(&sync[i], 2);
  }

  args.sz_or_val_0 = sizes[0];
  args.sz_or_val_1 = sizes[1];
  args.sz_or_val_2 = sizes[2];
  args.off0 = offsets[0];
  args.off1 = offsets[1];
  args.off2 = offsets[2];

  base_0 = HAP_mmap(0, args.sz_or_val_0, 3, 0, pra[3].dma.fd, 0);
  base_1 = HAP_mmap(0, args.sz_or_val_1, 3, 0, pra[4].dma.fd, 0);
  base_2 = HAP_mmap(0, args.sz_or_val_2, 3, 0, pra[5].dma.fd, 0);
  if (entry_map_failed(base_0) || entry_map_failed(base_1) || entry_map_failed(base_2)) goto cleanup;

  /* Byte-wise offsetting, spelled with char* instead of void* arithmetic. */
  args.buf_0 = (char*)base_0 + args.off0;
  args.buf_1 = (char*)base_1 + args.off1;
  args.buf_2 = (char*)base_2 + args.off2;
  args.sync = sync;

  attr.name[0] = 't';
  attr.priority = 255;
  attr.asid = 0;
  attr.stack_size = WORKER_STACK_BYTES;

  start = HAP_perf_get_time_us();
  /* Without the helper thread this thread would block forever at the first
     barrier inside batched(), so a failed create aborts the run. */
  if (qurt_thread_create(&thread_, &attr, (void (*)(void*))threader, (void*)&args) != 0) goto cleanup;
  batched(args.buf_0, args.buf_1, args.buf_2, 0, args.sync);
  qurt_thread_join(thread_, &status);
  *(unsigned long long *)(pra[2].buf.pv) = HAP_perf_get_time_us() - start;
  rc = 0;

cleanup:
  if (!entry_map_failed(base_0)) HAP_munmap(base_0, args.sz_or_val_0);
  if (!entry_map_failed(base_1)) HAP_munmap(base_1, args.sz_or_val_1);
  if (!entry_map_failed(base_2)) HAP_munmap(base_2, args.sz_or_val_2);
  free(attr.stack_addr);
  free(sync);
  return rc;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment