Last active
May 29, 2020 03:11
-
-
Save BeMg/622f4b23e2f4d55ccae5644c4db09265 to your computer and use it in GitHub Desktop.
aaa
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Conv1D - accumulate an N-tap 1-D convolution into output, one element
 * per work-item.
 *
 * The work-item whose global id in dimension 0 is idx adds
 *     array[idx + i] * filter[i]    for i = 0 .. N-1
 * onto output[idx].
 *
 * @param array   input samples; elements idx .. idx+N-1 are read, so the
 *                buffer has to extend N-1 elements past the largest idx.
 * @param filter  filter taps; elements 0 .. N-1 are read.
 * @param N       number of taps; the loop body does not run when N <= 0.
 * @param output  updated in place with +=, so whatever is already stored in
 *                output[idx] is part of the result (the kernel never
 *                zeroes it).
 */
__kernel void Conv1D(__global int *array, __global int *filter, int N, __global int *output) {
    int idx = get_global_id(0);

    for (int i = 0; i < N; i++) {
        output[idx] += array[idx + i] * filter[i];

        /*
         * N is the same for every work-item, so all of them reach this
         * barrier the same number of times.
         * NOTE(review): each work-item only touches its own output[idx], so
         * the barrier does not look necessary for the numeric result;
         * presumably it is kept on purpose, like the printf below. Confirm
         * before removing either one.
         */
        barrier(CLK_GLOBAL_MEM_FENCE);

        /* As the message itself says, this call exists only for its side effect. */
        printf("Juse for side-effect.\n");
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'conv1d.bc' | |
source_filename = "conv1d.bc" | |
target datalayout = "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | |
target triple = "x86_64-unknown-unknown-unknown" | |
@kernel_config_l2 = external addrspace(2) constant [32 x i32] | |
@str = private unnamed_addr constant [22 x i8] c"Juse for side-effect.\00" | |
; Function Attrs: noduplicate | |
declare protected void @barrier(i32) #0 | |
; Function Attrs: nounwind | |
declare i32 @puts(i8* nocapture readonly) #1 | |
; Function Attrs: noinline nounwind | |
; Conv1D, first dump. A single call walks a range of indices [0, %5) in
; several separate loops (the pregion_for_* blocks) with calls to @barrier
; between them. A scratch array (%.i.01.ex_phi.pocl_context) holds one i32
; slot per index; the value kept there is used to index %filter, so it is
; presumably the kernel's loop counter `i` -- verify against the generator.
define void @Conv1D(i32 addrspace(1)* nocapture readonly %array, i32 addrspace(1)* nocapture readonly %filter, i32 %N, i32 addrspace(1)* nocapture %output) #2 { | |
.r_entry2: | |
; Scratch base address %4 = kernel_config_l2[14] + __core_num() * kernel_config_l2[15].
%0 = call i32 @__core_num() | |
%1 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 15), align 4 | |
%2 = mul i32 %0, %1 | |
%3 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 14), align 4 | |
%4 = add i32 %3, %2 | |
; %5 = kernel_config_l2[4] is the upper bound of every per-index loop below.
%5 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 4), align 4 | |
; %6..%9 compute (base + 4 * %5) rounded up to a multiple of 8; %9 has no
; users in this function.
%6 = mul i32 4, %5 | |
%7 = add i32 %4, %6 | |
%8 = add i32 %7, 7 | |
%9 = and i32 %8, -8 | |
%.i.01.ex_phi.pocl_context = inttoptr i32 %4 to i32*, !ocl.restrict !3 | |
call void @barrier(i32 0) #1 | |
; %10 = kernel_config_l2[10] is added to the loop index to form the index
; into %output (see %19) -- presumably the global-id offset; TODO confirm.
%10 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 10), align 4, !tbaa !4 | |
; N > 0: run the convolution path; otherwise take the counter-only path.
%11 = icmp sgt i32 %N, 0 | |
br i1 %11, label %pregion_for_init7, label %pregion_for_cond13.preheader | |
pregion_for_cond13.preheader: ; preds = %.r_entry2 | |
br label %pregion_for_cond13 | |
; Zero the scratch slots: slot 0 here, slots 1.. in the loop that follows.
pregion_for_init7: ; preds = %.r_entry2 | |
store i32 0, i32* %.i.01.ex_phi.pocl_context, align 64 | |
br label %pregion_for_cond9 | |
; NOTE(review): the loop starts at slot 1 and stores before comparing
; anything against %5, so slot 1 is written even when %5 is 1 -- confirm
; that %5 >= 2 is guaranteed or that the scratch area has room for it.
pregion_for_cond9: ; preds = %pregion_for_cond9, %pregion_for_init7 | |
%12 = phi i32 [ 1, %pregion_for_init7 ], [ %14, %pregion_for_cond9 ] | |
%13 = getelementptr i32, i32* %.i.01.ex_phi.pocl_context, i32 %12 | |
store i32 0, i32* %13, align 4, !llvm.mem.parallel_loop_access !8 | |
%14 = add i32 %12, 1 | |
%15 = icmp slt i32 %14, %5 | |
br i1 %15, label %pregion_for_cond9, label %.lr.ph.loopbarrier, !llvm.loop !9 | |
.lr.ph.loopbarrier: ; preds = %pregion_for_cond9 | |
call void @barrier(i32 0) #1 | |
br label %pregion_for_cond6 | |
; Per-index multiply-accumulate. With %16 the loop index, %18 the value read
; from scratch slot %16 and %19 = %16 + %10:
;   output[%19] += array[%18 + %19] * filter[%18]
pregion_for_cond6: ; preds = %pregion_for_cond6.backedge, %.lr.ph.loopbarrier | |
%16 = phi i32 [ 0, %.lr.ph.loopbarrier ], [ %.be, %pregion_for_cond6.backedge ] | |
%17 = getelementptr i32, i32* %.i.01.ex_phi.pocl_context, i32 %16 | |
%18 = load i32, i32* %17, align 4, !llvm.mem.parallel_loop_access !10 | |
%19 = add i32 %16, %10 | |
%20 = add nsw i32 %18, %19 | |
%21 = getelementptr inbounds i32, i32 addrspace(1)* %array, i32 %20 | |
%22 = load i32, i32 addrspace(1)* %21, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10 | |
%23 = getelementptr inbounds i32, i32 addrspace(1)* %filter, i32 %18 | |
%24 = load i32, i32 addrspace(1)* %23, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10 | |
%25 = mul nsw i32 %24, %22 | |
%26 = getelementptr inbounds i32, i32 addrspace(1)* %output, i32 %19 | |
%27 = load i32, i32 addrspace(1)* %26, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10 | |
%28 = add nsw i32 %27, %25 | |
store i32 %28, i32 addrspace(1)* %26, align 4, !tbaa !4, !llvm.mem.parallel_loop_access !10 | |
%29 = add i32 %16, 1 | |
%30 = icmp slt i32 %29, %5 | |
br i1 %30, label %pregion_for_cond6.backedge, label %pregion_for_init, !llvm.loop !11 | |
; The only @barrier call with a non-zero argument -- presumably the barrier
; written in the kernel source; TODO confirm.
; %31 is the next counter value, computed from the %18 read in the final
; iteration of the loop above; the loop below stores it into every slot.
pregion_for_init: ; preds = %pregion_for_cond6 | |
tail call void @barrier(i32 2) #3 | |
%31 = add nuw nsw i32 %18, 1 | |
br label %pregion_for_cond | |
; Per-index tail of the iteration: print @str via @puts, then write the
; incremented counter %31 to scratch slot %32.
pregion_for_cond: ; preds = %pregion_for_cond, %pregion_for_init | |
%32 = phi i32 [ 0, %pregion_for_init ], [ %34, %pregion_for_cond ] | |
%puts = tail call i32 @puts(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @str, i32 0, i32 0)), !llvm.mem.parallel_loop_access !12 | |
%33 = getelementptr i32, i32* %.i.01.ex_phi.pocl_context, i32 %32 | |
store i32 %31, i32* %33, align 4, !llvm.mem.parallel_loop_access !12 | |
%34 = add i32 %32, 1 | |
%35 = icmp slt i32 %34, %5 | |
br i1 %35, label %pregion_for_cond, label %.r_entry.brexitbarrier.latchbarrier, !llvm.loop !13 | |
; Loop latch: while %31 < N, restart the multiply-accumulate loop from
; index 0 (via the backedge block); otherwise leave through exit.barrier3.
.r_entry.brexitbarrier.latchbarrier: ; preds = %pregion_for_cond | |
%36 = icmp slt i32 %31, %N | |
call void @barrier(i32 0) #1 | |
br i1 %36, label %pregion_for_cond6.backedge, label %exit.barrier3 | |
; Merges the two ways back into pregion_for_cond6: next index (%29) within
; the same iteration, or index 0 at the start of the next one.
pregion_for_cond6.backedge: ; preds = %.r_entry.brexitbarrier.latchbarrier, %pregion_for_cond6 | |
%.be = phi i32 [ %29, %pregion_for_cond6 ], [ 0, %.r_entry.brexitbarrier.latchbarrier ] | |
br label %pregion_for_cond6 | |
; N <= 0 path: a counter-only loop (starting at 1, running while the next
; value is < %5) that contains no loads, stores or calls.
pregion_for_cond13: ; preds = %pregion_for_cond13, %pregion_for_cond13.preheader | |
%37 = phi i32 [ %38, %pregion_for_cond13 ], [ 1, %pregion_for_cond13.preheader ] | |
%38 = add i32 %37, 1 | |
%39 = icmp slt i32 %38, %5 | |
br i1 %39, label %pregion_for_cond13, label %exit.barrier, !llvm.loop !14 | |
exit.barrier: ; preds = %pregion_for_cond13 | |
call void @barrier(i32 0) #1 | |
br label %UnifiedReturnBlock | |
exit.barrier3: ; preds = %.r_entry.brexitbarrier.latchbarrier | |
call void @barrier(i32 0) #1 | |
br label %UnifiedReturnBlock | |
UnifiedReturnBlock: ; preds = %exit.barrier3, %exit.barrier | |
ret void | |
} | |
declare i32 @__core_num() | |
attributes #0 = { noduplicate "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #1 = { nounwind } | |
attributes #2 = { noinline nounwind "_kernel_local_size"="0" "_wi_alloca_size"="4" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
attributes #3 = { noduplicate nounwind } | |
!llvm.ident = !{!0} | |
!ocl.restrict = !{!1} | |
!opencl.kernels = !{!2} | |
!0 = !{!"clang version 3.6.0 (git://git.ti.com/opencl/clang.git 5b006f07bdc22b5ae6917eecdfe243908dd7b029) (git://git.ti.com/opencl/llvm.git 09780c6750b30da81e4a0a805aedf1699fbc37c7)"} | |
!1 = distinct !{!1} | |
!2 = !{void (i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*)* @Conv1D} | |
!3 = distinct !{!3} | |
!4 = !{!5, !5, i64 0} | |
!5 = !{!"int", !6, i64 0} | |
!6 = !{!"omnipotent char", !7, i64 0} | |
!7 = !{!"Simple C/C++ TBAA"} | |
!8 = !{!9} | |
!9 = distinct !{!9} | |
!10 = !{!11} | |
!11 = distinct !{!11} | |
!12 = !{!13} | |
!13 = distinct !{!13} | |
!14 = distinct !{!14} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'conv1d.bc' | |
source_filename = "conv1d.bc" | |
target datalayout = "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" | |
target triple = "c6000-unknown-unknown-unknown" | |
@kernel_config_l2 = external addrspace(2) constant [32 x i32] | |
; Function Attrs: nounwind | |
; Conv1D, second dump. An outer loop over %3 in [0, %1) encloses the
; N-iteration multiply-accumulate loop; this version contains no calls to
; @barrier or @puts.
define void @Conv1D(i32 addrspace(1)* nocapture readonly %array, i32 addrspace(1)* nocapture readonly %filter, i32 %N, i32 addrspace(1)* nocapture %output) #0 { | |
.entry: | |
; %0 = kernel_config_l2[10] is the offset added to the outer index;
; %1 = kernel_config_l2[4] is the outer trip count. Nothing runs if %1 <= 0.
%0 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 10), align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11 | |
%1 = load i32, i32 addrspace(2)* getelementptr inbounds ([32 x i32], [32 x i32] addrspace(2)* @kernel_config_l2, i32 0, i32 4), align 4, !llvm.mem.parallel_loop_access !11 | |
%2 = icmp sgt i32 %1, 0 | |
br i1 %2, label %.bodyTop.preheader, label %.exit | |
.bodyTop.preheader: ; preds = %.entry | |
br label %.bodyTop | |
; Outer loop head: %4 = %3 + %0 is the index used for %output and as the
; base index into %array. The N > 0 test is evaluated on every outer pass.
.bodyTop: ; preds = %.bodyEnd, %.bodyTop.preheader | |
%3 = phi i32 [ %18, %.bodyEnd ], [ 0, %.bodyTop.preheader ] | |
%4 = add i32 %3, %0 | |
%5 = icmp sgt i32 %N, 0 | |
br i1 %5, label %.lr.ph, label %.bodyEnd | |
; Inner-loop preheader: the address of output[%4] and its initial value are
; obtained once, before the inner loop starts.
.lr.ph: ; preds = %.bodyTop | |
%6 = getelementptr inbounds i32, i32 addrspace(1)* %output, i32 %4 | |
%.pre = load i32, i32 addrspace(1)* %6, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11 | |
br label %7 | |
; Inner loop: %8 carries the running sum between iterations and %i.01 is the
; counter. Each pass adds array[%i.01 + %4] * filter[%i.01] to the sum and
; stores the updated sum back to output[%4].
7: ; preds = %7, %.lr.ph | |
%8 = phi i32 [ %.pre, %.lr.ph ], [ %15, %7 ] | |
%i.01 = phi i32 [ 0, %.lr.ph ], [ %16, %7 ] | |
%9 = add nsw i32 %i.01, %4 | |
%10 = getelementptr inbounds i32, i32 addrspace(1)* %array, i32 %9 | |
%11 = load i32, i32 addrspace(1)* %10, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11 | |
%12 = getelementptr inbounds i32, i32 addrspace(1)* %filter, i32 %i.01 | |
%13 = load i32, i32 addrspace(1)* %12, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11 | |
%14 = mul nsw i32 %13, %11 | |
%15 = add nsw i32 %8, %14 | |
store i32 %15, i32 addrspace(1)* %6, align 4, !tbaa !7, !llvm.mem.parallel_loop_access !11 | |
%16 = add nuw nsw i32 %i.01, 1 | |
%17 = icmp slt i32 %16, %N | |
br i1 %17, label %7, label %.bodyEnd.loopexit | |
.bodyEnd.loopexit: ; preds = %7 | |
br label %.bodyEnd | |
; Outer loop latch: advance %3 and repeat while it is below %1.
.bodyEnd: ; preds = %.bodyEnd.loopexit, %.bodyTop | |
%18 = add i32 %3, 1 | |
%19 = icmp slt i32 %18, %1 | |
br i1 %19, label %.bodyTop, label %.exit.loopexit, !llvm.loop !12 | |
.exit.loopexit: ; preds = %.bodyEnd | |
br label %.exit | |
.exit: ; preds = %.exit.loopexit, %.entry | |
ret void | |
} | |
attributes #0 = { nounwind "_kernel_local_size"="0" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" } | |
!opencl.kernels = !{!0} | |
!llvm.ident = !{!6} | |
!0 = !{void (i32 addrspace(1)*, i32 addrspace(1)*, i32, i32 addrspace(1)*)* @Conv1D, !1, !2, !3, !4, !5} | |
!1 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 0, i32 1} | |
!2 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"} | |
!3 = !{!"kernel_arg_type", !"int*", !"int*", !"int", !"int*"} | |
!4 = !{!"kernel_arg_base_type", !"int*", !"int*", !"int", !"int*"} | |
!5 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} | |
!6 = !{!"clang version 3.6.0 (git://git.ti.com/opencl/clang.git 5b006f07bdc22b5ae6917eecdfe243908dd7b029) (git://git.ti.com/opencl/llvm.git 09780c6750b30da81e4a0a805aedf1699fbc37c7)"} | |
!7 = !{!8, !8, i64 0} | |
!8 = !{!"int", !9, i64 0} | |
!9 = !{!"omnipotent char", !10, i64 0} | |
!10 = !{!"Simple C/C++ TBAA"} | |
!11 = !{!12} | |
!12 = distinct !{!12} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment