Created
April 9, 2020 05:09
-
-
Save BeMg/744539492eb543dde35e49dd7c46f6ef to your computer and use it in GitHub Desktop.
TVM strided load/store example: depthwise conv2d lowered TIR and the generated RISC-V (riscv64) LLVM IR
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------------------------------------------------
// TVM lowered IR (pretty-printed), scalar reference schedule.
// Stage 1: zero-pads Input (256 x 96 x 96) into PaddedInput (256 x 160 x 160);
//   6553600 = 256*160*160, and the -3104 offset equals -(32*96 + 32), i.e. the
//   copied interior starts at (i2, i3) = (32, 32) and spans 96 x 96.
// Stage 2: per-channel 3x3 accumulation into DepthwiseConv2d
//   (2359296 = 256*96*96 floats).
// NOTE(review): the PaddedInput index in stage 2 expands to
//   c*25600 + di*5120 + i*160 + dj*32 + j — the di/dj taps are read with
//   strides 5120/32 rather than the contiguous 160/1. Presumably this strided
//   access pattern is the point of the example; confirm against the schedule
//   that generated this dump.
// ---------------------------------------------------------------------------
// attr [PaddedInput] storage_scope = "global" | |
allocate PaddedInput[float32 * 6553600] | |
// attr [DepthwiseConv2d] storage_scope = "global" | |
allocate DepthwiseConv2d[float32 * 2359296] | |
// Padding stage: copy the 96x96 interior, zero-fill the 32-wide border.
produce PaddedInput { | |
for (i1, 0, 256) { | |
for (i2, 0, 160) { | |
for (i3, 0, 160) { | |
PaddedInput[((((i1*160) + i2)*160) + i3)] = tvm_if_then_else(((((32 <= i2) && (i2 < 128)) && (32 <= i3)) && (i3 < 128)), Input[(((((i1*96) + i2)*96) + i3) + -3104)], 0.000000f) | |
} | |
} | |
} | |
} | |
// Accumulation stage: zero-init each output element, then add 3x3 taps.
produce DepthwiseConv2d { | |
for (c, 0, 256) { | |
for (i, 0, 96) { | |
for (j, 0, 96) { | |
DepthwiseConv2d[((((c*96) + i)*96) + j)] = 0.000000f | |
for (di, 0, 3) { | |
for (dj, 0, 3) { | |
DepthwiseConv2d[((((c*96) + i)*96) + j)] = (DepthwiseConv2d[((((c*96) + i)*96) + j)] + (PaddedInput[((((((((c*5) + di)*32) + i)*5) + dj)*32) + j)]*Filter[((((c*3) + di)*3) + dj)])) | |
} | |
} | |
} | |
} | |
} | |
}
// ---------------------------------------------------------------------------
// Same computation after scheduling for 8-wide strided vector load/store.
// The output buffer is now viewed as float32x8 * 294912 (294912*8 = 2359296).
// The channel loop is split 256 = 32 (c.outer) x 8 (vector lanes) and the
// spatial loops are fused into one loop of 9216 = 96*96 iterations.
// ramp(base, 9216, 8) addresses 8 lanes that are 9216 elements apart — one
// spatial position across 8 consecutive channels (output channel stride is
// 9216; PaddedInput lane stride 25600 = one padded channel; Filter lane
// stride 9 = one 3x3 filter). These ramps are what lower to the strided
// vlse/vsse loads and stores (byte strides 36864/102400/36) in the LLVM IR
// further down this file.
// ---------------------------------------------------------------------------
// attr [PaddedInput] storage_scope = "global" | |
allocate PaddedInput[float32 * 6553600] | |
// attr [DepthwiseConv2d] storage_scope = "global" | |
allocate DepthwiseConv2d[float32x8 * 294912] | |
// Padding stage: identical to the scalar version above.
produce PaddedInput { | |
for (i1, 0, 256) { | |
for (i2, 0, 160) { | |
for (i3, 0, 160) { | |
PaddedInput[((((i1*160) + i2)*160) + i3)] = tvm_if_then_else(((((32 <= i2) && (i2 < 128)) && (32 <= i3)) && (i3 < 128)), Input[(((((i1*96) + i2)*96) + i3) + -3104)], 0.000000f) | |
} | |
} | |
} | |
} | |
// Vectorized accumulation: each iteration updates 8 channels at once.
produce DepthwiseConv2d { | |
for (b.i.j.fused.fused, 0, 9216) { | |
for (c.outer, 0, 32) { | |
DepthwiseConv2d[ramp(((c.outer*73728) + b.i.j.fused.fused), 9216, 8)] = x8(0.000000f) | |
for (di, 0, 3) { | |
for (dj, 0, 3) { | |
DepthwiseConv2d[ramp(((c.outer*73728) + b.i.j.fused.fused), 9216, 8)] = (DepthwiseConv2d[ramp(((c.outer*73728) + b.i.j.fused.fused), 9216, 8)] + (PaddedInput[ramp(((((((((c.outer*40) + di)*32) + (b.i.j.fused.fused/96))*5) + dj)*32) + (b.i.j.fused.fused % 96)), 25600, 8)]*Filter[ramp(((((c.outer*24) + di)*3) + dj), 9, 8)])) | |
} | |
} | |
} | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ModuleID = 'default_function' | |
source_filename = "default_function" | |
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" | |
target triple = "riscv64-unknown-elf" | |
%0 = type { i8*, %1, i32, %2, i64*, i64*, i64 } | |
%1 = type { i32, i32 } | |
%2 = type { i8, i8, i16 } | |
@__TVMAPISetLastError = linkonce dllexport local_unnamed_addr global void (i8*)* null, align 8 | |
@.str = private constant [69 x i8] c"Assert fail: (num_args == 2), default_function: num_args should be 2\00", align 1 | |
@.str.1 = private constant [202 x i8] c"Assert fail: ((((1 == int32(arg0.strides[3])) && (96 == int32(arg0.strides[2]))) && (9216 == int32(arg0.strides[1]))) && (2359296 == int32(arg0.strides[0]))), arg0.strides: expected to be compact array\00", align 1 | |
@.str.2 = private constant [192 x i8] c"Assert fail: ((((1 == int32(arg1.strides[3])) && (3 == int32(arg1.strides[2]))) && (9 == int32(arg1.strides[1]))) && (9 == int32(arg1.strides[0]))), arg1.strides: expected to be compact array\00", align 1 | |
@.str.3 = private constant [144 x i8] c"Assert fail: ((((arg0.code == 3) || (arg0.code == 13)) || (arg0.code == 7)) || (arg0.code == 4)), default_function: Expect arg[0] to be pointer\00", align 1 | |
@.str.4 = private constant [144 x i8] c"Assert fail: ((((arg1.code == 3) || (arg1.code == 13)) || (arg1.code == 7)) || (arg1.code == 4)), default_function: Expect arg[1] to be pointer\00", align 1 | |
@.str.5 = private constant [55 x i8] c"Assert fail: (dev_type == 1), device_type need to be 1\00", align 1 | |
@.str.6 = private constant [81 x i8] c"Assert fail: (4 == tvm_struct_get(arg0, 0, 4)), arg0.ndim is expected to equal 4\00", align 1 | |
@.str.7 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg0, 0, 5) == (uint8)2) && (tvm_struct_get(arg0, 0, 6) == (uint8)32)) && (tvm_struct_get(arg0, 0, 7) == (uint16)1)), arg0.dtype is expected to be float32\00", align 1 | |
@.str.8 = private constant [95 x i8] c"Assert fail: (int32(arg0.shape[0]) == 1), Argument arg0.shape[0] has an unsatisfied constraint\00", align 1 | |
@.str.9 = private constant [97 x i8] c"Assert fail: (int32(arg0.shape[1]) == 256), Argument arg0.shape[1] has an unsatisfied constraint\00", align 1 | |
@.str.10 = private constant [96 x i8] c"Assert fail: (int32(arg0.shape[2]) == 96), Argument arg0.shape[2] has an unsatisfied constraint\00", align 1 | |
@.str.11 = private constant [96 x i8] c"Assert fail: (int32(arg0.shape[3]) == 96), Argument arg0.shape[3] has an unsatisfied constraint\00", align 1 | |
@.str.12 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg0, 0, 8) == (uint64)0), Argument arg0.byte_offset has an unsatisfied constraint\00", align 1 | |
@.str.13 = private constant [81 x i8] c"Assert fail: (4 == tvm_struct_get(arg1, 0, 4)), arg1.ndim is expected to equal 4\00", align 1 | |
@.str.14 = private constant [186 x i8] c"Assert fail: (((tvm_struct_get(arg1, 0, 5) == (uint8)2) && (tvm_struct_get(arg1, 0, 6) == (uint8)32)) && (tvm_struct_get(arg1, 0, 7) == (uint16)1)), arg1.dtype is expected to be float32\00", align 1 | |
@.str.15 = private constant [97 x i8] c"Assert fail: (int32(arg1.shape[0]) == 256), Argument arg1.shape[0] has an unsatisfied constraint\00", align 1 | |
@.str.16 = private constant [95 x i8] c"Assert fail: (int32(arg1.shape[1]) == 1), Argument arg1.shape[1] has an unsatisfied constraint\00", align 1 | |
@.str.17 = private constant [95 x i8] c"Assert fail: (int32(arg1.shape[2]) == 3), Argument arg1.shape[2] has an unsatisfied constraint\00", align 1 | |
@.str.18 = private constant [95 x i8] c"Assert fail: (int32(arg1.shape[3]) == 3), Argument arg1.shape[3] has an unsatisfied constraint\00", align 1 | |
@.str.19 = private constant [112 x i8] c"Assert fail: (tvm_struct_get(arg1, 0, 8) == (uint64)0), Argument arg1.byte_offset has an unsatisfied constraint\00", align 1 | |
@.str.20 = private constant [105 x i8] c"Assert fail: (1 == tvm_struct_get(arg1, 0, 10)), Argument arg1.device_type has an unsatisfied constraint\00", align 1 | |
@.str.21 = private constant [107 x i8] c"Assert fail: (dev_id == tvm_struct_get(arg1, 0, 9)), Argument arg1.device_id has an unsatisfied constraint\00", align 1 | |
@__TVMBackendAllocWorkspace = linkonce dllexport local_unnamed_addr global i8* (i32, i32, i64, i32, i32)* null, align 8 | |
@__TVMBackendFreeWorkspace = linkonce dllexport local_unnamed_addr global i32 (i32, i32, i8*)* null, align 8 | |
@__tvm_main__ = weak local_unnamed_addr constant [17 x i8] c"default_function\00", align 1 | |
; ----------------------------------------------------------------------------
; TVM-generated PackedFunc entry point for the riscv64 target.
; Arguments: %0 = packed argument array, %1 = per-argument type codes,
; %2 = num_args (the assert message strings below spell out each contract).
; The function is a chain of assert_end/assert_fail basic blocks validating
; the two DLTensor arguments — arg0: 1x256x96x96 float32 data with compact
; strides (2359296/9216/96/1), arg1: 256x1x3x3 float32 filter (strides
; 9/9/3/1 per @.str.2), pointer type codes (3/13/7/4), ndim == 4, dtype
; float32, zero byte_offset, device_type == 1 and matching device_id — before
; tail-calling the compute body. Every failed check reports its message via
; @__TVMAPISetLastError and returns -1; success returns 0.
; NOTE(review): machine-generated IR quoted in a gist — the comments below
; only annotate sections; no instruction has been altered.
; ----------------------------------------------------------------------------
define dllexport i32 @default_function(i8* noalias nocapture readonly, i8* noalias nocapture readonly, i32) local_unnamed_addr { | |
entry: | |
%3 = icmp eq i32 %2, 2 | |
br i1 %3, label %assert_end, label %assert_fail, !prof !1 | |
assert_fail: ; preds = %entry | |
%4 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %4(i8* getelementptr inbounds ([69 x i8], [69 x i8]* @.str, i64 0, i64 0)) | |
ret i32 -1 | |
; Unpack the two argument handles (%6, %11) and their type codes (%8, %14),
; plus arg0's data (%16), shape (%18) and strides (%20) pointers.
assert_end: ; preds = %entry | |
%5 = bitcast i8* %0 to %0** | |
%6 = load %0*, %0** %5, align 8 | |
%7 = bitcast i8* %1 to i32* | |
%8 = load i32, i32* %7, align 4, !tbaa !5 | |
%9 = getelementptr inbounds i8, i8* %0, i64 8 | |
%10 = bitcast i8* %9 to %0** | |
%11 = load %0*, %0** %10, align 8 | |
%12 = getelementptr inbounds i8, i8* %1, i64 4 | |
%13 = bitcast i8* %12 to i32* | |
%14 = load i32, i32* %13, align 4, !tbaa !19 | |
%15 = getelementptr inbounds %0, %0* %6, i64 0, i32 0 | |
%16 = load i8*, i8** %15, align 8 | |
%17 = getelementptr inbounds %0, %0* %6, i64 0, i32 4 | |
%18 = load i64*, i64** %17, align 8 | |
%19 = getelementptr inbounds %0, %0* %6, i64 0, i32 5 | |
%20 = load i64*, i64** %19, align 8 | |
%21 = icmp eq i64* %20, null | |
br i1 %21, label %if_end, label %if_then, !prof !21 | |
; arg0 strides check (only when a strides array is present): expects the
; compact layout 2359296/9216/96/1 (see @.str.1).
if_then: ; preds = %assert_end | |
%22 = load i64, i64* %20, align 8, !tbaa !22 | |
%23 = trunc i64 %22 to i32 | |
%24 = icmp eq i32 %23, 2359296 | |
%25 = getelementptr inbounds i64, i64* %20, i64 1 | |
%26 = load i64, i64* %25, align 8, !tbaa !36 | |
%27 = trunc i64 %26 to i32 | |
%28 = icmp eq i32 %27, 9216 | |
%29 = getelementptr inbounds i64, i64* %20, i64 2 | |
%30 = load i64, i64* %29, align 8, !tbaa !38 | |
%31 = trunc i64 %30 to i32 | |
%32 = icmp eq i32 %31, 96 | |
%33 = getelementptr inbounds i64, i64* %20, i64 3 | |
%34 = load i64, i64* %33, align 8, !tbaa !41 | |
%35 = trunc i64 %34 to i32 | |
%36 = icmp eq i32 %35, 1 | |
%37 = and i1 %32, %36 | |
%38 = and i1 %28, %37 | |
%39 = and i1 %24, %38 | |
br i1 %39, label %if_end, label %assert_fail1, !prof !1 | |
; Unpack arg0's device fields (%41, %43) and arg1's data/shape/strides.
if_end: ; preds = %assert_end, %if_then | |
%40 = getelementptr inbounds %0, %0* %6, i64 0, i32 1, i32 0 | |
%41 = load i32, i32* %40, align 4 | |
%42 = getelementptr inbounds %0, %0* %6, i64 0, i32 1, i32 1 | |
%43 = load i32, i32* %42, align 4 | |
%44 = getelementptr inbounds %0, %0* %11, i64 0, i32 0 | |
%45 = load i8*, i8** %44, align 8 | |
%46 = getelementptr inbounds %0, %0* %11, i64 0, i32 4 | |
%47 = load i64*, i64** %46, align 8 | |
%48 = getelementptr inbounds %0, %0* %11, i64 0, i32 5 | |
%49 = load i64*, i64** %48, align 8 | |
%50 = icmp eq i64* %49, null | |
br i1 %50, label %if_end4, label %if_then3, !prof !21 | |
assert_fail1: ; preds = %if_then | |
%51 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %51(i8* getelementptr inbounds ([202 x i8], [202 x i8]* @.str.1, i64 0, i64 0)) | |
ret i32 -1 | |
; arg1 strides check: expects 9/9/3/1 (see @.str.2).
if_then3: ; preds = %if_end | |
%52 = load i64, i64* %49, align 8, !tbaa !43 | |
%53 = trunc i64 %52 to i32 | |
%54 = icmp eq i32 %53, 9 | |
%55 = getelementptr inbounds i64, i64* %49, i64 1 | |
%56 = load i64, i64* %55, align 8, !tbaa !57 | |
%57 = trunc i64 %56 to i32 | |
%58 = icmp eq i32 %57, 9 | |
%59 = getelementptr inbounds i64, i64* %49, i64 2 | |
%60 = load i64, i64* %59, align 8, !tbaa !59 | |
%61 = trunc i64 %60 to i32 | |
%62 = icmp eq i32 %61, 3 | |
%63 = getelementptr inbounds i64, i64* %49, i64 3 | |
%64 = load i64, i64* %63, align 8, !tbaa !62 | |
%65 = trunc i64 %64 to i32 | |
%66 = icmp eq i32 %65, 1 | |
%67 = and i1 %62, %66 | |
%68 = and i1 %58, %67 | |
%69 = and i1 %54, %68 | |
br i1 %69, label %if_end4, label %assert_fail5, !prof !1 | |
; Type-code switches: both arguments must carry one of the pointer-like
; codes 3/13/7/4 (see @.str.3 / @.str.4).
if_end4: ; preds = %if_end, %if_then3 | |
switch i32 %8, label %assert_fail7 [ | |
i32 13, label %assert_end8 | |
i32 7, label %assert_end8 | |
i32 4, label %assert_end8 | |
i32 3, label %assert_end8 | |
] | |
assert_fail5: ; preds = %if_then3 | |
%70 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %70(i8* getelementptr inbounds ([192 x i8], [192 x i8]* @.str.2, i64 0, i64 0)) | |
ret i32 -1 | |
assert_fail7: ; preds = %if_end4 | |
%71 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %71(i8* getelementptr inbounds ([144 x i8], [144 x i8]* @.str.3, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end8: ; preds = %if_end4, %if_end4, %if_end4, %if_end4 | |
switch i32 %14, label %assert_fail9 [ | |
i32 13, label %assert_end10 | |
i32 7, label %assert_end10 | |
i32 4, label %assert_end10 | |
i32 3, label %assert_end10 | |
] | |
assert_fail9: ; preds = %assert_end8 | |
%72 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %72(i8* getelementptr inbounds ([144 x i8], [144 x i8]* @.str.4, i64 0, i64 0)) | |
ret i32 -1 | |
; Device type must be 1 (see @.str.5).
assert_end10: ; preds = %assert_end8, %assert_end8, %assert_end8, %assert_end8 | |
%73 = icmp eq i32 %41, 1 | |
br i1 %73, label %assert_end12, label %assert_fail11, !prof !1 | |
assert_fail11: ; preds = %assert_end10 | |
%74 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %74(i8* getelementptr inbounds ([55 x i8], [55 x i8]* @.str.5, i64 0, i64 0)) | |
ret i32 -1 | |
; arg0: ndim == 4, dtype float32 (code 2, bits 32, lanes 1), shape
; 1x256x96x96, byte_offset == 0 (@.str.6 .. @.str.12).
assert_end12: ; preds = %assert_end10 | |
%75 = getelementptr inbounds %0, %0* %6, i64 0, i32 2 | |
%76 = load i32, i32* %75, align 4 | |
%77 = icmp eq i32 %76, 4 | |
br i1 %77, label %assert_end14, label %assert_fail13, !prof !1 | |
assert_fail13: ; preds = %assert_end12 | |
%78 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %78(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.6, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end14: ; preds = %assert_end12 | |
%79 = getelementptr inbounds %0, %0* %6, i64 0, i32 3, i32 2 | |
%80 = load i16, i16* %79, align 2 | |
%81 = icmp eq i16 %80, 1 | |
%82 = getelementptr inbounds %0, %0* %6, i64 0, i32 3, i32 1 | |
%83 = load i8, i8* %82, align 1 | |
%84 = icmp eq i8 %83, 32 | |
%85 = getelementptr inbounds %0, %0* %6, i64 0, i32 3, i32 0 | |
%86 = load i8, i8* %85, align 1 | |
%87 = icmp eq i8 %86, 2 | |
%88 = and i1 %84, %87 | |
%89 = and i1 %81, %88 | |
br i1 %89, label %assert_end16, label %assert_fail15, !prof !1 | |
assert_fail15: ; preds = %assert_end14 | |
%90 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %90(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.7, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end16: ; preds = %assert_end14 | |
%91 = load i64, i64* %18, align 8, !tbaa !64 | |
%92 = trunc i64 %91 to i32 | |
%93 = icmp eq i32 %92, 1 | |
br i1 %93, label %assert_end18, label %assert_fail17, !prof !1 | |
assert_fail17: ; preds = %assert_end16 | |
%94 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %94(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.8, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end18: ; preds = %assert_end16 | |
%95 = getelementptr inbounds i64, i64* %18, i64 1 | |
%96 = load i64, i64* %95, align 8, !tbaa !78 | |
%97 = trunc i64 %96 to i32 | |
%98 = icmp eq i32 %97, 256 | |
br i1 %98, label %assert_end20, label %assert_fail19, !prof !1 | |
assert_fail19: ; preds = %assert_end18 | |
%99 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %99(i8* getelementptr inbounds ([97 x i8], [97 x i8]* @.str.9, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end20: ; preds = %assert_end18 | |
%100 = getelementptr inbounds i64, i64* %18, i64 2 | |
%101 = load i64, i64* %100, align 8, !tbaa !80 | |
%102 = trunc i64 %101 to i32 | |
%103 = icmp eq i32 %102, 96 | |
br i1 %103, label %assert_end22, label %assert_fail21, !prof !1 | |
assert_fail21: ; preds = %assert_end20 | |
%104 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %104(i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.10, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end22: ; preds = %assert_end20 | |
%105 = getelementptr inbounds i64, i64* %18, i64 3 | |
%106 = load i64, i64* %105, align 8, !tbaa !83 | |
%107 = trunc i64 %106 to i32 | |
%108 = icmp eq i32 %107, 96 | |
br i1 %108, label %assert_end24, label %assert_fail23, !prof !1 | |
assert_fail23: ; preds = %assert_end22 | |
%109 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %109(i8* getelementptr inbounds ([96 x i8], [96 x i8]* @.str.11, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end24: ; preds = %assert_end22 | |
%110 = getelementptr inbounds %0, %0* %6, i64 0, i32 6 | |
%111 = load i64, i64* %110, align 8 | |
%112 = icmp eq i64 %111, 0 | |
br i1 %112, label %assert_end26, label %assert_fail25, !prof !1 | |
assert_fail25: ; preds = %assert_end24 | |
%113 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %113(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.12, i64 0, i64 0)) | |
ret i32 -1 | |
; arg1: the same ndim/dtype/shape/byte_offset/device checks for the filter
; tensor, expected shape 256x1x3x3 (@.str.13 .. @.str.21).
assert_end26: ; preds = %assert_end24 | |
%114 = getelementptr inbounds %0, %0* %11, i64 0, i32 2 | |
%115 = load i32, i32* %114, align 4 | |
%116 = icmp eq i32 %115, 4 | |
br i1 %116, label %assert_end28, label %assert_fail27, !prof !1 | |
assert_fail27: ; preds = %assert_end26 | |
%117 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %117(i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.13, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end28: ; preds = %assert_end26 | |
%118 = getelementptr inbounds %0, %0* %11, i64 0, i32 3, i32 2 | |
%119 = load i16, i16* %118, align 2 | |
%120 = icmp eq i16 %119, 1 | |
%121 = getelementptr inbounds %0, %0* %11, i64 0, i32 3, i32 1 | |
%122 = load i8, i8* %121, align 1 | |
%123 = icmp eq i8 %122, 32 | |
%124 = getelementptr inbounds %0, %0* %11, i64 0, i32 3, i32 0 | |
%125 = load i8, i8* %124, align 1 | |
%126 = icmp eq i8 %125, 2 | |
%127 = and i1 %123, %126 | |
%128 = and i1 %120, %127 | |
br i1 %128, label %assert_end30, label %assert_fail29, !prof !1 | |
assert_fail29: ; preds = %assert_end28 | |
%129 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %129(i8* getelementptr inbounds ([186 x i8], [186 x i8]* @.str.14, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end30: ; preds = %assert_end28 | |
%130 = load i64, i64* %47, align 8, !tbaa !85 | |
%131 = trunc i64 %130 to i32 | |
%132 = icmp eq i32 %131, 256 | |
br i1 %132, label %assert_end32, label %assert_fail31, !prof !1 | |
assert_fail31: ; preds = %assert_end30 | |
%133 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %133(i8* getelementptr inbounds ([97 x i8], [97 x i8]* @.str.15, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end32: ; preds = %assert_end30 | |
%134 = getelementptr inbounds i64, i64* %47, i64 1 | |
%135 = load i64, i64* %134, align 8, !tbaa !99 | |
%136 = trunc i64 %135 to i32 | |
%137 = icmp eq i32 %136, 1 | |
br i1 %137, label %assert_end34, label %assert_fail33, !prof !1 | |
assert_fail33: ; preds = %assert_end32 | |
%138 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %138(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.16, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end34: ; preds = %assert_end32 | |
%139 = getelementptr inbounds i64, i64* %47, i64 2 | |
%140 = load i64, i64* %139, align 8, !tbaa !101 | |
%141 = trunc i64 %140 to i32 | |
%142 = icmp eq i32 %141, 3 | |
br i1 %142, label %assert_end36, label %assert_fail35, !prof !1 | |
assert_fail35: ; preds = %assert_end34 | |
%143 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %143(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.17, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end36: ; preds = %assert_end34 | |
%144 = getelementptr inbounds i64, i64* %47, i64 3 | |
%145 = load i64, i64* %144, align 8, !tbaa !104 | |
%146 = trunc i64 %145 to i32 | |
%147 = icmp eq i32 %146, 3 | |
br i1 %147, label %assert_end38, label %assert_fail37, !prof !1 | |
assert_fail37: ; preds = %assert_end36 | |
%148 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %148(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.18, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end38: ; preds = %assert_end36 | |
%149 = getelementptr inbounds %0, %0* %11, i64 0, i32 6 | |
%150 = load i64, i64* %149, align 8 | |
%151 = icmp eq i64 %150, 0 | |
br i1 %151, label %assert_end40, label %assert_fail39, !prof !1 | |
assert_fail39: ; preds = %assert_end38 | |
%152 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %152(i8* getelementptr inbounds ([112 x i8], [112 x i8]* @.str.19, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end40: ; preds = %assert_end38 | |
%153 = getelementptr inbounds %0, %0* %11, i64 0, i32 1, i32 0 | |
%154 = load i32, i32* %153, align 4 | |
%155 = icmp eq i32 %154, 1 | |
br i1 %155, label %assert_end42, label %assert_fail41, !prof !1 | |
assert_fail41: ; preds = %assert_end40 | |
%156 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %156(i8* getelementptr inbounds ([105 x i8], [105 x i8]* @.str.20, i64 0, i64 0)) | |
ret i32 -1 | |
assert_end42: ; preds = %assert_end40 | |
%157 = getelementptr inbounds %0, %0* %11, i64 0, i32 1, i32 1 | |
%158 = load i32, i32* %157, align 4 | |
%159 = icmp eq i32 %43, %158 | |
br i1 %159, label %assert_end44, label %assert_fail43, !prof !1 | |
assert_fail43: ; preds = %assert_end42 | |
%160 = load void (i8*)*, void (i8*)** @__TVMAPISetLastError, align 8, !tbaa !2 | |
tail call void %160(i8* getelementptr inbounds ([107 x i8], [107 x i8]* @.str.21, i64 0, i64 0)) | |
ret i32 -1 | |
; All checks passed: dispatch to the compute body with the two data
; pointers and the device id.
assert_end44: ; preds = %assert_end42 | |
tail call fastcc void @default_function_compute_(i8* %16, i8* %45, i32 %43) | |
ret i32 0 | |
} | |
; Function Attrs: noinline | |
define private fastcc void @default_function_compute_(i8* noalias nocapture readonly, i8* noalias, i32) unnamed_addr #0 { | |
entry: | |
%3 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @__TVMBackendAllocWorkspace, align 8, !tbaa !2 | |
%4 = tail call i8* %3(i32 1, i32 %2, i64 26214400, i32 2, i32 32) | |
%5 = load i8* (i32, i32, i64, i32, i32)*, i8* (i32, i32, i64, i32, i32)** @__TVMBackendAllocWorkspace, align 8, !tbaa !2 | |
%6 = tail call i8* %5(i32 1, i32 %2, i64 9437184, i32 2, i32 32) | |
%7 = bitcast i8* %0 to float* | |
%8 = bitcast i8* %4 to float* | |
br label %for_begin3.preheader | |
for_begin3.preheader: ; preds = %for_end5, %entry | |
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for_end5 ] | |
%9 = mul nuw nsw i64 %indvar, 102400 | |
%10 = mul nuw nsw i64 %indvar, 160 | |
%11 = mul nuw nsw i64 %indvar, 96 | |
br label %for_begin6.preheader | |
for_begin11.preheader: ; preds = %for_end5 | |
%12 = bitcast i8* %6 to float* | |
%13 = bitcast i8* %1 to float* | |
br label %for_begin14.preheader | |
for_begin6.preheader: ; preds = %for_end8, %for_begin3.preheader | |
%indvar9 = phi i64 [ 0, %for_begin3.preheader ], [ %indvar.next10, %for_end8 ] | |
%14 = add nuw nsw i64 %indvar9, %10 | |
%15 = mul nuw nsw i64 %14, 160 | |
%16 = trunc i64 %indvar9 to i32 | |
%17 = add i32 %16, -32 | |
%18 = icmp ult i32 %17, 96 | |
%19 = add nuw nsw i64 %indvar9, %11 | |
%20 = mul nuw nsw i64 %19, 96 | |
%21 = add nsw i64 %20, -3104 | |
br i1 %18, label %for_body7.us, label %for_body7.preheader | |
for_body7.preheader: ; preds = %for_begin6.preheader | |
%22 = mul nuw nsw i64 %indvar9, 640 | |
%23 = add nuw nsw i64 %9, %22 | |
%scevgep = getelementptr i8, i8* %4, i64 %23 | |
call void @llvm.memset.p0i8.i64(i8* align 4 %scevgep, i8 0, i64 640, i1 false) | |
br label %for_end8 | |
for_body7.us: ; preds = %for_begin6.preheader, %if_end10.us | |
%indvars.iv11 = phi i64 [ %indvars.iv.next12, %if_end10.us ], [ 0, %for_begin6.preheader ] | |
%24 = add nuw nsw i64 %indvars.iv11, %15 | |
%25 = trunc i64 %indvars.iv11 to i32 | |
%26 = add i32 %25, -32 | |
%27 = icmp ult i32 %26, 96 | |
br i1 %27, label %if_then9.us, label %if_end10.us | |
if_then9.us: ; preds = %for_body7.us | |
%28 = add nsw i64 %21, %indvars.iv11 | |
%29 = getelementptr inbounds float, float* %7, i64 %28 | |
%30 = load float, float* %29, align 4, !tbaa !106 | |
br label %if_end10.us | |
if_end10.us: ; preds = %if_then9.us, %for_body7.us | |
%31 = phi float [ %30, %if_then9.us ], [ 0.000000e+00, %for_body7.us ] | |
%32 = getelementptr inbounds float, float* %8, i64 %24 | |
store float %31, float* %32, align 4, !tbaa !109 | |
%indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1 | |
%exitcond14 = icmp eq i64 %indvars.iv.next12, 160 | |
br i1 %exitcond14, label %for_end8, label %for_body7.us, !prof !21 | |
for_end5: ; preds = %for_end8 | |
%indvar.next = add nuw nsw i64 %indvar, 1 | |
%exitcond17 = icmp eq i64 %indvar.next, 256 | |
br i1 %exitcond17, label %for_begin11.preheader, label %for_begin3.preheader, !prof !21 | |
for_end8: ; preds = %if_end10.us, %for_body7.preheader | |
%indvar.next10 = add nuw nsw i64 %indvar9, 1 | |
%exitcond16 = icmp eq i64 %indvar.next10, 160 | |
br i1 %exitcond16, label %for_end5, label %for_begin6.preheader, !prof !21 | |
for_begin14.preheader: ; preds = %for_end16, %for_begin11.preheader | |
%indvars.iv3 = phi i64 [ 0, %for_begin11.preheader ], [ %indvars.iv.next4, %for_end16 ] | |
%33 = trunc i64 %indvars.iv3 to i32 | |
%34 = udiv i32 %33, 96 | |
%35 = mul i32 %34, 96 | |
%.decomposed = sub i32 %33, %35 | |
br label %for_body15 | |
for_end13: ; preds = %for_end16 | |
%36 = load i32 (i32, i32, i8*)*, i32 (i32, i32, i8*)** @__TVMBackendFreeWorkspace, align 8, !tbaa !2 | |
%37 = tail call i32 %36(i32 1, i32 %2, i8* %6) | |
%38 = load i32 (i32, i32, i8*)*, i32 (i32, i32, i8*)** @__TVMBackendFreeWorkspace, align 8, !tbaa !2 | |
%39 = tail call i32 %38(i32 1, i32 %2, i8* %4) | |
ret void | |
for_body15: ; preds = %for_body15, %for_begin14.preheader | |
%indvars.iv = phi i64 [ 0, %for_begin14.preheader ], [ %indvars.iv.next, %for_body15 ] | |
%40 = phi i32 [ 0, %for_begin14.preheader ], [ %207, %for_body15 ] | |
%41 = mul nuw nsw i64 %indvars.iv, 73728 | |
%42 = add nuw nsw i64 %41, %indvars.iv3 | |
%43 = getelementptr inbounds float, float* %12, i64 %42 | |
%44 = bitcast float* %43 to <8 x float>* | |
%45 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> zeroinitializer, <8 x float>* %44, i64 36864) | |
%46 = mul nuw nsw i64 %indvars.iv, 24 | |
%47 = mul i32 %40, 1280 | |
%48 = add nuw nsw i32 %47, %34 | |
%49 = mul nuw i64 %indvars.iv, 72 | |
%50 = mul i32 %48, 160 | |
%51 = add nsw i32 %50, %.decomposed | |
%52 = sext i32 %51 to i64 | |
%53 = getelementptr inbounds float, float* %8, i64 %52 | |
%54 = bitcast float* %53 to <8 x float>* | |
%55 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%56 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %54, i64 102400) | |
%57 = getelementptr inbounds float, float* %13, i64 %49 | |
%58 = bitcast float* %57 to <8 x float>* | |
%59 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%60 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %58, i64 36) | |
%61 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%62 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%63 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %56, <8 x float> %60, <8 x float> %62) | |
%64 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %63, <8 x float>* %44, i64 36864) | |
%65 = mul i32 %48, 160 | |
%66 = add i32 %65, 32 | |
%67 = add nsw i32 %66, %.decomposed | |
%68 = sext i32 %67 to i64 | |
%69 = getelementptr inbounds float, float* %8, i64 %68 | |
%70 = bitcast float* %69 to <8 x float>* | |
%71 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%72 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %70, i64 102400) | |
%73 = or i64 %49, 1 | |
%74 = getelementptr inbounds float, float* %13, i64 %73 | |
%75 = bitcast float* %74 to <8 x float>* | |
%76 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%77 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %75, i64 36) | |
%78 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%79 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%80 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %72, <8 x float> %77, <8 x float> %79) | |
%81 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %80, <8 x float>* %44, i64 36864) | |
%82 = mul i32 %48, 160 | |
%83 = add i32 %82, 64 | |
%84 = add nsw i32 %83, %.decomposed | |
%85 = sext i32 %84 to i64 | |
%86 = getelementptr inbounds float, float* %8, i64 %85 | |
%87 = bitcast float* %86 to <8 x float>* | |
%88 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%89 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %87, i64 102400) | |
%90 = or i64 %49, 2 | |
%91 = getelementptr inbounds float, float* %13, i64 %90 | |
%92 = bitcast float* %91 to <8 x float>* | |
%93 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%94 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %92, i64 36) | |
%95 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%96 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%97 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %89, <8 x float> %94, <8 x float> %96) | |
%98 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %97, <8 x float>* %44, i64 36864) | |
%99 = mul i32 %40, 1280 | |
%100 = or i32 %99, 32 | |
%101 = add nuw nsw i32 %100, %34 | |
%102 = or i64 %46, 1 | |
%103 = mul nuw nsw i64 %102, 3 | |
%104 = mul i32 %101, 160 | |
%105 = add nsw i32 %104, %.decomposed | |
%106 = sext i32 %105 to i64 | |
%107 = getelementptr inbounds float, float* %8, i64 %106 | |
%108 = bitcast float* %107 to <8 x float>* | |
%109 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%110 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %108, i64 102400) | |
%111 = getelementptr inbounds float, float* %13, i64 %103 | |
%112 = bitcast float* %111 to <8 x float>* | |
%113 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%114 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %112, i64 36) | |
%115 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%116 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%117 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %110, <8 x float> %114, <8 x float> %116) | |
%118 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %117, <8 x float>* %44, i64 36864) | |
%119 = mul i32 %101, 160 | |
%120 = add i32 %119, 32 | |
%121 = add nsw i32 %120, %.decomposed | |
%122 = sext i32 %121 to i64 | |
%123 = getelementptr inbounds float, float* %8, i64 %122 | |
%124 = bitcast float* %123 to <8 x float>* | |
%125 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%126 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %124, i64 102400) | |
%127 = add nuw nsw i64 %103, 1 | |
%128 = getelementptr inbounds float, float* %13, i64 %127 | |
%129 = bitcast float* %128 to <8 x float>* | |
%130 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%131 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %129, i64 36) | |
%132 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%133 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%134 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %126, <8 x float> %131, <8 x float> %133) | |
%135 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %134, <8 x float>* %44, i64 36864) | |
%136 = mul i32 %101, 160 | |
%137 = add i32 %136, 64 | |
%138 = add nsw i32 %137, %.decomposed | |
%139 = sext i32 %138 to i64 | |
%140 = getelementptr inbounds float, float* %8, i64 %139 | |
%141 = bitcast float* %140 to <8 x float>* | |
%142 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%143 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %141, i64 102400) | |
%144 = add nuw nsw i64 %103, 2 | |
%145 = getelementptr inbounds float, float* %13, i64 %144 | |
%146 = bitcast float* %145 to <8 x float>* | |
%147 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%148 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %146, i64 36) | |
%149 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%150 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%151 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %143, <8 x float> %148, <8 x float> %150) | |
%152 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %151, <8 x float>* %44, i64 36864) | |
%153 = mul i32 %40, 1280 | |
%154 = or i32 %153, 64 | |
%155 = add nuw nsw i32 %154, %34 | |
%156 = or i64 %46, 2 | |
%157 = mul nuw nsw i64 %156, 3 | |
%158 = mul i32 %155, 160 | |
%159 = add nsw i32 %158, %.decomposed | |
%160 = sext i32 %159 to i64 | |
%161 = getelementptr inbounds float, float* %8, i64 %160 | |
%162 = bitcast float* %161 to <8 x float>* | |
%163 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%164 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %162, i64 102400) | |
%165 = getelementptr inbounds float, float* %13, i64 %157 | |
%166 = bitcast float* %165 to <8 x float>* | |
%167 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%168 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %166, i64 36) | |
%169 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%170 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%171 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %164, <8 x float> %168, <8 x float> %170) | |
%172 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %171, <8 x float>* %44, i64 36864) | |
%173 = mul i32 %155, 160 | |
%174 = add i32 %173, 32 | |
%175 = add nsw i32 %174, %.decomposed | |
%176 = sext i32 %175 to i64 | |
%177 = getelementptr inbounds float, float* %8, i64 %176 | |
%178 = bitcast float* %177 to <8 x float>* | |
%179 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%180 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %178, i64 102400) | |
%181 = or i64 %157, 1 | |
%182 = getelementptr inbounds float, float* %13, i64 %181 | |
%183 = bitcast float* %182 to <8 x float>* | |
%184 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%185 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* nonnull %183, i64 36) | |
%186 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%187 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%188 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %180, <8 x float> %185, <8 x float> %187) | |
%189 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %188, <8 x float>* %44, i64 36864) | |
%190 = mul i32 %155, 160 | |
%191 = add i32 %190, 64 | |
%192 = add nsw i32 %191, %.decomposed | |
%193 = sext i32 %192 to i64 | |
%194 = getelementptr inbounds float, float* %8, i64 %193 | |
%195 = bitcast float* %194 to <8 x float>* | |
%196 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%197 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %195, i64 102400) | |
%198 = add nuw nsw i64 %157, 2 | |
%199 = getelementptr inbounds float, float* %13, i64 %198 | |
%200 = bitcast float* %199 to <8 x float>* | |
%201 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%202 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %200, i64 36) | |
%203 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
%204 = tail call <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>* %44, i64 36864) | |
%205 = tail call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %197, <8 x float> %202, <8 x float> %204) | |
%206 = tail call i64 @llvm.riscv.vsetvl(i64 8, i64 8) | |
tail call void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float> %205, <8 x float>* %44, i64 36864) | |
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |
%207 = add nuw nsw i32 %40, 1 | |
%exitcond = icmp eq i64 %indvars.iv.next, 32 | |
br i1 %exitcond, label %for_end16, label %for_body15, !prof !21 | |
for_end16: ; preds = %for_body15 | |
%indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 | |
%exitcond5 = icmp eq i64 %indvars.iv.next4, 9216 | |
br i1 %exitcond5, label %for_end13, label %for_begin14.preheader, !prof !21 | |
} | |
; Function Attrs: nounwind | |
declare i64 @llvm.riscv.vsetvl(i64, i64) #1 | |
; Function Attrs: nounwind | |
declare void @llvm.riscv.vsse.v.any.v8f32.p0v8f32.i64(<8 x float>, <8 x float>*, i64) #1 | |
; Function Attrs: nounwind | |
declare <8 x float> @llvm.riscv.vlse.v.any.v8f32.p0v8f32.i64(<8 x float>*, i64) #1 | |
; Function Attrs: nounwind readnone speculatable | |
declare <8 x float> @llvm.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>) #2 | |
; Function Attrs: argmemonly nounwind | |
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #3 | |
attributes #0 = { noinline } | |
attributes #1 = { nounwind } | |
attributes #2 = { nounwind readnone speculatable } | |
attributes #3 = { argmemonly nounwind } | |
!llvm.module.flags = !{!0} | |
!0 = !{i32 2, !"tvm_target", !"llvm"} | |
!1 = !{!"branch_weights", i32 1048576, i32 1} | |
!2 = !{!3, !3, i64 0} | |
!3 = !{!"ctx_ptr", !4, i64 0} | |
!4 = !{!"tvm-tbaa"} | |
!5 = !{!6, !6, i64 0} | |
!6 = !{!"0x11c4750.w1.b0", !7, i64 0} | |
!7 = !{!"0x11c4750.w2.b0", !8, i64 0} | |
!8 = !{!"0x11c4750.w4.b0", !9, i64 0} | |
!9 = !{!"0x11c4750.w8.b0", !10, i64 0} | |
!10 = !{!"0x11c4750.w16.b0", !11, i64 0} | |
!11 = !{!"0x11c4750.w32.b0", !12, i64 0} | |
!12 = !{!"0x11c4750.w64.b0", !13, i64 0} | |
!13 = !{!"0x11c4750.w128.b0", !14, i64 0} | |
!14 = !{!"0x11c4750.w256.b0", !15, i64 0} | |
!15 = !{!"0x11c4750.w512.b0", !16, i64 0} | |
!16 = !{!"0x11c4750.w1024.b0", !17, i64 0} | |
!17 = !{!"int32", !18, i64 0} | |
!18 = !{!"0x11c4750", !4, i64 0} | |
!19 = !{!20, !20, i64 0} | |
!20 = !{!"0x11c4750.w1.b1", !7, i64 0} | |
!21 = !{!"branch_weights", i32 1, i32 1048576} | |
!22 = !{!23, !23, i64 0} | |
!23 = !{!"0x11cf4d0.w1.b0", !24, i64 0} | |
!24 = !{!"0x11cf4d0.w2.b0", !25, i64 0} | |
!25 = !{!"0x11cf4d0.w4.b0", !26, i64 0} | |
!26 = !{!"0x11cf4d0.w8.b0", !27, i64 0} | |
!27 = !{!"0x11cf4d0.w16.b0", !28, i64 0} | |
!28 = !{!"0x11cf4d0.w32.b0", !29, i64 0} | |
!29 = !{!"0x11cf4d0.w64.b0", !30, i64 0} | |
!30 = !{!"0x11cf4d0.w128.b0", !31, i64 0} | |
!31 = !{!"0x11cf4d0.w256.b0", !32, i64 0} | |
!32 = !{!"0x11cf4d0.w512.b0", !33, i64 0} | |
!33 = !{!"0x11cf4d0.w1024.b0", !34, i64 0} | |
!34 = !{!"int64", !35, i64 0} | |
!35 = !{!"0x11cf4d0", !4, i64 0} | |
!36 = !{!37, !37, i64 0} | |
!37 = !{!"0x11cf4d0.w1.b1", !24, i64 0} | |
!38 = !{!39, !39, i64 0} | |
!39 = !{!"0x11cf4d0.w1.b2", !40, i64 0} | |
!40 = !{!"0x11cf4d0.w2.b2", !25, i64 0} | |
!41 = !{!42, !42, i64 0} | |
!42 = !{!"0x11cf4d0.w1.b3", !40, i64 0} | |
!43 = !{!44, !44, i64 0} | |
!44 = !{!"0x11d24d0.w1.b0", !45, i64 0} | |
!45 = !{!"0x11d24d0.w2.b0", !46, i64 0} | |
!46 = !{!"0x11d24d0.w4.b0", !47, i64 0} | |
!47 = !{!"0x11d24d0.w8.b0", !48, i64 0} | |
!48 = !{!"0x11d24d0.w16.b0", !49, i64 0} | |
!49 = !{!"0x11d24d0.w32.b0", !50, i64 0} | |
!50 = !{!"0x11d24d0.w64.b0", !51, i64 0} | |
!51 = !{!"0x11d24d0.w128.b0", !52, i64 0} | |
!52 = !{!"0x11d24d0.w256.b0", !53, i64 0} | |
!53 = !{!"0x11d24d0.w512.b0", !54, i64 0} | |
!54 = !{!"0x11d24d0.w1024.b0", !55, i64 0} | |
!55 = !{!"int64", !56, i64 0} | |
!56 = !{!"0x11d24d0", !4, i64 0} | |
!57 = !{!58, !58, i64 0} | |
!58 = !{!"0x11d24d0.w1.b1", !45, i64 0} | |
!59 = !{!60, !60, i64 0} | |
!60 = !{!"0x11d24d0.w1.b2", !61, i64 0} | |
!61 = !{!"0x11d24d0.w2.b2", !46, i64 0} | |
!62 = !{!63, !63, i64 0} | |
!63 = !{!"0x11d24d0.w1.b3", !61, i64 0} | |
!64 = !{!65, !65, i64 0} | |
!65 = !{!"0x11c42b0.w1.b0", !66, i64 0} | |
!66 = !{!"0x11c42b0.w2.b0", !67, i64 0} | |
!67 = !{!"0x11c42b0.w4.b0", !68, i64 0} | |
!68 = !{!"0x11c42b0.w8.b0", !69, i64 0} | |
!69 = !{!"0x11c42b0.w16.b0", !70, i64 0} | |
!70 = !{!"0x11c42b0.w32.b0", !71, i64 0} | |
!71 = !{!"0x11c42b0.w64.b0", !72, i64 0} | |
!72 = !{!"0x11c42b0.w128.b0", !73, i64 0} | |
!73 = !{!"0x11c42b0.w256.b0", !74, i64 0} | |
!74 = !{!"0x11c42b0.w512.b0", !75, i64 0} | |
!75 = !{!"0x11c42b0.w1024.b0", !76, i64 0} | |
!76 = !{!"int64", !77, i64 0} | |
!77 = !{!"0x11c42b0", !4, i64 0} | |
!78 = !{!79, !79, i64 0} | |
!79 = !{!"0x11c42b0.w1.b1", !66, i64 0} | |
!80 = !{!81, !81, i64 0} | |
!81 = !{!"0x11c42b0.w1.b2", !82, i64 0} | |
!82 = !{!"0x11c42b0.w2.b2", !67, i64 0} | |
!83 = !{!84, !84, i64 0} | |
!84 = !{!"0x11c42b0.w1.b3", !82, i64 0} | |
!85 = !{!86, !86, i64 0} | |
!86 = !{!"0x11d0100.w1.b0", !87, i64 0} | |
!87 = !{!"0x11d0100.w2.b0", !88, i64 0} | |
!88 = !{!"0x11d0100.w4.b0", !89, i64 0} | |
!89 = !{!"0x11d0100.w8.b0", !90, i64 0} | |
!90 = !{!"0x11d0100.w16.b0", !91, i64 0} | |
!91 = !{!"0x11d0100.w32.b0", !92, i64 0} | |
!92 = !{!"0x11d0100.w64.b0", !93, i64 0} | |
!93 = !{!"0x11d0100.w128.b0", !94, i64 0} | |
!94 = !{!"0x11d0100.w256.b0", !95, i64 0} | |
!95 = !{!"0x11d0100.w512.b0", !96, i64 0} | |
!96 = !{!"0x11d0100.w1024.b0", !97, i64 0} | |
!97 = !{!"int64", !98, i64 0} | |
!98 = !{!"0x11d0100", !4, i64 0} | |
!99 = !{!100, !100, i64 0} | |
!100 = !{!"0x11d0100.w1.b1", !87, i64 0} | |
!101 = !{!102, !102, i64 0} | |
!102 = !{!"0x11d0100.w1.b2", !103, i64 0} | |
!103 = !{!"0x11d0100.w2.b2", !88, i64 0} | |
!104 = !{!105, !105, i64 0} | |
!105 = !{!"0x11d0100.w1.b3", !103, i64 0} | |
!106 = !{!107, !107, i64 0} | |
!107 = !{!"float32", !108, i64 0} | |
!108 = !{!"0x11c4300", !4, i64 0} | |
!109 = !{!110, !110, i64 0} | |
!110 = !{!"float32", !111, i64 0} | |
!111 = !{!"0x11c3540", !4, i64 0} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tvm
import topi

# Example: force TVM to emit strided (non-unit-stride) vector loads/stores.
#
# A depthwise conv2d in NCHW layout is scheduled so that the *channel* axis
# becomes the innermost, vectorized dimension.  Because channels are the
# outermost data dimension in NCHW, adjacent vector lanes are 96*96 floats
# apart in memory, which lowers to stride load/store intrinsics
# (e.g. RISC-V vlse/vsse) instead of contiguous vector ops.

# Workload parameters.
batch = 1
in_channel = 256
in_height = 96
in_width = 96
filter_channel = in_channel
channel_multiplier = 1
filter_height = 3
filter_width = 3
stride_h = 1
stride_w = 1
padding = 'SAME'  # or 'VALID'

# Placeholders for the conv inputs.
Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
Stride = [stride_h, stride_w]

# Declare the compute.  The trailing 32 is the *dilation*: it inflates the
# 3x3 kernel's effective footprint (and the SAME padding to 32 per side),
# which is why the lowered code indexes PaddedInput with stride-32 steps.
DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, Stride, padding, 32)
s = tvm.create_schedule(DepthwiseConv2d.op)

# Original (unscheduled) compute, for comparison.
print(tvm.lower(s, [Input, Filter], simple_mode=True))

# Move the channel axis innermost, fuse the spatial/batch axes, then split
# the channel axis by the widest vector factor that divides it and
# vectorize the inner piece -> strided vector load/store in the backend.
n, cc, h, w = DepthwiseConv2d.op.axis
s[DepthwiseConv2d].reorder(n, h, w, cc)
c = s[DepthwiseConv2d].fuse(h, w)
fused = s[DepthwiseConv2d].fuse(n, c)
if DepthwiseConv2d.shape[1].value % 8 == 0:
    cco, cci = s[DepthwiseConv2d].split(cc, factor=8)
elif DepthwiseConv2d.shape[1].value % 4 == 0:
    cco, cci = s[DepthwiseConv2d].split(cc, factor=4)
else:
    cco, cci = s[DepthwiseConv2d].split(cc, factor=1)
s[DepthwiseConv2d].reorder(fused, cco, cci)
s[DepthwiseConv2d].vectorize(cci)

# Scheduled compute, for comparison.
print(tvm.lower(s, [Input, Filter], simple_mode=True))

# Build and dump the generated LLVM IR to disk.
m = tvm.build(s, [Input, Filter], target='llvm')
file_path = 'stride_load_store_sample.ll'
with open(file_path, "w") as f:
    f.write(m.get_source())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment