Last active
May 21, 2020 06:18
-
-
Save BeMg/e5083dd453054b4a0704e4a04f4a572f to your computer and use it in GitHub Desktop.
temp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Vector sine for <4 x float>: takes %x and returns a <4 x float> result.
; Outline, as visible in the instructions below:
;   1. take |x| by clearing the sign bit;
;   2. compute an integer-valued multiplier %10 from |x| and subtract %10 times
;      several constants from |x| with error compensation, giving a reduced
;      argument (%r0) and a correction term (%r1);
;   3. if any lane has |x| >= 2^23, call the out-of-line large-argument
;      reduction instead, which rewrites %r0/%r1 and returns the quadrant;
;   4. evaluate two polynomials in the reduced argument and pick one per lane
;      from the low bit of the quadrant;
;   5. fix up the sign, return NaN for non-finite inputs and return x itself
;      when x compares equal to zero.
; Function Attrs: nounwind readnone uwtable | |
define <4 x float> @_Z7_cl_sinDv4_f(<4 x float> %x) local_unnamed_addr #2 { | |
entry: | |
; Stack slots through which the reduced argument (%r0) and its correction
; term (%r1) are exchanged with the large-argument helper.
%r0 = alloca <4 x float>, align 16 | |
%r1 = alloca <4 x float>, align 16 | |
; %astype1 = |x|: 2147483647 is 0x7FFFFFFF, i.e. every bit except the sign.
%astype = bitcast <4 x float> %x to <4 x i32> | |
%and = and <4 x i32> %astype, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> | |
%astype1 = bitcast <4 x i32> %and to <4 x float> | |
%0 = bitcast <4 x float>* %r0 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #10 | |
%1 = bitcast <4 x float>* %r1 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %1) #10 | |
; %2 = |x| * c + 0.5, where c = 0x3FE45F3060000000 (about 0.63662, which
; looks like 2/pi rounded to float).
%2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1, <4 x float> <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>) #10 | |
; Truncate %2 toward zero: convert to i32 and back, subtract the fractional
; part, then copy the sign bit of %2 onto the result. The 64-bit masks are
; 0x7FFFFFFF7FFFFFFF (magnitude bits) and 0x8000000080000000 (sign bits).
%3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %2) #10 | |
%conv.i.i.i.i.i.i.i = sitofp <4 x i32> %3 to <4 x float> | |
%sub.i.i19.i.i.i.i.i = fsub <4 x float> %2, %conv.i.i.i.i.i.i.i | |
%4 = bitcast <4 x float> %2 to <2 x i64> | |
%and.i.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 9223372034707292159, i64 9223372034707292159> | |
%5 = bitcast <2 x i64> %and.i.i.i.i.i.i.i.i.i to <4 x float> | |
; Lanes where |%2| is infinite or >= 0x4160000000000000 (2^23 = 8388608.0)
; keep %2 unchanged instead of the truncated value.
%6 = fcmp oeq <4 x float> %5, <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000> | |
%7 = fcmp oge <4 x float> %5, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000> | |
%or.i.i2021.i.i.i.i.i = or <4 x i1> %7, %6 | |
%sub.i.i.i.i.i.i.i = fsub <4 x float> %2, %sub.i.i19.i.i.i.i.i | |
%8 = bitcast <4 x float> %sub.i.i.i.i.i.i.i to <2 x i64> | |
%and.i.i10.i.i.i.i.i.i = and <2 x i64> %8, <i64 9223372034707292159, i64 9223372034707292159> | |
%and.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 -9223372034707292160, i64 -9223372034707292160> | |
%xor.i.i11.i.i.i.i.i.i = or <2 x i64> %and.i.i10.i.i.i.i.i.i, %and.i.i.i.i.i.i.i.i | |
%9 = bitcast <2 x i64> %xor.i.i11.i.i.i.i.i.i to <4 x float> | |
; %10 is the integer-valued multiplier used by the reduction below.
%10 = select <4 x i1> %or.i.i2021.i.i.i.i.i, <4 x float> %2, <4 x float> %9 | |
; Split %10 into a high part with the low 12 bits cleared (-4096 is
; 0xFFFFF000) and the remaining low part (%sub.i.i.i).
%astype.i.i.i = bitcast <4 x float> %10 to <4 x i32> | |
%and.i.i.i = and <4 x i32> %astype.i.i.i, <i32 -4096, i32 -4096, i32 -4096, i32 -4096> | |
%astype1.i.i.i = bitcast <4 x i32> %and.i.i.i to <4 x float> | |
%sub.i.i.i = fsub <4 x float> %10, %astype1.i.i.i | |
; First reduction step. %mul.i.i.i = %10 * 0x3FF921FB40000000 (about 1.570796,
; which looks like the leading part of pi/2). The four fma calls accumulate
; hi/lo partial products of %10 and a two-piece split of that constant minus
; the rounded product, i.e. what appears to be the product's rounding error
; (%14).
%mul.i.i.i = fmul <4 x float> %10, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000> | |
%fneg.i.i.i = fneg <4 x float> %mul.i.i.i | |
%11 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %fneg.i.i.i) #10 | |
%12 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %11) #10 | |
%13 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %12) #10 | |
%14 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %13) #10 | |
; |x| - product, with the subtraction's own rounding error and the product
; error %14 folded back in (%add.i.i).
%sub.i.i = fsub <4 x float> %astype1, %mul.i.i.i | |
%sub2.i.i = fsub <4 x float> %astype1, %sub.i.i | |
%sub3.i.i = fsub <4 x float> %sub2.i.i, %mul.i.i.i | |
%sub4.i.i = fsub <4 x float> %sub3.i.i, %14 | |
%add.i.i = fadd <4 x float> %sub.i.i, %sub4.i.i | |
; Second reduction step: same pattern with the smaller constant
; 0x3E74442D00000000 (presumably the next piece of a multi-part pi/2).
%mul.i44.i.i = fmul <4 x float> %10, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000> | |
%fneg.i45.i.i = fneg <4 x float> %mul.i44.i.i | |
%15 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %fneg.i45.i.i) #10 | |
%16 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %15) #10 | |
%17 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %16) #10 | |
%18 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %17) #10 | |
%sub5.i.i = fsub <4 x float> %add.i.i, %mul.i44.i.i | |
%sub6.i.i = fsub <4 x float> %add.i.i, %sub5.i.i | |
%sub7.i.i = fsub <4 x float> %sub6.i.i, %mul.i44.i.i | |
%sub8.i.i = fsub <4 x float> %sub7.i.i, %18 | |
%add9.i.i = fadd <4 x float> %sub5.i.i, %sub8.i.i | |
; Third reduction step with the still smaller constant 0x3CF8469880000000.
; Here the product error %22 is not folded into the sum; its negation is
; kept separately as the correction term stored to %r1.
%mul.i54.i.i = fmul <4 x float> %10, <float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000> | |
%fneg.i55.i.i = fneg <4 x float> %mul.i54.i.i | |
%19 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %fneg.i55.i.i) #10 | |
%20 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %19) #10 | |
%21 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %20) #10 | |
%22 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %21) #10 | |
%sub10.i.i = fsub <4 x float> %add9.i.i, %mul.i54.i.i | |
%sub11.i.i = fsub <4 x float> %add9.i.i, %sub10.i.i | |
%sub12.i.i = fsub <4 x float> %sub11.i.i, %mul.i54.i.i | |
%add13.i.i = fadd <4 x float> %sub10.i.i, %sub12.i.i | |
; Publish the fast-path results: %r0 = reduced argument, %r1 = -%22.
store <4 x float> %add13.i.i, <4 x float>* %r0, align 16, !tbaa !6 | |
%fneg.i.i = fneg <4 x float> %22 | |
store <4 x float> %fneg.i.i, <4 x float>* %r1, align 16, !tbaa !6 | |
; Lane mask for |x| >= 2^23, sign-extended to all-ones per lane. The code
; below tests the first two lanes: OR them together and check whether the
; sign bit of the result is clear (icmp sgt ..., -1).
%cmp.i = fcmp oge <4 x float> %astype1, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000> | |
%sext.i = sext <4 x i1> %cmp.i to <4 x i32> | |
%.bc.i.i = bitcast <4 x i32> %sext.i to <2 x double> | |
%.extract.i.i = extractelement <2 x double> %.bc.i.i, i32 0 | |
%23 = bitcast double %.extract.i.i to <2 x i32> | |
%24 = extractelement <2 x i32> %23, i64 0 | |
%25 = extractelement <2 x i32> %23, i64 1 | |
%26 = or i32 %24, %25 | |
%27 = icmp sgt i32 %26, -1 | |
br i1 %27, label %_Z7_cl_anyDv4_i.exit.i, label %if.then.i | |
_Z7_cl_anyDv4_i.exit.i: ; preds = %entry | |
; Fast-path quadrant: the low two bits of %10 converted to integer. Then the
; same large-argument test is repeated for the last two lanes.
%28 = fptosi <4 x float> %10 to <4 x i32> | |
%and.i.i = and <4 x i32> %28, <i32 3, i32 3, i32 3, i32 3> | |
%.extract6.i.i = extractelement <2 x double> %.bc.i.i, i32 1 | |
%29 = bitcast double %.extract6.i.i to <2 x i32> | |
%30 = extractelement <2 x i32> %29, i64 0 | |
%31 = extractelement <2 x i32> %29, i64 1 | |
%32 = or i32 %31, %30 | |
%tobool.i = icmp sgt i32 %32, -1 | |
br i1 %tobool.i, label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit, label %if.then.i | |
if.then.i: ; preds = %_Z7_cl_anyDv4_i.exit.i, %entry | |
; At least one lane is large: call the out-of-line reduction with pointers to
; %r0/%r1 and |x|, then reload both slots. Its return value is used as the
; quadrant. (The helper is defined elsewhere; its exact contract is not
; visible here.)
%call3.i = call <4 x i32> @_Z25__pocl_argReductionLargeSPU9CLprivateDv4_fS1_S_(<4 x float>* nonnull %r0, <4 x float>* nonnull %r1, <4 x float> %astype1) #10 | |
%.pre = load <4 x float>, <4 x float>* %r0, align 16, !tbaa !6 | |
%.pre6 = load <4 x float>, <4 x float>* %r1, align 16, !tbaa !6 | |
br label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit | |
_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit: ; preds = %if.then.i, %_Z7_cl_anyDv4_i.exit.i | |
; Merge both paths: %34 = reduced argument r, %33 = correction term,
; %retval1.0.i = quadrant.
%33 = phi <4 x float> [ %.pre6, %if.then.i ], [ %fneg.i.i, %_Z7_cl_anyDv4_i.exit.i ] | |
%34 = phi <4 x float> [ %.pre, %if.then.i ], [ %add13.i.i, %_Z7_cl_anyDv4_i.exit.i ] | |
%retval1.0.i = phi <4 x i32> [ %call3.i, %if.then.i ], [ %and.i.i, %_Z7_cl_anyDv4_i.exit.i ] | |
; First polynomial (result %sub.i, chosen below for even quadrants).
; %mul.i = r^2, %mul1.i = r^3; a Horner chain in r^2 is combined with the
; correction term %33 and the coefficient 0x3FC5555560000000 (about 1/6),
; and the total is subtracted from r.
%mul.i = fmul <4 x float> %34, %34 | |
%mul1.i = fmul <4 x float> %34, %mul.i | |
%35 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000>, <4 x float> <float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000>) #10 | |
%36 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %35, <4 x float> <float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000>) #10 | |
%37 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %36, <4 x float> <float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000>) #10 | |
%38 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %37, <4 x float> <float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000>) #10 | |
%fneg.i = fneg <4 x float> %mul1.i | |
%mul5.i = fmul <4 x float> %38, %fneg.i | |
%39 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %33, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %mul5.i) #10 | |
%fneg7.i = fneg <4 x float> %33 | |
%40 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %39, <4 x float> %fneg7.i) #10 | |
%41 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul1.i, <4 x float> <float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000>, <4 x float> %40) #10 | |
%sub.i = fsub <4 x float> %34, %41 | |
; Second polynomial (result %sub21.i, chosen below for odd quadrants):
; another Horner chain in r^2 ending with 0x3FA5555560000000 (about 1/24).
%42 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000>, <4 x float> <float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000>) #10 | |
%43 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %42, <4 x float> <float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000>) #10 | |
%44 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %43, <4 x float> <float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000>) #10 | |
%45 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %44, <4 x float> <float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000>) #10 | |
%46 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %45, <4 x float> <float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000>) #10 | |
%mul5.i2 = fmul <4 x float> %mul.i, %46 | |
; Offset %49, chosen from the bit pattern of |r|:
;   |r| bits <= 0x3E999999 (just under 0.3)       -> 0.0
;   otherwise, |r| bits < 0x3F480001              -> |r| with 2 subtracted
;                                                    from the exponent field
;                                                    (adding -16777216), i.e. |r|/4
;   otherwise                                     -> 0x3E900000 (0.28125)
; The result is then formed as (1 - %49) - ((r^2/2 - %49) - tail).
%astype.i = bitcast <4 x float> %34 to <4 x i32> | |
%and.i = and <4 x i32> %astype.i, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> | |
%sub.i3 = add nsw <4 x i32> %and.i, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216> | |
%cmp.i4 = icmp ugt <4 x i32> %and.i, <i32 1050253721, i32 1050253721, i32 1050253721, i32 1050253721> | |
%cmp7.i = icmp ult <4 x i32> %and.i, <i32 1061683201, i32 1061683201, i32 1061683201, i32 1061683201> | |
%and952.i = and <4 x i1> %cmp.i4, %cmp7.i | |
%47 = select <4 x i1> %and952.i, <4 x i32> %sub.i3, <4 x i32> zeroinitializer | |
%48 = select <4 x i1> %cmp7.i, <4 x i32> %47, <4 x i32> <i32 1049624576, i32 1049624576, i32 1049624576, i32 1049624576> | |
%49 = bitcast <4 x i32> %48 to <4 x float> | |
%fneg.i5 = fneg <4 x float> %49 | |
%50 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %fneg.i5) #10 | |
%sub16.i = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %49 | |
%fneg17.i = fneg <4 x float> %34 | |
%mul18.i = fmul <4 x float> %33, %fneg17.i | |
%51 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %mul5.i2, <4 x float> %mul18.i) #10 | |
%sub20.i = fsub <4 x float> %50, %51 | |
%sub21.i = fsub <4 x float> %sub16.i, %sub20.i | |
; Per-lane selection: quadrant bit 0 clear -> first polynomial (%sub.i),
; set -> second polynomial (%sub21.i).
%.mask = and <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1> | |
%52 = icmp eq <4 x i32> %.mask, zeroinitializer | |
%.v = select <4 x i1> %52, <4 x float> %sub.i, <4 x float> %sub21.i | |
%53 = bitcast <4 x float> %.v to <4 x i32> | |
; Sign fix-up: flip the sign when the quadrant is greater than 1 (signed
; compare), then XOR in the sign bit of the original x.
%cmp = icmp sgt <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1> | |
%54 = select <4 x i1> %cmp, <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, <4 x i32> zeroinitializer | |
%xor8 = and <4 x i32> %astype, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> | |
%xor = xor <4 x i32> %54, %xor8 | |
%xor9 = xor <4 x i32> %xor, %53 | |
; Lanes whose |x| bit pattern is not below 0x7F800000 (2139095040, the
; infinity encoding) yield 0x7FC00000 (2143289344, a quiet NaN).
%cmp11 = icmp ult <4 x i32> %and, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> | |
%55 = select <4 x i1> %cmp11, <4 x i32> %xor9, <4 x i32> <i32 2143289344, i32 2143289344, i32 2143289344, i32 2143289344> | |
; Lanes where x compares equal to zero return the original bits of x, so the
; sign of a zero input is passed through.
%cmp15 = fcmp une <4 x float> %x, zeroinitializer | |
%56 = select <4 x i1> %cmp15, <4 x i32> %55, <4 x i32> %astype | |
%57 = bitcast <4 x i32> %56 to <4 x float> | |
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %1) #10 | |
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) #10 | |
ret <4 x float> %57 | |
} | |
; Vector cosine for <4 x float>: takes %x and returns a <4 x float> result.
; Outline, as visible in the instructions below:
;   1. take |x| by clearing the sign bit;
;   2. compute an integer-valued multiplier %10 from |x| and subtract %10 times
;      several constants from |x| with error compensation, giving a reduced
;      argument (%r0) and a correction term (%r1);
;   3. if any lane has |x| >= 2^23, call the out-of-line large-argument
;      reduction instead, which rewrites %r0/%r1 and returns the quadrant;
;   4. evaluate two polynomials in the reduced argument and pick one per lane
;      from the low bit of the quadrant (the odd-quadrant one is negated);
;   5. flip the sign from bit 1 of the quadrant, return NaN for non-finite
;      inputs and return 1.0 when x compares equal to zero.
; Function Attrs: nounwind readnone uwtable | |
define <4 x float> @_Z7_cl_cosDv4_f(<4 x float> %x) local_unnamed_addr #2 { | |
entry: | |
; Stack slots through which the reduced argument (%r0) and its correction
; term (%r1) are exchanged with the large-argument helper.
%r0 = alloca <4 x float>, align 16 | |
%r1 = alloca <4 x float>, align 16 | |
; %astype1 = |x|: 2147483647 is 0x7FFFFFFF, i.e. every bit except the sign.
%astype = bitcast <4 x float> %x to <4 x i32> | |
%and = and <4 x i32> %astype, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> | |
%astype1 = bitcast <4 x i32> %and to <4 x float> | |
%0 = bitcast <4 x float>* %r0 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #10 | |
%1 = bitcast <4 x float>* %r1 to i8* | |
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %1) #10 | |
; %2 = |x| * c + 0.5, where c = 0x3FE45F3060000000 (about 0.63662, which
; looks like 2/pi rounded to float).
%2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1, <4 x float> <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>) #10 | |
; Truncate %2 toward zero: convert to i32 and back, subtract the fractional
; part, then copy the sign bit of %2 onto the result. The 64-bit masks are
; 0x7FFFFFFF7FFFFFFF (magnitude bits) and 0x8000000080000000 (sign bits).
%3 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %2) #10 | |
%conv.i.i.i.i.i.i.i = sitofp <4 x i32> %3 to <4 x float> | |
%sub.i.i19.i.i.i.i.i = fsub <4 x float> %2, %conv.i.i.i.i.i.i.i | |
%4 = bitcast <4 x float> %2 to <2 x i64> | |
%and.i.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 9223372034707292159, i64 9223372034707292159> | |
%5 = bitcast <2 x i64> %and.i.i.i.i.i.i.i.i.i to <4 x float> | |
; Lanes where |%2| is infinite or >= 0x4160000000000000 (2^23 = 8388608.0)
; keep %2 unchanged instead of the truncated value.
%6 = fcmp oeq <4 x float> %5, <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000> | |
%7 = fcmp oge <4 x float> %5, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000> | |
%or.i.i2021.i.i.i.i.i = or <4 x i1> %7, %6 | |
%sub.i.i.i.i.i.i.i = fsub <4 x float> %2, %sub.i.i19.i.i.i.i.i | |
%8 = bitcast <4 x float> %sub.i.i.i.i.i.i.i to <2 x i64> | |
%and.i.i10.i.i.i.i.i.i = and <2 x i64> %8, <i64 9223372034707292159, i64 9223372034707292159> | |
%and.i.i.i.i.i.i.i.i = and <2 x i64> %4, <i64 -9223372034707292160, i64 -9223372034707292160> | |
%xor.i.i11.i.i.i.i.i.i = or <2 x i64> %and.i.i10.i.i.i.i.i.i, %and.i.i.i.i.i.i.i.i | |
%9 = bitcast <2 x i64> %xor.i.i11.i.i.i.i.i.i to <4 x float> | |
; %10 is the integer-valued multiplier used by the reduction below.
%10 = select <4 x i1> %or.i.i2021.i.i.i.i.i, <4 x float> %2, <4 x float> %9 | |
; Split %10 into a high part with the low 12 bits cleared (-4096 is
; 0xFFFFF000) and the remaining low part (%sub.i.i.i).
%astype.i.i.i = bitcast <4 x float> %10 to <4 x i32> | |
%and.i.i.i = and <4 x i32> %astype.i.i.i, <i32 -4096, i32 -4096, i32 -4096, i32 -4096> | |
%astype1.i.i.i = bitcast <4 x i32> %and.i.i.i to <4 x float> | |
%sub.i.i.i = fsub <4 x float> %10, %astype1.i.i.i | |
; First reduction step. %mul.i.i.i = %10 * 0x3FF921FB40000000 (about 1.570796,
; which looks like the leading part of pi/2). The four fma calls accumulate
; hi/lo partial products of %10 and a two-piece split of that constant minus
; the rounded product, i.e. what appears to be the product's rounding error
; (%14).
%mul.i.i.i = fmul <4 x float> %10, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000> | |
%fneg.i.i.i = fneg <4 x float> %mul.i.i.i | |
%11 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %fneg.i.i.i) #10 | |
%12 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %11) #10 | |
%13 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000, float 0x3FF9200000000000>, <4 x float> %12) #10 | |
%14 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000, float 0x3F3FB40000000000>, <4 x float> %13) #10 | |
; |x| - product, with the subtraction's own rounding error and the product
; error %14 folded back in (%add.i.i).
%sub.i.i = fsub <4 x float> %astype1, %mul.i.i.i | |
%sub2.i.i = fsub <4 x float> %astype1, %sub.i.i | |
%sub3.i.i = fsub <4 x float> %sub2.i.i, %mul.i.i.i | |
%sub4.i.i = fsub <4 x float> %sub3.i.i, %14 | |
%add.i.i = fadd <4 x float> %sub.i.i, %sub4.i.i | |
; Second reduction step: same pattern with the smaller constant
; 0x3E74442D00000000 (presumably the next piece of a multi-part pi/2).
%mul.i44.i.i = fmul <4 x float> %10, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000> | |
%fneg.i45.i.i = fneg <4 x float> %mul.i44.i.i | |
%15 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %fneg.i45.i.i) #10 | |
%16 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %15) #10 | |
%17 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000, float 0x3E74440000000000>, <4 x float> %16) #10 | |
%18 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000, float 0x3D86800000000000>, <4 x float> %17) #10 | |
%sub5.i.i = fsub <4 x float> %add.i.i, %mul.i44.i.i | |
%sub6.i.i = fsub <4 x float> %add.i.i, %sub5.i.i | |
%sub7.i.i = fsub <4 x float> %sub6.i.i, %mul.i44.i.i | |
%sub8.i.i = fsub <4 x float> %sub7.i.i, %18 | |
%add9.i.i = fadd <4 x float> %sub5.i.i, %sub8.i.i | |
; Third reduction step with the still smaller constant 0x3CF8469880000000.
; Here the product error %22 is not folded into the sum; its negation is
; kept separately as the correction term stored to %r1.
%mul.i54.i.i = fmul <4 x float> %10, <float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000, float 0x3CF8469880000000> | |
%fneg.i55.i.i = fneg <4 x float> %mul.i54.i.i | |
%19 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %fneg.i55.i.i) #10 | |
%20 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %astype1.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %19) #10 | |
%21 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000, float 0x3CF8460000000000>, <4 x float> %20) #10 | |
%22 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i.i.i, <4 x float> <float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000, float 0x3C23100000000000>, <4 x float> %21) #10 | |
%sub10.i.i = fsub <4 x float> %add9.i.i, %mul.i54.i.i | |
%sub11.i.i = fsub <4 x float> %add9.i.i, %sub10.i.i | |
%sub12.i.i = fsub <4 x float> %sub11.i.i, %mul.i54.i.i | |
%add13.i.i = fadd <4 x float> %sub10.i.i, %sub12.i.i | |
; Publish the fast-path results: %r0 = reduced argument, %r1 = -%22.
store <4 x float> %add13.i.i, <4 x float>* %r0, align 16, !tbaa !6 | |
%fneg.i.i = fneg <4 x float> %22 | |
store <4 x float> %fneg.i.i, <4 x float>* %r1, align 16, !tbaa !6 | |
; Lane mask for |x| >= 2^23, sign-extended to all-ones per lane. The code
; below tests the first two lanes: OR them together and check whether the
; sign bit of the result is clear (icmp sgt ..., -1).
%cmp.i = fcmp oge <4 x float> %astype1, <float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000, float 0x4160000000000000> | |
%sext.i = sext <4 x i1> %cmp.i to <4 x i32> | |
%.bc.i.i = bitcast <4 x i32> %sext.i to <2 x double> | |
%.extract.i.i = extractelement <2 x double> %.bc.i.i, i32 0 | |
%23 = bitcast double %.extract.i.i to <2 x i32> | |
%24 = extractelement <2 x i32> %23, i64 0 | |
%25 = extractelement <2 x i32> %23, i64 1 | |
%26 = or i32 %24, %25 | |
%27 = icmp sgt i32 %26, -1 | |
br i1 %27, label %_Z7_cl_anyDv4_i.exit.i, label %if.then.i | |
_Z7_cl_anyDv4_i.exit.i: ; preds = %entry | |
; Fast-path quadrant: the low two bits of %10 converted to integer. Then the
; same large-argument test is repeated for the last two lanes.
%28 = fptosi <4 x float> %10 to <4 x i32> | |
%and.i.i = and <4 x i32> %28, <i32 3, i32 3, i32 3, i32 3> | |
%.extract6.i.i = extractelement <2 x double> %.bc.i.i, i32 1 | |
%29 = bitcast double %.extract6.i.i to <2 x i32> | |
%30 = extractelement <2 x i32> %29, i64 0 | |
%31 = extractelement <2 x i32> %29, i64 1 | |
%32 = or i32 %31, %30 | |
%tobool.i = icmp sgt i32 %32, -1 | |
br i1 %tobool.i, label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit, label %if.then.i | |
if.then.i: ; preds = %_Z7_cl_anyDv4_i.exit.i, %entry | |
; At least one lane is large: call the out-of-line reduction with pointers to
; %r0/%r1 and |x|, then reload both slots. Its return value is used as the
; quadrant. (The helper is defined elsewhere; its exact contract is not
; visible here.)
%call3.i = call <4 x i32> @_Z25__pocl_argReductionLargeSPU9CLprivateDv4_fS1_S_(<4 x float>* nonnull %r0, <4 x float>* nonnull %r1, <4 x float> %astype1) #10 | |
%.pre = load <4 x float>, <4 x float>* %r0, align 16, !tbaa !6 | |
%.pre6 = load <4 x float>, <4 x float>* %r1, align 16, !tbaa !6 | |
br label %_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit | |
_Z20__pocl_argReductionSPU9CLprivateDv4_fS1_S_.exit: ; preds = %if.then.i, %_Z7_cl_anyDv4_i.exit.i | |
; Merge both paths: %34 = reduced argument r, %33 = correction term,
; %retval1.0.i = quadrant.
%33 = phi <4 x float> [ %.pre6, %if.then.i ], [ %fneg.i.i, %_Z7_cl_anyDv4_i.exit.i ] | |
%34 = phi <4 x float> [ %.pre, %if.then.i ], [ %add13.i.i, %_Z7_cl_anyDv4_i.exit.i ] | |
%retval1.0.i = phi <4 x i32> [ %call3.i, %if.then.i ], [ %and.i.i, %_Z7_cl_anyDv4_i.exit.i ] | |
; First polynomial (result %sub.i; its negation %fneg is chosen below for odd
; quadrants). %mul.i = r^2, %mul1.i = r^3; a Horner chain in r^2 is combined
; with the correction term %33 and the coefficient 0x3FC5555560000000
; (about 1/6), and the total is subtracted from r.
%mul.i = fmul <4 x float> %34, %34 | |
%mul1.i = fmul <4 x float> %34, %mul.i | |
%35 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000, float 0x3DE5D93A60000000>, <4 x float> <float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000, float 0xBE5AE5E680000000>) #10 | |
%36 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %35, <4 x float> <float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000, float 0x3EC6DBE4A0000000>) #10 | |
%37 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %36, <4 x float> <float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000, float 0xBF2A013A80000000>) #10 | |
%38 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %37, <4 x float> <float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000, float 0x3F811110E0000000>) #10 | |
%fneg.i = fneg <4 x float> %mul1.i | |
%mul5.i = fmul <4 x float> %38, %fneg.i | |
%39 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %33, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %mul5.i) #10 | |
%fneg7.i = fneg <4 x float> %33 | |
%40 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %39, <4 x float> %fneg7.i) #10 | |
%41 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul1.i, <4 x float> <float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000, float 0x3FC5555560000000>, <4 x float> %40) #10 | |
%sub.i = fsub <4 x float> %34, %41 | |
%fneg = fneg <4 x float> %sub.i | |
; Second polynomial (result %sub21.i, chosen below for even quadrants):
; another Horner chain in r^2 ending with 0x3FA5555560000000 (about 1/24).
%42 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000, float 0xBDA8FAE9C0000000>, <4 x float> <float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000, float 0x3E21EE9EC0000000>) #10 | |
%43 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %42, <4 x float> <float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000, float 0xBE92524740000000>) #10 | |
%44 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %43, <4 x float> <float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000, float 0x3EFA015C40000000>) #10 | |
%45 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %44, <4 x float> <float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000, float 0xBF56C16C00000000>) #10 | |
%46 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %45, <4 x float> <float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000, float 0x3FA5555560000000>) #10 | |
%mul5.i2 = fmul <4 x float> %mul.i, %46 | |
; Offset %49, chosen from the bit pattern of |r|:
;   |r| bits <= 0x3E999999 (just under 0.3)       -> 0.0
;   otherwise, |r| bits < 0x3F480001              -> |r| with 2 subtracted
;                                                    from the exponent field
;                                                    (adding -16777216), i.e. |r|/4
;   otherwise                                     -> 0x3E900000 (0.28125)
; The result is then formed as (1 - %49) - ((r^2/2 - %49) - tail).
%astype.i = bitcast <4 x float> %34 to <4 x i32> | |
%and.i = and <4 x i32> %astype.i, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> | |
%sub.i3 = add nsw <4 x i32> %and.i, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216> | |
%cmp.i4 = icmp ugt <4 x i32> %and.i, <i32 1050253721, i32 1050253721, i32 1050253721, i32 1050253721> | |
%cmp7.i = icmp ult <4 x i32> %and.i, <i32 1061683201, i32 1061683201, i32 1061683201, i32 1061683201> | |
%and952.i = and <4 x i1> %cmp.i4, %cmp7.i | |
%47 = select <4 x i1> %and952.i, <4 x i32> %sub.i3, <4 x i32> zeroinitializer | |
%48 = select <4 x i1> %cmp7.i, <4 x i32> %47, <4 x i32> <i32 1049624576, i32 1049624576, i32 1049624576, i32 1049624576> | |
%49 = bitcast <4 x i32> %48 to <4 x float> | |
%fneg.i5 = fneg <4 x float> %49 | |
%50 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, <4 x float> %fneg.i5) #10 | |
%sub16.i = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %49 | |
%fneg17.i = fneg <4 x float> %34 | |
%mul18.i = fmul <4 x float> %33, %fneg17.i | |
%51 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %mul.i, <4 x float> %mul5.i2, <4 x float> %mul18.i) #10 | |
%sub20.i = fsub <4 x float> %50, %51 | |
%sub21.i = fsub <4 x float> %sub16.i, %sub20.i | |
; Per-lane selection: quadrant bit 0 clear -> second polynomial (%sub21.i),
; set -> negated first polynomial (%fneg).
%.mask = and <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1> | |
%52 = icmp eq <4 x i32> %.mask, zeroinitializer | |
%.v = select <4 x i1> %52, <4 x float> %sub21.i, <4 x float> %fneg | |
%53 = bitcast <4 x float> %.v to <4 x i32> | |
; Sign fix-up: move bit 1 of the quadrant into the sign-bit position and XOR
; it into the result. The sign of x itself is not used.
%shr29 = lshr <4 x i32> %retval1.0.i, <i32 1, i32 1, i32 1, i32 1> | |
%shl4 = shl <4 x i32> %shr29, <i32 31, i32 31, i32 31, i32 31> | |
%xor = xor <4 x i32> %shl4, %53 | |
; Lanes whose |x| bit pattern is not below 0x7F800000 (2139095040, the
; infinity encoding) yield 0x7FC00000 (2143289344, a quiet NaN).
%cmp = icmp ult <4 x i32> %and, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040> | |
%54 = select <4 x i1> %cmp, <4 x i32> %xor, <4 x i32> <i32 2143289344, i32 2143289344, i32 2143289344, i32 2143289344> | |
; Lanes where x compares equal to zero return exactly 1.0.
%cmp10 = fcmp une <4 x float> %x, zeroinitializer | |
%55 = bitcast <4 x i32> %54 to <4 x float> | |
%56 = select <4 x i1> %cmp10, <4 x float> %55, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> | |
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %1) #10 | |
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0) #10 | |
ret <4 x float> %56 | |
} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Calls the float4 sin() and cos() built-ins on volatile inputs and prints a
 * message when either result differs from its reference vector.
 *
 * NOTE(review): the reference vectors do not look like sine/cosine results
 * (3.0f cannot be a sine value), so the message is presumably printed on
 * every call; its text suggests the comparison exists only to keep the
 * sin()/cos() calls from being optimized away -- confirm.
 */
__kernel void test_sin_cos()
{
    /* volatile so the inputs are not treated as compile-time constants */
    volatile float4 sin_arg = (float4)(3.0f, 5.0f, -2.0f, -9.0f);
    volatile float4 cos_arg = (float4)(2.0f, -4.4f, -1.0f, -20.0f);

    const float4 sin_ref = (float4)(3.0f, 5.0f, -1.0f, -9.0f);
    const float4 cos_ref = (float4)(2.0f, -4.4f, -2.0f, -20.0f);

    const float4 sin_out = sin(sin_arg);
    const float4 cos_out = cos(cos_arg);

    /* Nothing to report when every lane of both results matches. */
    if (!any(sin_out != sin_ref) && !any(cos_out != cos_ref)) {
        return;
    }

    printf("Make sure this function has side effect.\n");
}
/*
 * Checks the float4 max() and min() built-ins against hand-computed
 * component-wise results and prints a message if any lane differs.
 */
__kernel void test_min_max()
{
    /* volatile so the inputs are not treated as compile-time constants */
    volatile float4 lhs = (float4)(3.0f, 5.0f, -2.0f, -9.0f);
    volatile float4 rhs = (float4)(2.0f, -4.4f, -1.0f, -20.0f);

    /* Component-wise larger / smaller of the two input vectors. */
    const float4 want_max = (float4)(3.0f, 5.0f, -1.0f, -9.0f);
    const float4 want_min = (float4)(2.0f, -4.4f, -2.0f, -20.0f);

    const float4 got_max = max(lhs, rhs);
    const float4 got_min = min(lhs, rhs);

    /* Nothing to report when every lane of both results matches. */
    if (!any(got_max != want_max) && !any(got_min != want_min)) {
        return;
    }

    printf("min or max on float4 failed.\n");
}
/*
 * Element-wise integer addition: out[gid] = in1[gid] + in2[gid], where gid is
 * this work-item's global id in dimension 0.
 *
 * Every work-item also runs the test_sin_cos() and test_min_max() self-checks
 * before doing its addition.
 *
 * out - destination buffer
 * in1 - first operand buffer
 * in2 - second operand buffer
 *
 * No length is passed in, so the launch's global size must not exceed the
 * number of elements in the buffers.
 */
__kernel void vec_add(__global int *out,
                      __global const int *in1,
                      __global const int *in2)
{
    /* get_global_id() returns size_t; keeping that type avoids narrowing the
     * index to int, which could go negative for very large global sizes. */
    const size_t gid = get_global_id(0);

    test_sin_cos();
    test_min_max();

    out[gid] = in1[gid] + in2[gid];
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment