11a12,13 | |
> #include "ggml-cpu/unary-ops.h" | |
> #include "ggml-cpu/binary-ops.h" | |
4292,4625d4293 | |
< static void ggml_compute_forward_add_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT( nb0 == sizeof(float)); | |
< GGML_ASSERT(nb00 == sizeof(float)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(float)) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0; r < nr0; ++r) { | |
< #ifdef GGML_USE_ACCELERATE | |
< vDSP_vadd(src0_ptr + r*ne10, 1, src1_ptr, 1, dst_ptr + r*ne10, 1, ne10); | |
< #else | |
< ggml_vec_add_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< #endif | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< | |
< for (int64_t i0 = 0; i0 < ne0; ++i0) { | |
< const int64_t i10 = i0 % ne10; | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10); | |
< | |
< dst_ptr[i0] = src0_ptr[i0] + *src1_ptr; | |
< } | |
< } | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_add_f16_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_F16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_F32); | |
< | |
< if (dst->type == GGML_TYPE_F32) { | |
< GGML_ASSERT( nb0 == sizeof(float)); | |
< } | |
< else { | |
< GGML_ASSERT(dst->type == GGML_TYPE_F16); | |
< GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); | |
< } | |
< | |
< GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(float)) { | |
< if (dst->type == GGML_TYPE_F16) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src0, src1 and dst are same shape => same indices | |
< const int i3 = ir/(ne2*ne1); | |
< const int i2 = (ir - i3*ne2*ne1)/ne1; | |
< const int i1 = (ir - i3*ne2*ne1 - i2*ne1); | |
< | |
< ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); | |
< ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); | |
< | |
< for (int i = 0; i < ne0; i++) { | |
< dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]); | |
< } | |
< } | |
< } else { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src0, src1 and dst are same shape => same indices | |
< const int i3 = ir/(ne2*ne1); | |
< const int i2 = (ir - i3*ne2*ne1)/ne1; | |
< const int i1 = (ir - i3*ne2*ne1 - i2*ne1); | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); | |
< ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); | |
< | |
< for (int i = 0; i < ne0; i++) { | |
< dst_ptr[i] = GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]; | |
< } | |
< } | |
< } | |
< } | |
< else { | |
< // src1 is not contiguous | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_add_bf16_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_BF16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_F32); | |
< | |
< if (dst->type == GGML_TYPE_F32) { | |
< GGML_ASSERT( nb0 == sizeof(float)); | |
< } | |
< else { | |
< GGML_ASSERT(dst->type == GGML_TYPE_BF16); | |
< GGML_ASSERT( nb0 == sizeof(ggml_bf16_t)); | |
< } | |
< | |
< GGML_ASSERT(nb00 == sizeof(ggml_bf16_t)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(float)) { | |
< if (dst->type == GGML_TYPE_BF16) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src0, src1 and dst are same shape => same indices | |
< const int i3 = ir/(ne2*ne1); | |
< const int i2 = (ir - i3*ne2*ne1)/ne1; | |
< const int i1 = (ir - i3*ne2*ne1 - i2*ne1); | |
< | |
< ggml_bf16_t * dst_ptr = (ggml_bf16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); | |
< ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); | |
< | |
< for (int i = 0; i < ne0; i++) { | |
< dst_ptr[i] = GGML_FP32_TO_BF16(GGML_BF16_TO_FP32(src0_ptr[i]) + src1_ptr[i]); | |
< } | |
< } | |
< } else { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src0, src1 and dst are same shape => same indices | |
< const int i3 = ir/(ne2*ne1); | |
< const int i2 = (ir - i3*ne2*ne1)/ne1; | |
< const int i1 = (ir - i3*ne2*ne1 - i2*ne1); | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); | |
< ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); | |
< | |
< for (int i = 0; i < ne0; i++) { | |
< dst_ptr[i] = GGML_BF16_TO_FP32(src0_ptr[i]) + src1_ptr[i]; | |
< } | |
< } | |
< } | |
< } | |
< else { | |
< // src1 is not contiguous | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_add_f16_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_F16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_F16); | |
< GGML_ASSERT(dst->type == GGML_TYPE_F16); | |
< | |
< GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(ggml_fp16_t)) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0; r < nr0; ++r) { | |
< ggml_vec_add_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< } | |
< } | |
< } | |
< else { | |
< // src1 is not contiguous | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_add_bf16_bf16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_BF16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_BF16); | |
< GGML_ASSERT(dst->type == GGML_TYPE_BF16); | |
< | |
< GGML_ASSERT( nb0 == sizeof(ggml_bf16_t)); | |
< GGML_ASSERT(nb00 == sizeof(ggml_bf16_t)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(ggml_bf16_t)) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src0, src1 and dst are same shape => same indices | |
< const int i3 = ir/(ne2*ne1); | |
< const int i2 = (ir - i3*ne2*ne1)/ne1; | |
< const int i1 = (ir - i3*ne2*ne1 - i2*ne1); | |
< | |
< ggml_bf16_t * dst_ptr = (ggml_bf16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); | |
< ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); | |
< ggml_bf16_t * src1_ptr = (ggml_bf16_t *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); | |
< | |
< for (int i = 0; i < ne0; i++) { | |
< dst_ptr[i] = GGML_FP32_TO_BF16(GGML_BF16_TO_FP32(src0_ptr[i]) + GGML_BF16_TO_FP32(src1_ptr[i])); | |
< } | |
< } | |
< } | |
< else { | |
< // src1 is not contiguous | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< | |
4711,4718d4378 | |
< { | |
< if (src1->type == GGML_TYPE_F32) { | |
< ggml_compute_forward_add_f32(params, dst); | |
< } | |
< else { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } break; | |
4720,4730d4379 | |
< { | |
< if (src1->type == GGML_TYPE_F16) { | |
< ggml_compute_forward_add_f16_f16(params, dst); | |
< } | |
< else if (src1->type == GGML_TYPE_F32) { | |
< ggml_compute_forward_add_f16_f32(params, dst); | |
< } | |
< else { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } break; | |
4733,4741c4382 | |
< if (src1->type == GGML_TYPE_BF16) { | |
< ggml_compute_forward_add_bf16_bf16(params, dst); | |
< } | |
< else if (src1->type == GGML_TYPE_F32) { | |
< ggml_compute_forward_add_bf16_f32(params, dst); | |
< } | |
< else { | |
< GGML_ABORT("fatal error"); | |
< } | |
--- | |
> ggml_compute_forward_add_non_quantized(params, dst); | |
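The three hunks above delete only the bodies of the GGML_TYPE_F32 and GGML_TYPE_F16 cases and replace the GGML_TYPE_BF16 body with a single call, so the dispatch in ggml_compute_forward_add presumably ends up falling through into one shared branch, roughly as below. The case grouping is inferred from the hunk boundaries rather than shown verbatim in the diff:

    case GGML_TYPE_F32:
    case GGML_TYPE_F16:
    case GGML_TYPE_BF16:
        {
            ggml_compute_forward_add_non_quantized(params, dst);
        } break;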
5275,6090d4915 | |
< // ggml_compute_forward_sub | |
< | |
< static void ggml_compute_forward_sub_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< assert(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT( nb0 == sizeof(float)); | |
< GGML_ASSERT(nb00 == sizeof(float)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(float)) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0; r < nr0; ++r) { | |
< #ifdef GGML_USE_ACCELERATE | |
< vDSP_vsub(src1_ptr, 1, src0_ptr + r*ne10, 1, dst_ptr + r*ne10, 1, ne10); | |
< #else | |
< ggml_vec_sub_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< #endif | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< | |
< for (int64_t i0 = 0; i0 < ne0; ++i0) { | |
< const int64_t i10 = i0 % ne10; | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10); | |
< | |
< dst_ptr[i0] = src0_ptr[i0] - *src1_ptr; | |
< } | |
< } | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sub_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< assert(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_F16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_F16); | |
< GGML_ASSERT(dst->type == GGML_TYPE_F16); | |
< | |
< GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); | |
< | |
< // rows per thread | |
< const int dr = (nr + nth - 1)/nth; | |
< | |
< // row range for this thread | |
< const int ir0 = dr*ith; | |
< const int ir1 = MIN(ir0 + dr, nr); | |
< | |
< if (nb10 == sizeof(ggml_fp16_t)) { | |
< for (int ir = ir0; ir < ir1; ++ir) { | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0; r < nr0; ++r) { | |
< ggml_vec_sub_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< GGML_ABORT("unimplemented error"); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sub( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_sub_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_sub_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_mul | |
< | |
< static void ggml_compute_forward_mul_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int64_t nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT( nb0 == sizeof(float)); | |
< GGML_ASSERT(nb00 == sizeof(float)); | |
< | |
< if (nb10 == sizeof(float)) { | |
< for (int64_t ir = ith; ir < nr; ir += nth) { | |
< // src0 and dst are same shape => same indices | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0 ; r < nr0; ++r) { | |
< #ifdef GGML_USE_ACCELERATE | |
< UNUSED(ggml_vec_mul_f32); | |
< | |
< vDSP_vmul(src0_ptr + r*ne10, 1, src1_ptr, 1, dst_ptr + r*ne10, 1, ne10); | |
< #else | |
< ggml_vec_mul_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< #endif | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< for (int64_t ir = ith; ir < nr; ir += nth) { | |
< // src0 and dst are same shape => same indices | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< | |
< for (int64_t i0 = 0; i0 < ne00; ++i0) { | |
< const int64_t i10 = i0 % ne10; | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10); | |
< | |
< dst_ptr[i0] = src0_ptr[i0] * (*src1_ptr); | |
< } | |
< } | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_mul_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int64_t nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_F16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_F16); | |
< GGML_ASSERT(dst->type == GGML_TYPE_F16); | |
< | |
< GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); | |
< | |
< if (nb10 == sizeof(ggml_fp16_t)) { | |
< for (int64_t ir = ith; ir < nr; ir += nth) { | |
< // src0 and dst are same shape => same indices | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0 ; r < nr0; ++r) { | |
< ggml_vec_mul_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< GGML_ABORT("unimplemented error"); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_mul( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT((src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16) && "only f32/f16 src1 supported for now"); | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_mul_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_mul_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_div | |
< | |
< static void ggml_compute_forward_div_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int64_t nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT( nb0 == sizeof(float)); | |
< GGML_ASSERT(nb00 == sizeof(float)); | |
< | |
< if (nb10 == sizeof(float)) { | |
< for (int64_t ir = ith; ir < nr; ir += nth) { | |
< // src0 and dst are same shape => same indices | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0; r < nr0; ++r) { | |
< #ifdef GGML_USE_ACCELERATE | |
< UNUSED(ggml_vec_div_f32); | |
< | |
< vDSP_vdiv(src1_ptr, 1, src0_ptr + r*ne10, 1, dst_ptr + r*ne10, 1, ne10); | |
< #else | |
< ggml_vec_div_f32(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< #endif | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< for (int64_t ir = ith; ir < nr; ir += nth) { | |
< // src0 and dst are same shape => same indices | |
< // src1 is broadcastable across src0 and dst in i1, i2, i3 | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< | |
< float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< | |
< for (int64_t i0 = 0; i0 < ne00; ++i0) { | |
< const int64_t i10 = i0 % ne10; | |
< float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10); | |
< | |
< dst_ptr[i0] = src0_ptr[i0] / (*src1_ptr); | |
< } | |
< } | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_div_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< const struct ggml_tensor * src1 = dst->src[1]; | |
< | |
< GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); | |
< | |
< const int ith = params->ith; | |
< const int nth = params->nth; | |
< | |
< const int64_t nr = ggml_nrows(src0); | |
< | |
< GGML_TENSOR_BINARY_OP_LOCALS | |
< | |
< GGML_ASSERT(src0->type == GGML_TYPE_F16); | |
< GGML_ASSERT(src1->type == GGML_TYPE_F16); | |
< GGML_ASSERT(dst->type == GGML_TYPE_F16); | |
< | |
< GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); | |
< | |
< if (nb10 == sizeof(ggml_fp16_t)) { | |
< for (int64_t ir = ith; ir < nr; ir += nth) { | |
< // src0 and dst are same shape => same indices | |
< const int64_t i03 = ir/(ne02*ne01); | |
< const int64_t i02 = (ir - i03*ne02*ne01)/ne01; | |
< const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); | |
< | |
< const int64_t i13 = i03 % ne13; | |
< const int64_t i12 = i02 % ne12; | |
< const int64_t i11 = i01 % ne11; | |
< const int64_t nr0 = ne00 / ne10; | |
< | |
< ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); | |
< ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); | |
< ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); | |
< | |
< for (int64_t r = 0; r < nr0; ++r) { | |
< ggml_vec_div_f16(ne10, dst_ptr + r*ne10, src0_ptr + r*ne10, src1_ptr); | |
< } | |
< } | |
< } else { | |
< // src1 is not contiguous | |
< GGML_ABORT("unimplemented error"); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_div( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_div_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_div_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_sqr | |
< | |
< static void ggml_compute_forward_sqr_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< assert( dst->nb[0] == sizeof(float)); | |
< assert(src0->nb[0] == sizeof(float)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sqr_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sqr_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< assert( dst->nb[0] == sizeof(ggml_fp16_t)); | |
< assert(src0->nb[0] == sizeof(ggml_fp16_t)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sqr_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sqr( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_sqr_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_sqr_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_sqrt | |
< | |
< static void ggml_compute_forward_sqrt_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< assert( dst->nb[0] == sizeof(float)); | |
< assert(src0->nb[0] == sizeof(float)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sqrt_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sqrt_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< assert( dst->nb[0] == sizeof(ggml_fp16_t)); | |
< assert(src0->nb[0] == sizeof(ggml_fp16_t)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sqrt_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sqrt( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_sqrt_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_sqrt_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_log | |
< | |
< static void ggml_compute_forward_log_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< GGML_ASSERT( dst->nb[0] == sizeof(float)); | |
< GGML_ASSERT(src0->nb[0] == sizeof(float)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_log_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_log_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< GGML_ASSERT( dst->nb[0] == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_log_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_log( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_log_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_log_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_sin | |
< | |
< static void ggml_compute_forward_sin_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< GGML_ASSERT( dst->nb[0] == sizeof(float)); | |
< GGML_ASSERT(src0->nb[0] == sizeof(float)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sin_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sin_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< GGML_ASSERT( dst->nb[0] == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sin_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sin( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_sin_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_sin_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_cos | |
< | |
< static void ggml_compute_forward_cos_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< GGML_ASSERT( dst->nb[0] == sizeof(float)); | |
< GGML_ASSERT(src0->nb[0] == sizeof(float)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_cos_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_cos_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< GGML_ASSERT(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< GGML_ASSERT( dst->nb[0] == sizeof(ggml_fp16_t)); | |
< GGML_ASSERT(src0->nb[0] == sizeof(ggml_fp16_t)); | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_cos_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_cos( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_cos_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_cos_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
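Every kernel deleted in the hunk above (sub, mul, div, plus the sqr/sqrt/log/sin/cos group) repeats the same per-thread row split and src1-broadcast bookkeeping and differs only in the scalar operation applied. Below is a minimal sketch of the consolidated binary path, written in the style of this file; apply_binary_op_f32 and the op_* helpers are illustrative names, not the actual contents of binary-ops.cpp, and for brevity the sketch always walks element by element instead of keeping the contiguous-src1 fast path (ggml_vec_*_f32 / vDSP) that the deleted kernels had:

    static void apply_binary_op_f32(
            const struct ggml_compute_params * params,
            struct ggml_tensor * dst,
            float (*op)(float, float)) {

        const struct ggml_tensor * src0 = dst->src[0];
        const struct ggml_tensor * src1 = dst->src[1];

        GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));

        GGML_TENSOR_BINARY_OP_LOCALS

        GGML_ASSERT( nb0 == sizeof(float));
        GGML_ASSERT(nb00 == sizeof(float));

        const int ith = params->ith;
        const int nth = params->nth;
        const int nr  = ggml_nrows(src0);

        // rows per thread and row range for this thread, as in the deleted kernels
        const int dr  = (nr + nth - 1)/nth;
        const int ir0 = dr*ith;
        const int ir1 = MIN(ir0 + dr, nr);

        for (int ir = ir0; ir < ir1; ++ir) {
            const int64_t i03 = ir/(ne02*ne01);
            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

            // src1 is broadcastable across src0 and dst in i1, i2, i3
            const int64_t i13 = i03 % ne13;
            const int64_t i12 = i02 % ne12;
            const int64_t i11 = i01 % ne11;

            float * dst_ptr  = (float *) ((char *)  dst->data + i03*nb3  + i02*nb2  + i01*nb1 );
            float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

            for (int64_t i0 = 0; i0 < ne0; ++i0) {
                const int64_t i10 = i0 % ne10; // broadcast within the row as well
                const float * src1_ptr = (const float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10);

                dst_ptr[i0] = op(src0_ptr[i0], *src1_ptr);
            }
        }
    }

    // each per-op entry point then reduces to a scalar function, e.g.:
    static float op_sub(float a, float b) { return a - b; }
    static float op_mul(float a, float b) { return a * b; }
    static float op_div(float a, float b) { return a / b; }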
6854,7429d5678 | |
< // ggml_compute_forward_abs | |
< | |
< static void ggml_compute_forward_abs_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_abs_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_abs_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_abs_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_abs( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_abs_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_abs_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_sgn | |
< | |
< static void ggml_compute_forward_sgn_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sgn_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sgn_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sgn_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sgn( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_sgn_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_sgn_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_neg | |
< | |
< static void ggml_compute_forward_neg_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_neg_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_neg_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_neg_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_neg( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_neg_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_neg_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_step | |
< | |
< static void ggml_compute_forward_step_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_step_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_step_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_step_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_step( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_step_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_step_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_tanh | |
< | |
< static void ggml_compute_forward_tanh_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_tanh_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_tanh_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_tanh_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_tanh( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_tanh_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_tanh_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_elu | |
< | |
< static void ggml_compute_forward_elu_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_elu_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_elu_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_elu_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_elu( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_elu_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_elu_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_relu | |
< | |
< static void ggml_compute_forward_relu_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_relu_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_relu_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_relu_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_relu( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_relu_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_relu_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< // ggml_compute_forward_sigmoid | |
< | |
< static void ggml_compute_forward_sigmoid_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sigmoid_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sigmoid_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_sigmoid_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_sigmoid( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_sigmoid_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_sigmoid_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
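The unary kernels removed in the hunk above (abs, sgn, neg, step, tanh, elu, relu, sigmoid) are likewise one scalar function applied row by row, duplicated once for F32 and once for F16. A hedged sketch of the shared shape follows, again with illustrative names (apply_unary_op_f32, op_*) rather than the real unary-ops.cpp code; an F16 variant would wrap the same scalar op in GGML_FP16_TO_FP32 / GGML_FP32_TO_FP16 conversions per element:

    static void apply_unary_op_f32(
            const struct ggml_compute_params * params,
            struct ggml_tensor * dst,
            float (*op)(float)) {

        const struct ggml_tensor * src0 = dst->src[0];

        // these kernels run single-threaded, as in the deleted code
        if (params->ith != 0) {
            return;
        }

        GGML_ASSERT(ggml_is_contiguous_1(src0));
        GGML_ASSERT(ggml_is_contiguous_1(dst));
        GGML_ASSERT(ggml_are_same_shape(src0, dst));

        const int n  = ggml_nrows(src0);
        const int nc = src0->ne[0];

        for (int i = 0; i < n; i++) {
            float       * y = (float       *) ((char *)  dst->data + i*( dst->nb[1]));
            const float * x = (const float *) ((char *) src0->data + i*(src0->nb[1]));

            for (int k = 0; k < nc; k++) {
                y[k] = op(x[k]);
            }
        }
    }

    // example scalar ops matching the usual definitions
    static float op_abs (float x) { return x < 0.0f ? -x : x; }
    static float op_neg (float x) { return -x; }
    static float op_relu(float x) { return x > 0.0f ? x : 0.0f; }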
7932,8142d6180 | |
< | |
< static void ggml_compute_forward_hardswish_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_hardswish_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_hardswish_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_hardswish_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_hardswish( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_hardswish_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_hardswish_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_hardsigmoid_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_hardsigmoid_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_hardsigmoid_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_hardsigmoid_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_hardsigmoid( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_hardsigmoid_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_hardsigmoid_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_exp_f32( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_exp_f32(nc, | |
< (float *) ((char *) dst->data + i*( dst->nb[1])), | |
< (float *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_exp_f16( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< if (params->ith != 0) { | |
< return; | |
< } | |
< | |
< assert(ggml_is_contiguous_1(src0)); | |
< assert(ggml_is_contiguous_1(dst)); | |
< assert(ggml_are_same_shape(src0, dst)); | |
< | |
< const int n = ggml_nrows(src0); | |
< const int nc = src0->ne[0]; | |
< | |
< for (int i = 0; i < n; i++) { | |
< ggml_vec_exp_f16(nc, | |
< (ggml_fp16_t *) ((char *) dst->data + i*( dst->nb[1])), | |
< (ggml_fp16_t *) ((char *) src0->data + i*(src0->nb[1]))); | |
< } | |
< } | |
< | |
< static void ggml_compute_forward_exp( | |
< const struct ggml_compute_params * params, | |
< struct ggml_tensor * dst) { | |
< | |
< const struct ggml_tensor * src0 = dst->src[0]; | |
< | |
< switch (src0->type) { | |
< case GGML_TYPE_F32: | |
< { | |
< ggml_compute_forward_exp_f32(params, dst); | |
< } break; | |
< case GGML_TYPE_F16: | |
< { | |
< ggml_compute_forward_exp_f16(params, dst); | |
< } break; | |
< default: | |
< { | |
< GGML_ABORT("fatal error"); | |
< } | |
< } | |
< } | |
< |
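The final hunk removes hardswish, hardsigmoid and exp, which fit the same unary pattern. For reference, their standard scalar definitions, usable with the apply_unary_op_f32 sketch above (names are illustrative, and <math.h> is assumed for fminf/fmaxf/expf):

    static float op_hardsigmoid(float x) { return fminf(1.0f, fmaxf(0.0f, (x + 3.0f)/6.0f)); }
    static float op_hardswish  (float x) { return x * fminf(1.0f, fmaxf(0.0f, (x + 3.0f)/6.0f)); }
    static float op_exp        (float x) { return expf(x); }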