Dear all,

I am using OpenCL to run a procedure in my graphical board, which is yielding strange results. I've debugged the procedure the most I could, and I noticed that the error starts when I call the function C6_pow_CMP (see the attached code, marked with "// This is the line where the error starts"), which for some reason, is returning 1.#IND00e+000 when it shouldn't.

I launched a simpler procedure in my graphical board, where I also call the function C6_pow_CMP, but in this case the obtained results are the expected.

May this be some compilation bug of the graphical board? I would appreciate any comment or suggestion.

Later, I can send the C code that launches the procedure on the GPU and sets up all the variables to whoever may be interested.

Thank you

This is the code that is yielding the wrong results (it is a bit too long):
Code :
 
//// GPU code
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#define type float
#define N_DOF_SVP 10
#define N_DOF_SH 5
#define N_HOR_COORD 5
#define N_VBEM_HOR_COORD 0
#define N_X 5
#define N_VERT_BOUND 0
#define N_NODE_BEM 3
#define N_25D_BEM_ELEM 3
#define N_THIN_LAYER 5
#define K_ORDER_TLM 1
#define N_PSR 0
#define K_BEM_ORDER 1
#define N_DOF_1 9
#define N_DOF_2 0
#define N_DOF_F 0
 
void C6_add_CMP(type*, type*, type*);
void C6_sub_CMP(type*, type*, type*);
void C6_mult_CMP(type*, type*, type*);
void C6_div_CMP(type*, type*, type*);
void C6_exp_CMP(type*, type*);
void C6_pow_CMP(type*, type*, type);
 
__kernel void CalcTLM_25D_Integrals_GPU_SVP
(
	int nX,
	int nDofSVP,
	type fKy,
	__global type* ad1X,
	__global type* ad1Im3_SVP,
	__global type* ad1Im2_SVP,
	__global type* ad1Im1_SVP,
	__global type* ad1I0_SVP,
	__global type* ad1I1_SVP,
	__global type* ad1I2_SVP,
	__global type* ad1I3_SVP,
	__global type* ad1SVP_Poles
)
{
	int iMode = get_global_id(0) + 1;
	int jMode;
	int iX = get_global_id(1) + 1;
	int jX;
	int kSign;
	type fX;
	type fAux;
	type cmpAux1[3];
	type cmpAux2[3];
	type cmpAux3[3];
	type cmpIm[3];
	type cmpKj[3];
	type cmpKj2[3];
	type cmpKj2Ky2[3];
	type cmpKy[3];
	type cmpKy2[3];
	type cmpKy3[3];
	type cmpKy4[3];
	type cmpOne[3];
	type cmp_Exp_Ky_x[3];
	type cmp_Exp_sKj2Ky2_x[3];
	type cmp_sKj2Ky2[3];
	__global type* cmpIm3_RE;
	__global type* cmpIm3_IM;
	__global type* cmpIm2_RE;
	__global type* cmpIm2_IM;
	__global type* cmpIm1_RE;
	__global type* cmpIm1_IM;
	__global type* cmpI0_RE;
	__global type* cmpI0_IM;
	__global type* cmpI1_RE;
	__global type* cmpI1_IM;
	__global type* cmpI2_RE;
	__global type* cmpI2_IM;
	__global type* cmpI3_RE;
	__global type* cmpI3_IM;
 
	__global type* acmp1SVP_Poles[3];
	__global type* acmp1Im3_SVP[2*N_X*N_DOF_SVP+1];
	__global type* acmp1Im2_SVP[2*N_X*N_DOF_SVP+1];
	__global type* acmp1Im1_SVP[2*N_X*N_DOF_SVP+1];
	__global type* acmp1I0_SVP[2*N_X*N_DOF_SVP+1];
	__global type* acmp1I1_SVP[2*N_X*N_DOF_SVP+1];
	__global type* acmp1I2_SVP[2*N_X*N_DOF_SVP+1];
	__global type* acmp1I3_SVP[2*N_X*N_DOF_SVP+1];
	__global type** acmp2Im3_SVP[2*N_X+1];
	__global type** acmp2Im2_SVP[2*N_X+1];
	__global type** acmp2Im1_SVP[2*N_X+1];
	__global type** acmp2I0_SVP[2*N_X+1];
	__global type** acmp2I1_SVP[2*N_X+1];
	__global type** acmp2I2_SVP[2*N_X+1];
	__global type** acmp2I3_SVP[2*N_X+1];
	__global type*** acmp3Im3_SVP[3];
	__global type*** acmp3Im2_SVP[3];
	__global type*** acmp3Im1_SVP[3];
	__global type*** acmp3I0_SVP[3];
	__global type*** acmp3I1_SVP[3];
	__global type*** acmp3I2_SVP[3];
	__global type*** acmp3I3_SVP[3];
 
	/* Calculate pointers */
	acmp1SVP_Poles[1] = &ad1SVP_Poles[0];
	acmp1SVP_Poles[1]--;
	acmp1SVP_Poles[2] = &ad1SVP_Poles[nDofSVP];
	acmp1SVP_Poles[2]--;
 
	acmp3Im3_SVP[1] = &acmp2Im3_SVP[1];
	acmp3Im3_SVP[1]--;
	acmp3Im3_SVP[2] = &acmp2Im3_SVP[1+nX];
	acmp3Im3_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3Im3_SVP[1][jX] = &acmp1Im3_SVP[(jX-1)*nDofSVP + 1];
		acmp3Im3_SVP[1][jX]--;
		acmp3Im3_SVP[2][jX] = &acmp1Im3_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3Im3_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3Im3_SVP[1][jX][jMode] =
							&ad1Im3_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3Im3_SVP[1][jX][jMode]--;
			acmp3Im3_SVP[2][jX][jMode] =
							&ad1Im3_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3Im3_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3Im2_SVP[1] = &acmp2Im2_SVP[1];
	acmp3Im2_SVP[1]--;
	acmp3Im2_SVP[2] = &acmp2Im2_SVP[1+nX];
	acmp3Im2_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3Im2_SVP[1][jX] = &acmp1Im2_SVP[(jX-1)*nDofSVP + 1];
		acmp3Im2_SVP[1][jX]--;
		acmp3Im2_SVP[2][jX] = &acmp1Im2_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3Im2_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3Im2_SVP[1][jX][jMode] =
							&ad1Im2_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3Im2_SVP[1][jX][jMode]--;
			acmp3Im2_SVP[2][jX][jMode] =
							&ad1Im2_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3Im2_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3Im1_SVP[1] = &acmp2Im1_SVP[1];
	acmp3Im1_SVP[1]--;
	acmp3Im1_SVP[2] = &acmp2Im1_SVP[1+nX];
	acmp3Im1_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3Im1_SVP[1][jX] = &acmp1Im1_SVP[(jX-1)*nDofSVP + 1];
		acmp3Im1_SVP[1][jX]--;
		acmp3Im1_SVP[2][jX] = &acmp1Im1_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3Im1_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3Im1_SVP[1][jX][jMode] =
							&ad1Im1_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3Im1_SVP[1][jX][jMode]--;
			acmp3Im1_SVP[2][jX][jMode] =
							&ad1Im1_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3Im1_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I0_SVP[1] = &acmp2I0_SVP[1];
	acmp3I0_SVP[1]--;
	acmp3I0_SVP[2] = &acmp2I0_SVP[1+nX];
	acmp3I0_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I0_SVP[1][jX] = &acmp1I0_SVP[(jX-1)*nDofSVP + 1];
		acmp3I0_SVP[1][jX]--;
		acmp3I0_SVP[2][jX] = &acmp1I0_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3I0_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3I0_SVP[1][jX][jMode] =
							&ad1I0_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I0_SVP[1][jX][jMode]--;
			acmp3I0_SVP[2][jX][jMode] =
							&ad1I0_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I0_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I1_SVP[1] = &acmp2I1_SVP[1];
	acmp3I1_SVP[1]--;
	acmp3I1_SVP[2] = &acmp2I1_SVP[1+nX];
	acmp3I1_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I1_SVP[1][jX] = &acmp1I1_SVP[(jX-1)*nDofSVP + 1];
		acmp3I1_SVP[1][jX]--;
		acmp3I1_SVP[2][jX] = &acmp1I1_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3I1_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3I1_SVP[1][jX][jMode] =
							&ad1I1_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I1_SVP[1][jX][jMode]--;
			acmp3I1_SVP[2][jX][jMode] =
							&ad1I1_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I1_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I2_SVP[1] = &acmp2I2_SVP[1];
	acmp3I2_SVP[1]--;
	acmp3I2_SVP[2] = &acmp2I2_SVP[1+nX];
	acmp3I2_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I2_SVP[1][jX] = &acmp1I2_SVP[(jX-1)*nDofSVP + 1];
		acmp3I2_SVP[1][jX]--;
		acmp3I2_SVP[2][jX] = &acmp1I2_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3I2_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3I2_SVP[1][jX][jMode] =
							&ad1I2_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I2_SVP[1][jX][jMode]--;
			acmp3I2_SVP[2][jX][jMode] =
							&ad1I2_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I2_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I3_SVP[1] = &acmp2I3_SVP[1];
	acmp3I3_SVP[1]--;
	acmp3I3_SVP[2] = &acmp2I3_SVP[1+nX];
	acmp3I3_SVP[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I3_SVP[1][jX] = &acmp1I3_SVP[(jX-1)*nDofSVP + 1];
		acmp3I3_SVP[1][jX]--;
		acmp3I3_SVP[2][jX] = &acmp1I3_SVP[(nX+jX-1)*nDofSVP + 1];
		acmp3I3_SVP[2][jX]--;
		for (jMode = 1; jMode <= nDofSVP; jMode++)
		{
			acmp3I3_SVP[1][jX][jMode] =
							&ad1I3_SVP[(jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I3_SVP[1][jX][jMode]--;
			acmp3I3_SVP[2][jX][jMode] =
							&ad1I3_SVP[(nX+jX-1)*nDofSVP*6+(jMode-1)*6];
			acmp3I3_SVP[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	cmpKy[1] = fKy;
	cmpKy[2] = 0.0;
	cmpKy2[1] = fKy * fKy;
	cmpKy2[2] = 0.0;
	cmpKy3[1] = fabs(fKy * fKy * fKy);
	cmpKy3[2] = 0.0;
	cmpKy4[1] = fKy * fKy * fKy * fKy;
	cmpKy4[2] = 0.0;
	cmpOne[1] = 1.0;
	cmpOne[2] = 0.0;
	cmpIm[1] = 0.0;
	cmpIm[2] = 1.0;
 
	cmpKj[1] = acmp1SVP_Poles[1][iMode];
	cmpKj[2] = acmp1SVP_Poles[2][iMode];
	C6_mult_CMP(cmpKj2, cmpKj, cmpKj);
	C6_sub_CMP(cmpKj2Ky2, cmpKj2, cmpKy2);
	fAux = 0.5;
	C6_pow_CMP(cmp_sKj2Ky2, cmpKj2Ky2, fAux); // This is the line where the error starts
	if (cmp_sKj2Ky2[2] > 0.0)
	{
		cmp_sKj2Ky2[1] = -cmp_sKj2Ky2[1];
		cmp_sKj2Ky2[2] = -cmp_sKj2Ky2[2];
	}
 
	/* Calculate SVP integrals */
	cmpIm3_RE = acmp3Im3_SVP[1][iX][iMode];
	cmpIm3_IM = acmp3Im3_SVP[2][iX][iMode];
	cmpIm2_RE = acmp3Im2_SVP[1][iX][iMode];
	cmpIm2_IM = acmp3Im2_SVP[2][iX][iMode];
	cmpIm1_RE = acmp3Im1_SVP[1][iX][iMode];
	cmpIm1_IM = acmp3Im1_SVP[2][iX][iMode];
	cmpI0_RE = acmp3I0_SVP[1][iX][iMode];
	cmpI0_IM = acmp3I0_SVP[2][iX][iMode];
	cmpI1_RE = acmp3I1_SVP[1][iX][iMode];
	cmpI1_IM = acmp3I1_SVP[2][iX][iMode];
	cmpI2_RE = acmp3I2_SVP[1][iX][iMode];
	cmpI2_IM = acmp3I2_SVP[2][iX][iMode];
	cmpI3_RE = acmp3I3_SVP[1][iX][iMode];
	cmpI3_IM = acmp3I3_SVP[2][iX][iMode];
 
	fX = ad1X[iX - 1];
	if (fX > 0.0) kSign = 1;
	else if (fX == 0.0) kSign = 0;
	else if (fX < 0.0) kSign = -1;
	fX = fabs(fX);
 
	cmpAux1[1] = fX * cmp_sKj2Ky2[2];
	cmpAux1[2] = -fX * cmp_sKj2Ky2[1];
	C6_exp_CMP(cmp_Exp_sKj2Ky2_x, cmpAux1);
 
	cmpAux1[1] = -fabs(fX * fKy);
	cmpAux1[2] = 0.0;
	C6_exp_CMP(cmp_Exp_Ky_x, cmpAux1);
 
	/* Im3,1 */
	cmpAux3[1] = fX * fX / 2.0;
	cmpAux3[2] = 0.0;
	C6_div_CMP(cmpAux1, cmpOne, cmpKj2Ky2);
	cmpAux3[1] -= cmpAux1[1];
	cmpAux3[2] -= cmpAux1[2];
	C6_div_CMP(cmpAux1, cmp_Exp_sKj2Ky2_x, cmpKj2Ky2);
	cmpAux3[1] += cmpAux1[1];
	cmpAux3[2] += cmpAux1[2];
	cmpAux1[1] = 2.0 * cmpKj2Ky2[2];
	cmpAux1[2] = -2.0 * cmpKj2Ky2[1];
	C6_div_CMP(cmpAux2, cmpAux3, cmpAux1);
	cmpAux3[1] = -kSign * cmpAux2[1];
	cmpAux3[2] = -kSign * cmpAux2[2];
	cmpIm3_RE[1] = cmpKj[1];
	cmpIm3_IM[1] = cmpKj[2];
 
	/* Im3,2 */
	cmpAux1[1] = fX;
	cmpAux1[2] = 0.0;
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2Ky2);
	cmpAux1[1] = fabs(fKy);
	cmpAux1[2] = 0.0;
	C6_mult_CMP(cmpAux3, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmp_Exp_Ky_x, cmpAux3);
	cmpAux2[1] += cmpAux1[1];
	cmpAux2[2] += cmpAux1[2];
	cmpAux1[1] = 0.0;
	cmpAux1[2] = -fKy * fKy;
	C6_mult_CMP(cmpAux3, cmpAux1, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux1, cmpAux3, cmpKj2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2Ky2);
	C6_div_CMP(cmpAux1, cmpAux3, cmp_sKj2Ky2);
	cmpAux2[1] += cmpAux1[1];
	cmpAux2[2] += cmpAux1[2];
	C6_div_CMP(cmpAux3, cmpAux2, cmpKy);
	cmpAux3[1] /= 2.0;
	cmpAux3[2] /= 2.0;
	cmpIm3_RE[2] = cmpKj2[1];
	cmpIm3_IM[2] = cmpKj2[2];
 
	/* Im3,3 */
	C6_div_CMP(cmpAux1, cmpOne, cmpKj2Ky2);
	cmpAux1[1] = -cmpAux1[1];
	cmpAux1[2] = -cmpAux1[2];
	C6_div_CMP(cmpAux2, cmp_Exp_Ky_x, cmpKj2);
	cmpAux1[1] += cmpAux2[1];
	cmpAux1[2] += cmpAux2[2];
	C6_mult_CMP(cmpAux2, cmpKy2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux3, cmpAux2, cmpKj2);
	C6_div_CMP(cmpAux2, cmpAux3, cmpKj2Ky2);
	cmpAux1[1] += cmpAux2[1];
	cmpAux1[2] += cmpAux2[2];
	cmpAux2[1] = 0.0;
	cmpAux2[2] = 2.0 * fKy * fKy;
	C6_div_CMP(cmpAux3, cmpAux1, cmpAux2);
	cmpIm3_RE[3] = cmpKj2Ky2[1];
	cmpIm3_IM[3] = cmpKj2Ky2[2];
 
	/* Im3,4 */
	cmpAux1[1] = -fX * fX / 2.0;
	cmpAux1[2] = 0.0;
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2Ky2);
	cmpAux1[1] = 2.0 * cmpKy2[1] - cmpKj2[1];
	cmpAux1[2] = 2.0 * cmpKy2[2] - cmpKj2[2];
	C6_div_CMP(cmpAux3, cmpAux1, cmpKy2);
	C6_div_CMP(cmpAux1, cmpAux3, cmpKj2Ky2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2Ky2);
	cmpAux2[1] += cmpAux3[1];
	cmpAux2[2] += cmpAux3[2];
	C6_div_CMP(cmpAux1, cmp_Exp_Ky_x, cmpKy2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2);
	cmpAux2[1] += cmpAux3[1];
	cmpAux2[2] += cmpAux3[2];
	C6_mult_CMP(cmpAux1, cmpKy2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux3, cmpKj2Ky2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2Ky2);
	cmpAux2[1] -= cmpAux3[1];
	cmpAux2[2] -= cmpAux3[2];
	C6_div_CMP(cmpAux3, cmpAux2, cmpIm);
	cmpAux3[1] *= -kSign / 2.0;
	cmpAux3[2] *= -kSign / 2.0;
	cmpIm3_RE[4] = cmp_sKj2Ky2[1];
	cmpIm3_IM[4] = cmp_sKj2Ky2[2];
 
	/* Im3,5 */
	cmpAux1[1] = fX;
	cmpAux1[2] = 0.0;
	C6_div_CMP(cmpAux2, cmp_Exp_sKj2Ky2_x, cmp_sKj2Ky2);
	C6_mult_CMP(cmpAux3, cmpAux2, cmpIm);
	cmpAux1[1] -= cmpAux3[1];
	cmpAux1[2] -= cmpAux3[2];
	C6_mult_CMP(cmpAux2, cmpKj, cmpKj2Ky2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpAux2);
	cmpIm3_RE[5] = cmp_Exp_sKj2Ky2_x[1];
	cmpIm3_IM[5] = cmp_Exp_sKj2Ky2_x[2];
 
	/* Im3,6 */
	cmpAux1[1] = fKy * cmpIm3_RE[1];
	cmpAux1[2] = fKy * cmpIm3_IM[1];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpIm3_RE[6] = cmp_Exp_Ky_x[1];
	cmpIm3_IM[6] = cmp_Exp_Ky_x[2];
 
	if (fKy == 0)
	{
		cmpIm3_RE[2] = 0.0;
		cmpIm3_IM[2] = 0.0;
		cmpIm3_RE[4] = 0.0;
		cmpIm3_IM[4] = 0.0;
		cmpIm3_RE[6] = 0.0;
		cmpIm3_IM[6] = 0.0;
		cmpIm3_RE[3] = cmpIm3_RE[1];
		cmpIm3_IM[3] = cmpIm3_IM[1];
	} /* if */
 
	/* Im2,1 */
	cmpAux1[1] = cmpIm3_RE[5];
	cmpAux1[2] = cmpIm3_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpIm2_RE[1] = cmpAux2[1];
	cmpIm2_IM[1] = cmpAux2[2];
 
	/* Im2,2 */
	cmpIm2_RE[2] = cmpIm3_RE[3] *fKy;
	cmpIm2_IM[2] = cmpIm3_IM[3] *fKy;
 
	/* Im2,3 */
	cmpAux1[1] = cmp_Exp_Ky_x[1] / fabs(fKy);
	cmpAux1[2] = cmp_Exp_Ky_x[2] / fabs(fKy);
	C6_div_CMP(cmpAux2, cmp_Exp_sKj2Ky2_x, cmp_sKj2Ky2);
	cmpAux1[1] -= cmpAux2[2];
	cmpAux1[2] += cmpAux2[1];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	cmpIm2_RE[3] = -cmpAux2[1] / 2.0;
	cmpIm2_IM[3] = -cmpAux2[2] / 2.0;
 
	/* Im2,4 */
	cmpIm2_RE[4] = cmpIm3_RE[2] *fKy;
	cmpIm2_IM[4] = cmpIm3_IM[2] *fKy;
 
	/* Im2,5 */
	C6_sub_CMP(cmpAux1, cmpOne, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2Ky2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpKj);
	C6_div_CMP(cmpAux2, cmpAux1, cmpIm);
	cmpIm2_RE[5] = -kSign * cmpAux2[1] / 2.0;
	cmpIm2_IM[5] = -kSign * cmpAux2[2] / 2.0;
 
	/* Im2,6 */
	cmpIm2_RE[6] = cmpIm3_RE[5] *fKy;
	cmpIm2_IM[6] = cmpIm3_IM[5] *fKy;
 
	if (fKy == 0)
	{
		cmpIm2_RE[2] = 0.0;
		cmpIm2_IM[2] = 0.0;
		cmpIm2_RE[4] = 0.0;
		cmpIm2_IM[4] = 0.0;
		cmpIm2_RE[6] = 0.0;
		cmpIm2_IM[6] = 0.0;
		cmpIm2_RE[3] = cmpIm2_RE[1];
		cmpIm2_IM[3] = cmpIm2_IM[1];
	} /* if */
 
	/* Im1,1 */
	cmpAux1[1] = cmpIm2_RE[5];
	cmpAux1[2] = cmpIm2_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpIm1_RE[1] = cmpAux2[1];
	cmpIm1_IM[1] = cmpAux2[2];
 
	/* Im1,2 */
	cmpIm1_RE[2] = cmpIm2_RE[3] *fKy;
	cmpIm1_IM[2] = cmpIm2_IM[3] *fKy;
 
	/* Im1,3 */
	C6_sub_CMP(cmpAux1, cmp_Exp_Ky_x, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpIm1_RE[3] = -kSign * cmpAux1[1] / 2.0;
	cmpIm1_IM[3] = -kSign * cmpAux1[2] / 2.0;
 
	/* Im1,4 */
	cmpIm1_RE[4] = cmpIm2_RE[2] *fKy;
	cmpIm1_IM[4] = cmpIm2_IM[2] *fKy;
 
	/* Im1,5 */
	C6_div_CMP(cmpAux1, cmp_Exp_sKj2Ky2_x, cmp_sKj2Ky2);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpIm1_RE[5] = cmpAux1[1] / 2.0;
	cmpIm1_IM[5] = cmpAux1[2] / 2.0;
 
	/* Im1,6 */
	cmpIm1_RE[6] = cmpIm2_RE[5] *fKy;
	cmpIm1_IM[6] = cmpIm2_IM[5] *fKy;
 
	/* I0,1 */
	cmpAux1[1] = cmpIm1_RE[5];
	cmpAux1[2] = cmpIm1_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpI0_RE[1] = cmpAux2[1];
	cmpI0_IM[1] = cmpAux2[2];
 
	/* I0,2 */
	cmpI0_RE[2] = cmpIm1_RE[3] * fKy;
	cmpI0_IM[2] = cmpIm1_IM[3] * fKy;
 
	/* I0,3 */
	C6_mult_CMP(cmpAux1, cmp_sKj2Ky2, cmp_Exp_sKj2Ky2_x);
	cmpAux2[1] = cmp_Exp_Ky_x[1] * fabs(fKy);
	cmpAux2[2] = cmp_Exp_Ky_x[2] * fabs(fKy);
	cmpAux1[1] -= cmpAux2[2];
	cmpAux1[2] += cmpAux2[1];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI0_RE[3] = cmpAux1[1] / 2.0;
	cmpI0_IM[3] = cmpAux1[2] / 2.0;
 
	/* I0,4 */
	cmpI0_RE[4] = cmpIm1_RE[2] * fKy;
	cmpI0_IM[4] = cmpIm1_IM[2] * fKy;
 
	/* I0,5 */
	C6_div_CMP(cmpAux1, cmp_Exp_sKj2Ky2_x, cmpKj);
	C6_div_CMP(cmpAux2, cmpAux1, cmpIm);
	cmpI0_RE[5] = kSign * cmpAux2[1] / 2.0;
	cmpI0_IM[5] = kSign * cmpAux2[2] / 2.0;
 
	/* I0,6 */
	cmpI0_RE[6] = cmpIm1_RE[5] * fKy;
	cmpI0_IM[6] = cmpIm1_IM[5] * fKy;
 
	/* I1,1 */
	cmpAux1[1] = cmpI0_RE[5];
	cmpAux1[2] = cmpI0_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpI1_RE[1] = cmpAux2[1];
	cmpI1_IM[1] = cmpAux2[2];
 
	/* I1,2 */
	cmpI1_RE[2] = cmpI0_RE[3] * fKy;
	cmpI1_IM[2] = cmpI0_IM[3] * fKy;
 
	/* I1,3 */
	C6_mult_CMP(cmpAux1, cmpKj2Ky2, cmp_Exp_sKj2Ky2_x);
	C6_mult_CMP(cmpAux2, cmpKy2, cmp_Exp_Ky_x);
	cmpAux1[1] += cmpAux2[1];
	cmpAux1[2] += cmpAux2[2];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI1_RE[3] = kSign * cmpAux1[1] / 2.0;
	cmpI1_IM[3] = kSign * cmpAux1[2] / 2.0;
 
	/* I1,4 */
	cmpI1_RE[4] = cmpI0_RE[2] * fKy;
	cmpI1_IM[4] = cmpI0_IM[2] * fKy;
 
	/* I1,5 */
	C6_mult_CMP(cmpAux1, cmp_sKj2Ky2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI1_RE[5] = cmpAux1[1] / 2.0;
	cmpI1_IM[5] = cmpAux1[2] / 2.0;
 
	/* I1,6 */
	cmpI1_RE[6] = cmpI0_RE[5] * fKy;
	cmpI1_IM[6] = cmpI0_IM[5] * fKy;
 
	/* I2,1 */
	cmpAux1[1] = cmpI1_RE[5];
	cmpAux1[2] = cmpI1_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpI2_RE[1] = cmpAux2[1];
	cmpI2_IM[1] = cmpAux2[2];
 
	/* I2,2 */
	cmpI2_RE[2] = cmpI1_RE[3] * fKy;
	cmpI2_IM[2] = cmpI1_IM[3] * fKy;
 
	/* I2,3 */
	C6_mult_CMP(cmpAux1, cmp_sKj2Ky2, cmpKj2Ky2);
	C6_mult_CMP(cmpAux2, cmpAux1, cmp_Exp_sKj2Ky2_x);
	C6_mult_CMP(cmpAux1, cmp_Exp_Ky_x, cmpKy2);
	cmpAux1[1] *= fabs(fKy);
	cmpAux1[2] *= fabs(fKy);
	cmpAux2[1] += cmpAux1[2];
	cmpAux2[2] -= cmpAux1[1];
	C6_div_CMP(cmpAux1, cmpAux2, cmpKj2);
	C6_div_CMP(cmpAux2, cmpAux1, cmpIm);
	cmpI2_RE[3] = cmpAux2[1] / 2.0;
	cmpI2_IM[3] = cmpAux2[2] / 2.0;
 
	/* I2,4 */
	cmpI2_RE[4] = cmpI1_RE[2] * fKy;
	cmpI2_IM[4] = cmpI1_IM[2] * fKy;
 
	/* I2,5 */
	C6_mult_CMP(cmpAux1, cmpKj2Ky2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI2_RE[5] = kSign * cmpAux1[1] / 2.0;
	cmpI2_IM[5] = kSign * cmpAux1[2] / 2.0;
 
	/* I2,6 */
	cmpI2_RE[6] = cmpI1_RE[5] * fKy;
	cmpI2_IM[6] = cmpI1_IM[5] * fKy;
 
	/* I3,1 */
	cmpAux1[1] = cmpI2_RE[5];
	cmpAux1[2] = cmpI2_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpI3_RE[1] = cmpAux2[1];
	cmpI3_IM[1] = cmpAux2[2];
 
	/* I3,2 */
	cmpI3_RE[2] = cmpI2_RE[3] * fKy;
	cmpI3_IM[2] = cmpI2_IM[3] * fKy;
 
	/* I3,3 */
	C6_mult_CMP(cmpAux1, cmpKj2Ky2, cmpKj2Ky2);
	C6_mult_CMP(cmpAux2, cmpAux1, cmp_Exp_sKj2Ky2_x);
	C6_mult_CMP(cmpAux1, cmp_Exp_Ky_x, cmpKy4);
	cmpAux2[1] -= cmpAux1[1];
	cmpAux2[2] -= cmpAux1[2];
	C6_div_CMP(cmpAux1, cmpAux2, cmpKj2);
	C6_div_CMP(cmpAux2, cmpAux1, cmpIm);
	cmpI3_RE[3] = kSign * cmpAux2[1] / 2.0;
	cmpI3_IM[3] = kSign * cmpAux2[2] / 2.0;
 
	/* I3,4 */
	cmpI3_RE[4] = cmpI2_RE[2] * fKy;
	cmpI3_IM[4] = cmpI2_IM[2] * fKy;
 
	/* I3,5 */
	C6_mult_CMP(cmpAux2, cmp_sKj2Ky2, cmpKj2Ky2);
	C6_mult_CMP(cmpAux1, cmpAux2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI3_RE[5] = cmpAux1[1] / 2.0;
	cmpI3_IM[5] = cmpAux1[2] / 2.0;
 
	/* I3,6 */
	cmpI3_RE[6] = cmpI2_RE[5] * fKy;
	cmpI3_IM[6] = cmpI2_IM[5] * fKy;
} /* CalcTLM_25D_Integrals_GPU_SVP */
 
__kernel void CalcTLM_25D_Integrals_GPU_SH
(
	int nX,
	int nDofSH,
	type fKy,
	__global type* ad1X,
	__global type* ad1Im3_SH,
	__global type* ad1Im2_SH,
	__global type* ad1Im1_SH,
	__global type* ad1I0_SH,
	__global type* ad1I1_SH,
	__global type* ad1I2_SH,
	__global type* ad1I3_SH,
	__global type* ad1SH_Poles
)
{
	int iMode = get_global_id(0) + 1;
	int jMode;
	int iX = get_global_id(1) + 1;
	int jX;
	int kSign;
	type fX;
	type fAux;
	type cmpAux1[3];
	type cmpAux2[3];
	type cmpAux3[3];
	type cmpIm[3];
	type cmpKj[3];
	type cmpKj2[3];
	type cmpKj2Ky2[3];
	type cmpKy[3];
	type cmpKy2[3];
	type cmpKy3[3];
	type cmpKy4[3];
	type cmpOne[3];
	type cmp_Exp_Ky_x[3];
	type cmp_Exp_sKj2Ky2_x[3];
	type cmp_sKj2Ky2[3];
	__global type* cmpIm3_RE;
	__global type* cmpIm3_IM;
	__global type* cmpIm2_RE;
	__global type* cmpIm2_IM;
	__global type* cmpIm1_RE;
	__global type* cmpIm1_IM;
	__global type* cmpI0_RE;
	__global type* cmpI0_IM;
	__global type* cmpI1_RE;
	__global type* cmpI1_IM;
	__global type* cmpI2_RE;
	__global type* cmpI2_IM;
	__global type* cmpI3_RE;
	__global type* cmpI3_IM;
 
	__global type* acmp1SH_Poles[3];
	__global type* acmp1Im3_SH[2*N_X*N_DOF_SH+1];
	__global type* acmp1Im2_SH[2*N_X*N_DOF_SH+1];
	__global type* acmp1Im1_SH[2*N_X*N_DOF_SH+1];
	__global type* acmp1I0_SH[2*N_X*N_DOF_SH+1];
	__global type* acmp1I1_SH[2*N_X*N_DOF_SH+1];
	__global type* acmp1I2_SH[2*N_X*N_DOF_SH+1];
	__global type* acmp1I3_SH[2*N_X*N_DOF_SH+1];
	__global type** acmp2Im3_SH[2*N_X+1];
	__global type** acmp2Im2_SH[2*N_X+1];
	__global type** acmp2Im1_SH[2*N_X+1];
	__global type** acmp2I0_SH[2*N_X+1];
	__global type** acmp2I1_SH[2*N_X+1];
	__global type** acmp2I2_SH[2*N_X+1];
	__global type** acmp2I3_SH[2*N_X+1];
	__global type*** acmp3Im3_SH[3];
	__global type*** acmp3Im2_SH[3];
	__global type*** acmp3Im1_SH[3];
	__global type*** acmp3I0_SH[3];
	__global type*** acmp3I1_SH[3];
	__global type*** acmp3I2_SH[3];
	__global type*** acmp3I3_SH[3];
 
	/* Calculate pointers */
	acmp1SH_Poles[1] = &ad1SH_Poles[0];
	acmp1SH_Poles[1]--;
	acmp1SH_Poles[2] = &ad1SH_Poles[nDofSH];
	acmp1SH_Poles[2]--;
 
	acmp3Im3_SH[1] = &acmp2Im3_SH[1];
	acmp3Im3_SH[1]--;
	acmp3Im3_SH[2] = &acmp2Im3_SH[1+nX];
	acmp3Im3_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3Im3_SH[1][jX] = &acmp1Im3_SH[(jX-1)*nDofSH + 1];
		acmp3Im3_SH[1][jX]--;
		acmp3Im3_SH[2][jX] = &acmp1Im3_SH[(nX+jX-1)*nDofSH + 1];
		acmp3Im3_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3Im3_SH[1][jX][jMode] =
							&ad1Im3_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3Im3_SH[1][jX][jMode]--;
			acmp3Im3_SH[2][jX][jMode] =
							&ad1Im3_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3Im3_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3Im2_SH[1] = &acmp2Im2_SH[1];
	acmp3Im2_SH[1]--;
	acmp3Im2_SH[2] = &acmp2Im2_SH[1+nX];
	acmp3Im2_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3Im2_SH[1][jX] = &acmp1Im2_SH[(jX-1)*nDofSH + 1];
		acmp3Im2_SH[1][jX]--;
		acmp3Im2_SH[2][jX] = &acmp1Im2_SH[(nX+jX-1)*nDofSH + 1];
		acmp3Im2_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3Im2_SH[1][jX][jMode] =
							&ad1Im2_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3Im2_SH[1][jX][jMode]--;
			acmp3Im2_SH[2][jX][jMode] =
							&ad1Im2_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3Im2_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3Im1_SH[1] = &acmp2Im1_SH[1];
	acmp3Im1_SH[1]--;
	acmp3Im1_SH[2] = &acmp2Im1_SH[1+nX];
	acmp3Im1_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3Im1_SH[1][jX] = &acmp1Im1_SH[(jX-1)*nDofSH + 1];
		acmp3Im1_SH[1][jX]--;
		acmp3Im1_SH[2][jX] = &acmp1Im1_SH[(nX+jX-1)*nDofSH + 1];
		acmp3Im1_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3Im1_SH[1][jX][jMode] =
							&ad1Im1_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3Im1_SH[1][jX][jMode]--;
			acmp3Im1_SH[2][jX][jMode] =
							&ad1Im1_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3Im1_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I0_SH[1] = &acmp2I0_SH[1];
	acmp3I0_SH[1]--;
	acmp3I0_SH[2] = &acmp2I0_SH[1+nX];
	acmp3I0_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I0_SH[1][jX] = &acmp1I0_SH[(jX-1)*nDofSH + 1];
		acmp3I0_SH[1][jX]--;
		acmp3I0_SH[2][jX] = &acmp1I0_SH[(nX+jX-1)*nDofSH + 1];
		acmp3I0_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3I0_SH[1][jX][jMode] =
							&ad1I0_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I0_SH[1][jX][jMode]--;
			acmp3I0_SH[2][jX][jMode] =
							&ad1I0_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I0_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I1_SH[1] = &acmp2I1_SH[1];
	acmp3I1_SH[1]--;
	acmp3I1_SH[2] = &acmp2I1_SH[1+nX];
	acmp3I1_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I1_SH[1][jX] = &acmp1I1_SH[(jX-1)*nDofSH + 1];
		acmp3I1_SH[1][jX]--;
		acmp3I1_SH[2][jX] = &acmp1I1_SH[(nX+jX-1)*nDofSH + 1];
		acmp3I1_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3I1_SH[1][jX][jMode] =
							&ad1I1_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I1_SH[1][jX][jMode]--;
			acmp3I1_SH[2][jX][jMode] =
							&ad1I1_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I1_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I2_SH[1] = &acmp2I2_SH[1];
	acmp3I2_SH[1]--;
	acmp3I2_SH[2] = &acmp2I2_SH[1+nX];
	acmp3I2_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I2_SH[1][jX] = &acmp1I2_SH[(jX-1)*nDofSH + 1];
		acmp3I2_SH[1][jX]--;
		acmp3I2_SH[2][jX] = &acmp1I2_SH[(nX+jX-1)*nDofSH + 1];
		acmp3I2_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3I2_SH[1][jX][jMode] =
							&ad1I2_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I2_SH[1][jX][jMode]--;
			acmp3I2_SH[2][jX][jMode] =
							&ad1I2_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I2_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	acmp3I3_SH[1] = &acmp2I3_SH[1];
	acmp3I3_SH[1]--;
	acmp3I3_SH[2] = &acmp2I3_SH[1+nX];
	acmp3I3_SH[2]--;
	for (jX = 1; jX <= nX; jX++)
	{
		acmp3I3_SH[1][jX] = &acmp1I3_SH[(jX-1)*nDofSH + 1];
		acmp3I3_SH[1][jX]--;
		acmp3I3_SH[2][jX] = &acmp1I3_SH[(nX+jX-1)*nDofSH + 1];
		acmp3I3_SH[2][jX]--;
		for (jMode = 1; jMode <= nDofSH; jMode++)
		{
			acmp3I3_SH[1][jX][jMode] =
							&ad1I3_SH[(jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I3_SH[1][jX][jMode]--;
			acmp3I3_SH[2][jX][jMode] =
							&ad1I3_SH[(nX+jX-1)*nDofSH*4+(jMode-1)*4];
			acmp3I3_SH[2][jX][jMode]--;
		} /* for (jMode) */
	} /* for (jX) */
 
	cmpKy[1] = fKy;
	cmpKy[2] = 0.0;
	cmpKy2[1] = fKy * fKy;
	cmpKy2[2] = 0.0;
	cmpKy3[1] = fabs(fKy * fKy * fKy);
	cmpKy3[2] = 0.0;
	cmpKy4[1] = fKy * fKy * fKy * fKy;
	cmpKy4[2] = 0.0;
	cmpOne[1] = 1.0;
	cmpOne[2] = 0.0;
	cmpIm[1] = 0.0;
	cmpIm[2] = 1.0;
 
	cmpKj[1] = acmp1SH_Poles[1][iMode];
	cmpKj[2] = acmp1SH_Poles[2][iMode];
	C6_mult_CMP(cmpKj2, cmpKj, cmpKj);
	C6_sub_CMP(cmpKj2Ky2, cmpKj2, cmpKy2);
	fAux = 0.5;
	C6_pow_CMP(cmp_sKj2Ky2, cmpKj2Ky2, fAux);
	if (cmp_sKj2Ky2[2] > 0.0)
	{
		cmp_sKj2Ky2[1] = -cmp_sKj2Ky2[1];
		cmp_sKj2Ky2[2] = -cmp_sKj2Ky2[2];
	}
 
	/* Calculate SH integrals */
	cmpIm3_RE = acmp3Im3_SH[1][iX][iMode];
	cmpIm3_IM = acmp3Im3_SH[2][iX][iMode];
	cmpIm2_RE = acmp3Im2_SH[1][iX][iMode];
	cmpIm2_IM = acmp3Im2_SH[2][iX][iMode];
	cmpIm1_RE = acmp3Im1_SH[1][iX][iMode];
	cmpIm1_IM = acmp3Im1_SH[2][iX][iMode];
	cmpI0_RE = acmp3I0_SH[1][iX][iMode];
	cmpI0_IM = acmp3I0_SH[2][iX][iMode];
	cmpI1_RE = acmp3I1_SH[1][iX][iMode];
	cmpI1_IM = acmp3I1_SH[2][iX][iMode];
	cmpI2_RE = acmp3I2_SH[1][iX][iMode];
	cmpI2_IM = acmp3I2_SH[2][iX][iMode];
	cmpI3_RE = acmp3I3_SH[1][iX][iMode];
	cmpI3_IM = acmp3I3_SH[2][iX][iMode];
 
	fX = ad1X[iX - 1];
	if (fX > 0.0) kSign = 1;
	else if (fX == 0.0) kSign = 0;
	else if (fX < 0.0) kSign = -1;
	fX = fabs(fX);
 
	cmpAux1[1] = fX * cmp_sKj2Ky2[2];
	cmpAux1[2] = -fX * cmp_sKj2Ky2[1];
	C6_exp_CMP(cmp_Exp_sKj2Ky2_x, cmpAux1);
 
	cmpAux1[1] = -fabs(fX * fKy);
	cmpAux1[2] = 0.0;
	C6_exp_CMP(cmp_Exp_Ky_x, cmpAux1);
 
	/* Im3,1 */
	cmpAux3[1] = fX * fX / 2.0;
	cmpAux3[2] = 0.0;
	C6_div_CMP(cmpAux1, cmpOne, cmpKj2Ky2);
	cmpAux3[1] -= cmpAux1[1];
	cmpAux3[2] -= cmpAux1[2];
	C6_div_CMP(cmpAux1, cmp_Exp_sKj2Ky2_x, cmpKj2Ky2);
	cmpAux3[1] += cmpAux1[1];
	cmpAux3[2] += cmpAux1[2];
	cmpAux1[1] = 2.0 * cmpKj2Ky2[2];
	cmpAux1[2] = -2.0 * cmpKj2Ky2[1];
	C6_div_CMP(cmpAux2, cmpAux3, cmpAux1);
	cmpAux3[1] = -kSign * cmpAux2[1];
	cmpAux3[2] = -kSign * cmpAux2[2];
	cmpIm3_RE[1] = cmpAux3[1];
	cmpIm3_IM[1] = cmpAux3[2];
 
	/* Im3,2 */
	cmpAux1[1] = fX;
	cmpAux1[2] = 0.0;
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2Ky2);
	cmpAux1[1] = fabs(fKy);
	cmpAux1[2] = 0.0;
	C6_mult_CMP(cmpAux3, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmp_Exp_Ky_x, cmpAux3);
	cmpAux2[1] += cmpAux1[1];
	cmpAux2[2] += cmpAux1[2];
	cmpAux1[1] = 0.0;
	cmpAux1[2] = -fKy * fKy;
	C6_mult_CMP(cmpAux3, cmpAux1, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux1, cmpAux3, cmpKj2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2Ky2);
	C6_div_CMP(cmpAux1, cmpAux3, cmp_sKj2Ky2);
	cmpAux2[1] += cmpAux1[1];
	cmpAux2[2] += cmpAux1[2];
	C6_div_CMP(cmpAux3, cmpAux2, cmpKy);
	cmpAux3[1] /= 2.0;
	cmpAux3[2] /= 2.0;
	cmpIm3_RE[2] = cmpAux3[1];
	cmpIm3_IM[2] = cmpAux3[2];
 
	/* Im3,3 */
	C6_div_CMP(cmpAux1, cmpOne, cmpKj2Ky2);
	cmpAux1[1] = -cmpAux1[1];
	cmpAux1[2] = -cmpAux1[2];
	C6_div_CMP(cmpAux2, cmp_Exp_Ky_x, cmpKj2);
	cmpAux1[1] += cmpAux2[1];
	cmpAux1[2] += cmpAux2[2];
	C6_mult_CMP(cmpAux2, cmpKy2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux3, cmpAux2, cmpKj2);
	C6_div_CMP(cmpAux2, cmpAux3, cmpKj2Ky2);
	cmpAux1[1] += cmpAux2[1];
	cmpAux1[2] += cmpAux2[2];
	cmpAux2[1] = 0.0;
	cmpAux2[2] = 2.0 * fKy * fKy;
	C6_div_CMP(cmpAux3, cmpAux1, cmpAux2);
	cmpIm3_RE[3] = kSign * cmpAux3[1];
	cmpIm3_IM[3] = kSign * cmpAux3[2];
 
	/* Im3,4 */
	cmpAux1[1] = -fX * fX / 2.0;
	cmpAux1[2] = 0.0;
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2Ky2);
	cmpAux1[1] = 2.0 * cmpKy2[1] - cmpKj2[1];
	cmpAux1[2] = 2.0 * cmpKy2[2] - cmpKj2[2];
	C6_div_CMP(cmpAux3, cmpAux1, cmpKy2);
	C6_div_CMP(cmpAux1, cmpAux3, cmpKj2Ky2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2Ky2);
	cmpAux2[1] += cmpAux3[1];
	cmpAux2[2] += cmpAux3[2];
	C6_div_CMP(cmpAux1, cmp_Exp_Ky_x, cmpKy2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2);
	cmpAux2[1] += cmpAux3[1];
	cmpAux2[2] += cmpAux3[2];
	C6_mult_CMP(cmpAux1, cmpKy2, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux3, cmpKj2Ky2);
	C6_div_CMP(cmpAux3, cmpAux1, cmpKj2Ky2);
	cmpAux2[1] -= cmpAux3[1];
	cmpAux2[2] -= cmpAux3[2];
	C6_div_CMP(cmpAux3, cmpAux2, cmpIm);
	cmpAux3[1] *= -kSign / 2.0;
	cmpAux3[2] *= -kSign / 2.0;
	cmpIm3_RE[4] = cmpAux3[1];
	cmpIm3_IM[4] = cmpAux3[2];
 
	if (fKy == 0)
	{
		cmpIm3_RE[2] = 0.0;
		cmpIm3_IM[2] = 0.0;
		cmpIm3_RE[4] = 0.0;
		cmpIm3_IM[4] = 0.0;
		cmpIm3_RE[3] = cmpIm3_RE[1];
		cmpIm3_IM[3] = cmpIm3_IM[1];
	} /* if */
 
	/* Im2,1 */
	cmpAux1[1] = cmpIm3_RE[5];
	cmpAux1[2] = cmpIm3_IM[5];
	C6_mult_CMP(cmpAux2, cmpAux1, cmpKj);
	cmpIm2_RE[1] = cmpAux2[1];
	cmpIm2_IM[1] = cmpAux2[2];
 
	/* Im2,2 */
	cmpIm2_RE[2] = cmpIm3_RE[3] *fKy;
	cmpIm2_IM[2] = cmpIm3_IM[3] *fKy;
 
	/* Im2,3 */
	cmpAux1[1] = cmp_Exp_Ky_x[1] / fabs(fKy);
	cmpAux1[2] = cmp_Exp_Ky_x[2] / fabs(fKy);
	C6_div_CMP(cmpAux2, cmp_Exp_sKj2Ky2_x, cmp_sKj2Ky2);
	cmpAux1[1] -= cmpAux2[2];
	cmpAux1[2] += cmpAux2[1];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	cmpIm2_RE[3] = -cmpAux2[1] / 2.0;
	cmpIm2_IM[3] = -cmpAux2[2] / 2.0;
 
	/* Im2,4 */
	cmpIm2_RE[4] = cmpIm3_RE[2] *fKy;
	cmpIm2_IM[4] = cmpIm3_IM[2] *fKy;
 
	if (fKy == 0)
	{
		cmpIm2_RE[2] = 0.0;
		cmpIm2_IM[2] = 0.0;
		cmpIm2_RE[4] = 0.0;
		cmpIm2_IM[4] = 0.0;
		cmpIm2_RE[3] = cmpIm2_RE[1];
		cmpIm2_IM[3] = cmpIm2_IM[1];
	} /* if */
 
	/* Im1,2 */
	cmpIm1_RE[2] = cmpIm2_RE[3] *fKy;
	cmpIm1_IM[2] = cmpIm2_IM[3] *fKy;
 
	/* Im1,3 */
	C6_sub_CMP(cmpAux1, cmp_Exp_Ky_x, cmp_Exp_sKj2Ky2_x);
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpIm1_RE[3] = -kSign * cmpAux1[1] / 2.0;
	cmpIm1_IM[3] = -kSign * cmpAux1[2] / 2.0;
 
	/* Im1,4 */
	cmpIm1_RE[4] = cmpIm2_RE[2] *fKy;
	cmpIm1_IM[4] = cmpIm2_IM[2] *fKy;
 
	/* I0,2 */
	cmpI0_RE[2] = cmpIm1_RE[3] * fKy;
	cmpI0_IM[2] = cmpIm1_IM[3] * fKy;
 
	/* I0,3 */
	C6_mult_CMP(cmpAux1, cmp_sKj2Ky2, cmp_Exp_sKj2Ky2_x);
	cmpAux2[1] = cmp_Exp_Ky_x[1] * fabs(fKy);
	cmpAux2[2] = cmp_Exp_Ky_x[2] * fabs(fKy);
	cmpAux1[1] -= cmpAux2[2];
	cmpAux1[2] += cmpAux2[1];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI0_RE[3] = cmpAux1[1] / 2.0;
	cmpI0_IM[3] = cmpAux1[2] / 2.0;
 
	/* I0,4 */
	cmpI0_RE[4] = cmpIm1_RE[2] * fKy;
	cmpI0_IM[4] = cmpIm1_IM[2] * fKy;
 
	/* I1,2 */
	cmpI1_RE[2] = cmpI0_RE[3] * fKy;
	cmpI1_IM[2] = cmpI0_IM[3] * fKy;
 
	/* I1,3 */
	C6_mult_CMP(cmpAux1, cmpKj2Ky2, cmp_Exp_sKj2Ky2_x);
	C6_mult_CMP(cmpAux2, cmpKy2, cmp_Exp_Ky_x);
	cmpAux1[1] += cmpAux2[1];
	cmpAux1[2] += cmpAux2[2];
	C6_div_CMP(cmpAux2, cmpAux1, cmpKj2);
	C6_div_CMP(cmpAux1, cmpAux2, cmpIm);
	cmpI1_RE[3] = kSign * cmpAux1[1] / 2.0;
	cmpI1_IM[3] = kSign * cmpAux1[2] / 2.0;
 
	/* I1,4 */
	cmpI1_RE[4] = cmpI0_RE[2] * fKy;
	cmpI1_IM[4] = cmpI0_IM[2] * fKy;
 
	/* I2,2 */
	cmpI2_RE[2] = cmpI1_RE[3] * fKy;
	cmpI2_IM[2] = cmpI1_IM[3] * fKy;
 
	/* I2,3 */
	C6_mult_CMP(cmpAux1, cmp_sKj2Ky2, cmpKj2Ky2);
	C6_mult_CMP(cmpAux2, cmpAux1, cmp_Exp_sKj2Ky2_x);
	C6_mult_CMP(cmpAux1, cmp_Exp_Ky_x, cmpKy2);
	cmpAux1[1] *= fabs(fKy);
	cmpAux1[2] *= fabs(fKy);
	cmpAux2[1] += cmpAux1[2];
	cmpAux2[2] -= cmpAux1[1];
	C6_div_CMP(cmpAux1, cmpAux2, cmpKj2);
	C6_div_CMP(cmpAux2, cmpAux1, cmpIm);
	cmpI2_RE[3] = cmpAux2[1] / 2.0;
	cmpI2_IM[3] = cmpAux2[2] / 2.0;
 
	/* I2,4 */
	cmpI2_RE[4] = cmpI1_RE[2] * fKy;
	cmpI2_IM[4] = cmpI1_IM[2] * fKy;
 
	/* I3,2 */
	cmpI3_RE[2] = cmpI2_RE[3] * fKy;
	cmpI3_IM[2] = cmpI2_IM[3] * fKy;
 
	/* I3,3 */
	C6_mult_CMP(cmpAux1, cmpKj2Ky2, cmpKj2Ky2);
	C6_mult_CMP(cmpAux2, cmpAux1, cmp_Exp_sKj2Ky2_x);
	C6_mult_CMP(cmpAux1, cmp_Exp_Ky_x, cmpKy4);
	cmpAux2[1] -= cmpAux1[1];
	cmpAux2[2] -= cmpAux1[2];
	C6_div_CMP(cmpAux1, cmpAux2, cmpKj2);
	C6_div_CMP(cmpAux2, cmpAux1, cmpIm);
	cmpI3_RE[3] = kSign * cmpAux2[1] / 2.0;
	cmpI3_IM[3] = kSign * cmpAux2[2] / 2.0;
 
	/* I3,4 */
	cmpI3_RE[4] = cmpI2_RE[2] * fKy;
	cmpI3_IM[4] = cmpI2_IM[2] * fKy;
} /* CalcTLM_25D_Integrals_GPU_SH */
 
 
void C6_add_CMP
(
	type* cmp_c,
	type* cmp_a,
	type* cmp_b
)
{
	cmp_c[1] = cmp_a[1] + cmp_b[1];
	cmp_c[2] = cmp_a[2] + cmp_b[2];
 
} /* C6_add_CMP */
 
void C6_sub_CMP
(
	type* cmp_c,
	type* cmp_a,
	type* cmp_b
)
{
	cmp_c[1] = cmp_a[1] - cmp_b[1];
	cmp_c[2] = cmp_a[2] - cmp_b[2];
 
} /* C6_sub_CMP */
 
void C6_mult_CMP
(
	type* cmp_c,
	type* cmp_a,
	type* cmp_b
)
{
	cmp_c[1] = cmp_a[1] * cmp_b[1] - cmp_a[2] * cmp_b[2];
	cmp_c[2] = cmp_a[2] * cmp_b[1] + cmp_a[1] * cmp_b[2];
 
} /* C6_div_CMP */
 
void C6_div_CMP
(
	type* cmp_c,
	type* cmp_a,
	type* cmp_b
)
{
	type fNorm;
 
	fNorm = cmp_b[1] * cmp_b[1] + cmp_b[2] * cmp_b[2];
 
	cmp_c[1] = (cmp_a[1] * cmp_b[1] + cmp_a[2] * cmp_b[2]) / fNorm;
	cmp_c[2] = (cmp_a[2] * cmp_b[1] - cmp_a[1] * cmp_b[2]) / fNorm;
 
} /* C6_div_CMP */
 
void C6_exp_CMP
(
	type* cmp_c,
	type* cmp_a
)
{
	type fNorm;
	type fCos;
	type fSin;
 
	fCos = cos(cmp_a[2]);
	fSin = sin(cmp_a[2]);
 
	fNorm = exp(cmp_a[1]);
	cmp_c[1] = fNorm * fCos;
	cmp_c[2] = fNorm * fSin;
 
} /* C6_exp_CMP */
 
void C6_pow_CMP
(
	type* cmp_c,
	type* cmp_a,
	type fb
)
{
	type fNorm;
	type fAngle;
	type fCos;
	type fSin;
 
	fNorm = sqrt(cmp_a[1] * cmp_a[1] + cmp_a[2] * cmp_a[2]);
	fAngle = atan2(cmp_a[2], cmp_a[1]);
	fNorm = pow(fNorm, fb);
	fAngle = fAngle * fb;
	fCos = cos(fAngle);
	fSin = sin(fAngle);
 
	cmp_c[1] = fNorm * fCos;
	cmp_c[2] = fNorm * fSin;
 
} /* C6_pow_CMP */
/* end_of_code */