/// Load 128-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
/// </summary>
/// <param name="mem_addr">Memory address</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 stream_load_si128(void* mem_addr)
{
    // This method performs no streaming-specific work of its own; it hands
    // the address straight to the shared load helper.
    v128 loaded = GenericCSharpLoad(mem_addr);
    return loaded;
}
// _mm_blend_pd
/// <summary> Pick each double-precision (64-bit) element from "a" or "b" according to control mask "imm8": when bit j of "imm8" is set element j comes from "b", otherwise from "a". </summary>
/// <param name="a">Vector used for elements whose control bit is clear</param>
/// <param name="b">Vector used for elements whose control bit is set</param>
/// <param name="imm8">Control mask (only bits 0 and 1 are examined)</param>
/// <returns>Blended vector</returns>
[DebuggerStepThrough]
public static v128 blend_pd(v128 a, v128 b, int imm8)
{
    v128 result = default(v128);
    double* outLanes = &result.Double0;
    double* fromA = &a.Double0;
    double* fromB = &b.Double0;

    for (int lane = 0; lane < 2; ++lane)
    {
        bool takeB = ((imm8 >> lane) & 1) != 0;
        outLanes[lane] = takeB ? fromB[lane] : fromA[lane];
    }

    return result;
}
// _mm_blend_ps
/// <summary> Blend packed single-precision (32-bit) floating-point elements from "a" and "b" using control mask "imm8", and store the results in "dst". </summary>
/// <summary> Pick each 8-bit element from "a" or "b" according to "mask": when the matching mask byte is negative as a signed value (its top bit is set) the element comes from "b", otherwise from "a". </summary>
/// <param name="a">Vector used where the mask byte is non-negative</param>
/// <param name="b">Vector used where the mask byte is negative</param>
/// <param name="mask">Per-byte selector</param>
/// <returns>Blended vector</returns>
[DebuggerStepThrough]
public static v128 blendv_epi8(v128 a, v128 b, v128 mask)
{
    v128 result = default(v128);
    byte* outLanes = &result.Byte0;
    byte* fromA = &a.Byte0;
    byte* fromB = &b.Byte0;
    sbyte* selector = &mask.SByte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        // Reading the selector as a signed byte turns "top bit set" into "negative".
        outLanes[lane] = selector[lane] >= 0 ? fromA[lane] : fromB[lane];
    }

    return result;
}
// _mm_blend_epi16
/// <summary> Pick each 16-bit element from "a" or "b" according to control mask "imm8": when bit j of "imm8" is set element j comes from "b", otherwise from "a". </summary>
/// <param name="a">Vector used for elements whose control bit is clear</param>
/// <param name="b">Vector used for elements whose control bit is set</param>
/// <param name="imm8">Control mask (bits 0 through 7 are examined)</param>
/// <returns>Blended vector</returns>
[DebuggerStepThrough]
public static v128 blend_epi16(v128 a, v128 b, int imm8)
{
    v128 result = default(v128);
    short* outLanes = &result.SShort0;
    short* fromA = &a.SShort0;
    short* fromB = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        bool takeB = (imm8 & (1 << lane)) != 0;
        outLanes[lane] = takeB ? fromB[lane] : fromA[lane];
    }

    return result;
}
// _mm_dp_pd
/// <summary> Conditional dot product of the double-precision (64-bit) elements of "a" and "b". Bits 4 and 5 of "imm8" choose which of the two element products take part in the sum (an excluded product counts as 0.0); bits 0 and 1 choose which elements of the result receive the sum (the others are 0.0). </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control byte: bits 4-5 select the products, bits 0-1 select the destination elements</param>
/// <returns>Vector holding the sum in the selected elements</returns>
[DebuggerStepThrough]
public static v128 dp_pd(v128 a, v128 b, int imm8)
{
    double product0 = 0.0;
    double product1 = 0.0;
    if ((imm8 & 0x10) != 0)
    {
        product0 = a.Double0 * b.Double0;
    }
    if ((imm8 & 0x20) != 0)
    {
        product1 = a.Double1 * b.Double1;
    }

    double total = product0 + product1;

    v128 result = default(v128);
    result.Double0 = (imm8 & 1) == 0 ? 0.0 : total;
    result.Double1 = (imm8 & 2) == 0 ? 0.0 : total;
    return result;
}
// _mm_dp_ps
/// <summary> Conditional dot product of the single-precision (32-bit) elements of "a" and "b". The high 4 bits of "imm8" choose which of the four element products take part in the sum (an excluded product counts as 0.0f); the low 4 bits choose which elements of the result receive the sum (the others are 0.0f). </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <param name="imm8">Control byte: bits 4-7 select the products, bits 0-3 select the destination elements</param>
/// <returns>Vector holding the sum in the selected elements</returns>
[DebuggerStepThrough]
public static v128 dp_ps(v128 a, v128 b, int imm8)
{
    float product0 = 0.0f;
    float product1 = 0.0f;
    float product2 = 0.0f;
    float product3 = 0.0f;
    if ((imm8 & 0x10) != 0)
    {
        product0 = a.Float0 * b.Float0;
    }
    if ((imm8 & 0x20) != 0)
    {
        product1 = a.Float1 * b.Float1;
    }
    if ((imm8 & 0x40) != 0)
    {
        product2 = a.Float2 * b.Float2;
    }
    if ((imm8 & 0x80) != 0)
    {
        product3 = a.Float3 * b.Float3;
    }

    // Kept as one left-to-right expression so the order of additions is unchanged.
    float total = product0 + product1 + product2 + product3;

    v128 result = default(v128);
    result.Float0 = (imm8 & 1) == 0 ? 0.0f : total;
    result.Float1 = (imm8 & 2) == 0 ? 0.0f : total;
    result.Float2 = (imm8 & 4) == 0 ? 0.0f : total;
    result.Float3 = (imm8 & 8) == 0 ? 0.0f : total;
    return result;
}
// _mm_extract_ps
/// <summary> Return the 32-bit element of "a" selected by the low two bits of "imm8", read through the vector's 32-bit integer view (so the value comes back as an int, not a float). </summary>
/// <param name="a">Vector a</param>
/// <param name="imm8">Element selector (only the low two bits are used)</param>
/// <returns>Selected element as an integer</returns>
[DebuggerStepThrough]
public static int extract_ps(v128 a, int imm8)
{
    int lane = imm8 & 0x3;
    int* lanes = &a.SInt0;
    return lanes[lane];
}
// unity extension
/// <summary> Return the single-precision (32-bit) floating-point element of "a" selected by the low two bits of "imm8", as a float. </summary>
/// <param name="a">Vector a</param>
/// <param name="imm8">Element selector (only the low two bits are used)</param>
/// <returns>Selected element as a float</returns>
[DebuggerStepThrough]
public static float extractf_ps(v128 a, int imm8)
{
    int lane = imm8 & 0x3;
    float* lanes = &a.Float0;
    return lanes[lane];
}
// _mm_extract_epi8
/// <summary> Return the 8-bit integer of "a" selected by the low four bits of "imm8". </summary>
/// <param name="a">Vector a</param>
/// <param name="imm8">Element selector (only the low four bits are used)</param>
/// <returns>Selected byte</returns>
[DebuggerStepThrough]
public static byte extract_epi8(v128 a, int imm8)
{
    int lane = imm8 & 0xf;
    byte* lanes = &a.Byte0;
    return lanes[lane];
}
// _mm_extract_epi32
/// <summary> Return the 32-bit integer of "a" selected by the low two bits of "imm8". </summary>
/// <param name="a">Vector a</param>
/// <param name="imm8">Element selector (only the low two bits are used)</param>
/// <returns>Selected 32-bit integer</returns>
[DebuggerStepThrough]
public static int extract_epi32(v128 a, int imm8)
{
    int lane = imm8 & 0x3;
    int* lanes = &a.SInt0;
    return lanes[lane];
}
// _mm_extract_epi64
/// <summary> Return the 64-bit integer of "a" selected by the lowest bit of "imm8". </summary>
/// <param name="a">Vector a</param>
/// <param name="imm8">Element selector (only the lowest bit is used)</param>
/// <returns>Selected 64-bit integer</returns>
[DebuggerStepThrough]
public static long extract_epi64(v128 a, int imm8)
{
    int lane = imm8 & 0x1;
    long* lanes = &a.SLong0;
    return lanes[lane];
}
// _mm_insert_ps
/// <summary> Copy "a" to "tmp", then insert a single-precision (32-bit) floating-point element from "b" into "tmp" using the control in "imm8". Store "tmp" to "dst" using the mask in "imm8" (elements are zeroed out when the corresponding bit is set). </summary>
/// <summary> Return a copy of "a" in which the 8-bit element at the position given by the low four bits of "imm8" is replaced with "i". </summary>
/// <param name="a">Vector a</param>
/// <param name="i">8-bit value to insert</param>
/// <param name="imm8">Element position (only the low four bits are used)</param>
/// <returns>Vector with the element replaced</returns>
[DebuggerStepThrough]
public static v128 insert_epi8(v128 a, byte i, int imm8)
{
    v128 result = a;
    byte* lanes = &result.Byte0;
    int lane = imm8 & 0xf;
    lanes[lane] = i;
    return result;
}
// _mm_insert_epi32
/// <summary> Return a copy of "a" in which the 32-bit element at the position given by the low two bits of "imm8" is replaced with "i". </summary>
/// <param name="a">Vector a</param>
/// <param name="i">32-bit value to insert</param>
/// <param name="imm8">Element position (only the low two bits are used)</param>
/// <returns>Vector with the element replaced</returns>
[DebuggerStepThrough]
public static v128 insert_epi32(v128 a, int i, int imm8)
{
    v128 result = a;
    int* lanes = &result.SInt0;
    int lane = imm8 & 0x3;
    lanes[lane] = i;
    return result;
}
// _mm_insert_epi64
/// <summary> Return a copy of "a" in which the 64-bit element at the position given by the lowest bit of "imm8" is replaced with "i". </summary>
/// <param name="a">Vector a</param>
/// <param name="i">64-bit value to insert</param>
/// <param name="imm8">Element position (only the lowest bit is used)</param>
/// <returns>Vector with the element replaced</returns>
[DebuggerStepThrough]
public static v128 insert_epi64(v128 a, long i, int imm8)
{
    v128 result = a;
    long* lanes = &result.SLong0;
    int lane = imm8 & 0x1;
    lanes[lane] = i;
    return result;
}
// _mm_max_epi8
/// <summary> Element-wise maximum of the sixteen signed 8-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element maxima</returns>
[DebuggerStepThrough]
public static v128 max_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    sbyte* outLanes = &result.SByte0;
    sbyte* left = &a.SByte0;
    sbyte* right = &b.SByte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        sbyte x = left[lane];
        sbyte y = right[lane];
        outLanes[lane] = x > y ? x : y;
    }

    return result;
}
// _mm_max_epi32
/// <summary> Element-wise maximum of the four signed 32-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element maxima</returns>
[DebuggerStepThrough]
public static v128 max_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* outLanes = &result.SInt0;
    int* left = &a.SInt0;
    int* right = &b.SInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        int x = left[lane];
        int y = right[lane];
        outLanes[lane] = x > y ? x : y;
    }

    return result;
}
// _mm_max_epu32
/// <summary> Element-wise maximum of the four unsigned 32-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element maxima</returns>
[DebuggerStepThrough]
public static v128 max_epu32(v128 a, v128 b)
{
    v128 result = default(v128);
    uint* outLanes = &result.UInt0;
    uint* left = &a.UInt0;
    uint* right = &b.UInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        uint x = left[lane];
        uint y = right[lane];
        outLanes[lane] = x > y ? x : y;
    }

    return result;
}
// _mm_max_epu16
/// <summary> Element-wise maximum of the eight unsigned 16-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element maxima</returns>
[DebuggerStepThrough]
public static v128 max_epu16(v128 a, v128 b)
{
    v128 result = default(v128);
    ushort* outLanes = &result.UShort0;
    ushort* left = &a.UShort0;
    ushort* right = &b.UShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        ushort x = left[lane];
        ushort y = right[lane];
        outLanes[lane] = x > y ? x : y;
    }

    return result;
}
// _mm_min_epi8
/// <summary> Element-wise minimum of the sixteen signed 8-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element minima</returns>
[DebuggerStepThrough]
public static v128 min_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    sbyte* outLanes = &result.SByte0;
    sbyte* left = &a.SByte0;
    sbyte* right = &b.SByte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        sbyte x = left[lane];
        sbyte y = right[lane];
        outLanes[lane] = x < y ? x : y;
    }

    return result;
}
// _mm_min_epi32
/// <summary> Element-wise minimum of the four signed 32-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element minima</returns>
[DebuggerStepThrough]
public static v128 min_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* outLanes = &result.SInt0;
    int* left = &a.SInt0;
    int* right = &b.SInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        int x = left[lane];
        int y = right[lane];
        outLanes[lane] = x < y ? x : y;
    }

    return result;
}
// _mm_min_epu32
/// <summary> Element-wise minimum of the four unsigned 32-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element minima</returns>
[DebuggerStepThrough]
public static v128 min_epu32(v128 a, v128 b)
{
    v128 result = default(v128);
    uint* outLanes = &result.UInt0;
    uint* left = &a.UInt0;
    uint* right = &b.UInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        uint x = left[lane];
        uint y = right[lane];
        outLanes[lane] = x < y ? x : y;
    }

    return result;
}
// _mm_min_epu16
/// <summary> Element-wise minimum of the eight unsigned 16-bit integers in "a" and "b". </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element minima</returns>
[DebuggerStepThrough]
public static v128 min_epu16(v128 a, v128 b)
{
    v128 result = default(v128);
    ushort* outLanes = &result.UShort0;
    ushort* left = &a.UShort0;
    ushort* right = &b.UShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        ushort x = left[lane];
        ushort y = right[lane];
        outLanes[lane] = x < y ? x : y;
    }

    return result;
}
// _mm_packus_epi32
/// <summary> Narrow the four 32-bit integers of "a" followed by the four 32-bit integers of "b" to eight 16-bit integers, passing each value through Saturate_To_UnsignedInt16. The values from "a" fill the low four elements of the result and those from "b" the high four. </summary>
/// <param name="a">Vector supplying result elements 0-3</param>
/// <param name="b">Vector supplying result elements 4-7</param>
/// <returns>Vector of eight packed 16-bit results</returns>
[DebuggerStepThrough]
public static v128 packus_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    ushort* packed = &result.UShort0;
    int* lowSource = &a.SInt0;
    int* highSource = &b.SInt0;

    // All of "a" is converted before any of "b", the same order as writing the eight assignments out by hand.
    for (int lane = 0; lane < 4; ++lane)
    {
        packed[lane] = Saturate_To_UnsignedInt16(lowSource[lane]);
    }
    for (int lane = 0; lane < 4; ++lane)
    {
        packed[4 + lane] = Saturate_To_UnsignedInt16(highSource[lane]);
    }

    return result;
}
// _mm_cmpeq_epi64
/// <summary> Compare the two 64-bit integers of "a" and "b" for equality. A result element is -1 (all bits set) where the inputs match and 0 where they differ. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of per-element comparison masks</returns>
[DebuggerStepThrough]
public static v128 cmpeq_epi64(v128 a, v128 b)
{
    v128 result = default(v128);

    if (a.SLong0 == b.SLong0)
    {
        result.SLong0 = -1L;
    }
    else
    {
        result.SLong0 = 0L;
    }

    if (a.SLong1 == b.SLong1)
    {
        result.SLong1 = -1L;
    }
    else
    {
        result.SLong1 = 0L;
    }

    return result;
}
// _mm_cvtepi8_epi16
/// <summary> Sign extend the eight lowest signed 8-bit integers of "a" to 16-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of eight 16-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepi8_epi16(v128 a)
{
    v128 result = default(v128);
    short* widened = &result.SShort0;
    sbyte* narrow = &a.SByte0;

    for (int lane = 0; lane < 8; ++lane)
    {
        // The signed source type makes the implicit widening a sign extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepi8_epi32
/// <summary> Sign extend the four lowest signed 8-bit integers of "a" to 32-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of four 32-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepi8_epi32(v128 a)
{
    v128 result = default(v128);
    int* widened = &result.SInt0;
    sbyte* narrow = &a.SByte0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // The signed source type makes the implicit widening a sign extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepi8_epi64
/// <summary> Sign extend the two lowest signed 8-bit integers of "a" to 64-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of two 64-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepi8_epi64(v128 a)
{
    v128 result = default(v128);
    long* widened = &result.SLong0;
    sbyte* narrow = &a.SByte0;

    for (int lane = 0; lane < 2; ++lane)
    {
        // The signed source type makes the implicit widening a sign extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepi16_epi32
/// <summary> Sign extend the four lowest signed 16-bit integers of "a" to 32-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of four 32-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepi16_epi32(v128 a)
{
    v128 result = default(v128);
    int* widened = &result.SInt0;
    short* narrow = &a.SShort0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // The signed source type makes the implicit widening a sign extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepi16_epi64
/// <summary> Sign extend the two lowest signed 16-bit integers of "a" to 64-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of two 64-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepi16_epi64(v128 a)
{
    v128 result = default(v128);
    long* widened = &result.SLong0;
    short* narrow = &a.SShort0;

    for (int lane = 0; lane < 2; ++lane)
    {
        // The signed source type makes the implicit widening a sign extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepi32_epi64
/// <summary> Sign extend the two lowest signed 32-bit integers of "a" to 64-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of two 64-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepi32_epi64(v128 a)
{
    v128 result = default(v128);
    long* widened = &result.SLong0;
    int* narrow = &a.SInt0;

    for (int lane = 0; lane < 2; ++lane)
    {
        // The signed source type makes the implicit widening a sign extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepu8_epi16
/// <summary> Zero extend the eight lowest unsigned 8-bit integers of "a" to 16-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of eight 16-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepu8_epi16(v128 a)
{
    v128 result = default(v128);
    short* widened = &result.SShort0;
    byte* narrow = &a.Byte0;

    for (int lane = 0; lane < 8; ++lane)
    {
        // The unsigned source type makes the implicit widening a zero extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepu8_epi32
/// <summary> Zero extend the four lowest unsigned 8-bit integers of "a" to 32-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of four 32-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepu8_epi32(v128 a)
{
    v128 result = default(v128);
    int* widened = &result.SInt0;
    byte* narrow = &a.Byte0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // The unsigned source type makes the implicit widening a zero extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepu8_epi64
/// <summary> Zero extend the two lowest unsigned 8-bit integers of "a" to 64-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of two 64-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepu8_epi64(v128 a)
{
    v128 result = default(v128);
    long* widened = &result.SLong0;
    byte* narrow = &a.Byte0;

    for (int lane = 0; lane < 2; ++lane)
    {
        // The unsigned source type makes the implicit widening a zero extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepu16_epi32
/// <summary> Zero extend the four lowest unsigned 16-bit integers of "a" to 32-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of four 32-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepu16_epi32(v128 a)
{
    v128 result = default(v128);
    int* widened = &result.SInt0;
    ushort* narrow = &a.UShort0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // The unsigned source type makes the implicit widening a zero extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepu16_epi64
/// <summary> Zero extend the two lowest unsigned 16-bit integers of "a" to 64-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of two 64-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepu16_epi64(v128 a)
{
    v128 result = default(v128);
    long* widened = &result.SLong0;
    ushort* narrow = &a.UShort0;

    for (int lane = 0; lane < 2; ++lane)
    {
        // The unsigned source type makes the implicit widening a zero extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_cvtepu32_epi64
/// <summary> Zero extend the two lowest unsigned 32-bit integers of "a" to 64-bit integers. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of two 64-bit integers</returns>
[DebuggerStepThrough]
public static v128 cvtepu32_epi64(v128 a)
{
    v128 result = default(v128);
    long* widened = &result.SLong0;
    uint* narrow = &a.UInt0;

    for (int lane = 0; lane < 2; ++lane)
    {
        // The unsigned source type makes the implicit widening a zero extension.
        widened[lane] = narrow[lane];
    }

    return result;
}
// _mm_mul_epi32
/// <summary> Multiply 32-bit elements 0 and 2 of "a" by the same elements of "b" as signed values, and return the two full 64-bit products. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of two signed 64-bit products</returns>
[DebuggerStepThrough]
public static v128 mul_epi32(v128 a, v128 b)
{
    // One operand is widened to 64 bits first so the product is computed in 64-bit arithmetic.
    long lowProduct = (long)a.SInt0 * b.SInt0;
    long highProduct = (long)a.SInt2 * b.SInt2;

    v128 result = default(v128);
    result.SLong0 = lowProduct;
    result.SLong1 = highProduct;
    return result;
}
// _mm_mullo_epi32
/// <summary> Multiply the four 32-bit integers of "a" by the matching integers of "b" and keep each product as a 32-bit integer. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of four 32-bit products</returns>
[DebuggerStepThrough]
public static v128 mullo_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* products = &result.SInt0;
    int* left = &a.SInt0;
    int* right = &b.SInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // A 32-bit multiply, so only a 32-bit product is stored.
        products[lane] = left[lane] * right[lane];
    }

    return result;
}
// _mm_testz_si128
/// <summary> Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "ZF" value. </summary>
/// <summary> Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return the "CF" value. </summary>
/// <summary>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "b", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "b", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</summary>
/// <summary> Compute the bitwise AND of the 128 bits in "a" and "mask", and return 1 if the result is zero, otherwise return 0. Implemented by forwarding to testz_si128. </summary>
/// <param name="a">Vector a</param>
/// <param name="mask">Mask</param>
/// <returns>Boolean result</returns>
[DebuggerStepThrough]
[BurstTargetCpu(BurstTargetCpu.X64_SSE4)]
public static int test_all_zeros(v128 a, v128 mask)
{
    int zeroFlag = testz_si128(a, mask);
    return zeroFlag;
}
// _mm_test_mix_ones_zeros
/// <summary>Compute the bitwise AND of 128 bits (representing integer data) in "a" and "mask", and set "ZF" to 1 if the result is zero, otherwise set "ZF" to 0. Compute the bitwise NOT of "a" and then AND with "mask", and set "CF" to 1 if the result is zero, otherwise set "CF" to 0. Return 1 if both the "ZF" and "CF" values are zero, otherwise return 0.</summary>
/// <summary>Compute the bitwise NOT of "a" and then AND with a 128-bit vector containing all 1's, and return 1 if the result is zero, otherwise return 0.</summary>
/// <param name="a">Vector a</param>
/// <returns>Boolean result</returns>
[DebuggerStepThrough]
[BurstTargetCpu(BurstTargetCpu.X64_SSE4)]
public static int test_all_ones(v128 a)
{
    // Comparing "a" with itself for equality supplies the second operand (the all-1's vector of the summary).
    v128 selfCompare = Sse2.cmpeq_epi32(a, a);
    return testc_si128(a, selfCompare);
}
// Wrapper for C# reference mode to handle FROUND_xxx
// Emulate intel's ceil rounding to zero leaving the data at negative zero
returnnewv128(0x8000_0000_0000_0000).Double0;
}
else
{
returnr;
}
}
case3:returnMath.Truncate(d);
default:
switch(MXCSR&MXCSRBits.RoundingControlMask)
{
caseMXCSRBits.RoundToNearest:returnMath.Round(d);
caseMXCSRBits.RoundDown:returnMath.Floor(d);
caseMXCSRBits.RoundUp:returnMath.Ceiling(d);
default:returnMath.Truncate(d);
}
}
}
// _mm_round_pd
/// <summary> Round both double-precision (64-bit) floating-point elements of "a" with RoundDImpl using the "rounding" parameter, and return the rounded values as double-precision elements. </summary>
/// <param name="a">Vector a</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector of rounded elements</returns>
[DebuggerStepThrough]
public static v128 round_pd(v128 a, int rounding)
{
    v128 result = default(v128);
    double* rounded = &result.Double0;
    double* source = &a.Double0;

    for (int lane = 0; lane < 2; ++lane)
    {
        rounded[lane] = RoundDImpl(source[lane], rounding);
    }

    return result;
}
// _mm_floor_pd
/// <summary> Round the packed double-precision (64-bit) floating-point elements of "a" down to an integer value. Equivalent to round_pd with RoundingMode.FROUND_FLOOR. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of rounded elements</returns>
[DebuggerStepThrough]
[BurstTargetCpu(BurstTargetCpu.X64_SSE4)]
public static v128 floor_pd(v128 a)
{
    int mode = (int)RoundingMode.FROUND_FLOOR;
    return round_pd(a, mode);
}
// _mm_ceil_pd
/// <summary> Round the packed double-precision (64-bit) floating-point elements of "a" up to an integer value. Equivalent to round_pd with RoundingMode.FROUND_CEIL. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of rounded elements</returns>
[DebuggerStepThrough]
[BurstTargetCpu(BurstTargetCpu.X64_SSE4)]
public static v128 ceil_pd(v128 a)
{
    int mode = (int)RoundingMode.FROUND_CEIL;
    return round_pd(a, mode);
}
// _mm_round_ps
/// <summary> Round the four single-precision (32-bit) floating-point elements of "a" with RoundDImpl using the "rounding" parameter, and return the rounded values as single-precision elements. </summary>
/// <param name="a">Vector a</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector of rounded elements</returns>
[DebuggerStepThrough]
public static v128 round_ps(v128 a, int rounding)
{
    v128 result = default(v128);
    float* rounded = &result.Float0;
    float* source = &a.Float0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // The helper's result is narrowed back to single precision.
        rounded[lane] = (float)RoundDImpl(source[lane], rounding);
    }

    return result;
}
// _mm_floor_ps
/// <summary> Round the packed single-precision (32-bit) floating-point elements of "a" down to an integer value. Equivalent to round_ps with RoundingMode.FROUND_FLOOR. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of rounded elements</returns>
[DebuggerStepThrough]
[BurstTargetCpu(BurstTargetCpu.X64_SSE4)]
public static v128 floor_ps(v128 a)
{
    int mode = (int)RoundingMode.FROUND_FLOOR;
    return round_ps(a, mode);
}
// _mm_ceil_ps
/// <summary> Round the packed single-precision (32-bit) floating-point elements of "a" up to an integer value. Equivalent to round_ps with RoundingMode.FROUND_CEIL. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector of rounded elements</returns>
[DebuggerStepThrough]
[BurstTargetCpu(BurstTargetCpu.X64_SSE4)]
public static v128 ceil_ps(v128 a)
{
    int mode = (int)RoundingMode.FROUND_CEIL;
    return round_ps(a, mode);
}
// _mm_round_sd
/// <summary> Round the lower double-precision (64-bit) floating-point element of "b" using the "rounding" parameter and place it in the lower element of the result; the upper element of the result is copied from "a". </summary>
/// <param name="a">Vector supplying the upper element</param>
/// <param name="b">Vector whose lower element is rounded</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector with the rounded lower element and the upper element of "a"</returns>
[DebuggerStepThrough]
public static v128 round_sd(v128 a, v128 b, int rounding)
{
    double roundedLow = RoundDImpl(b.Double0, rounding);
    double upper = a.Double1;

    v128 result = default(v128);
    result.Double0 = roundedLow;
    result.Double1 = upper;
    return result;
}
// _mm_floor_sd
/// <summary> Round the lower double-precision (64-bit) floating-point element in "b" down to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
/// <summary> Round the lower double-precision (64-bit) floating-point element in "b" up to an integer value, store the result as a double-precision floating-point element in the lower element of "dst", and copy the upper element from "a" to the upper element of "dst". </summary>
/// <summary> Round the lower single-precision (32-bit) floating-point element of "b" using the "rounding" parameter and place it in the lower element of the result; the remaining elements of the result are those of "a". </summary>
/// <param name="a">Vector supplying the upper three elements</param>
/// <param name="b">Vector whose lower element is rounded</param>
/// <param name="rounding">Rounding mode</param>
/// <returns>Vector with the rounded lower element and the upper elements of "a"</returns>
[DebuggerStepThrough]
public static v128 round_ss(v128 a, v128 b, int rounding)
{
    // The helper's result is narrowed back to single precision.
    float roundedLow = (float)RoundDImpl(b.Float0, rounding);

    // Begin from a full copy of "a" so only the lowest element has to be overwritten.
    v128 result = a;
    result.Float0 = roundedLow;
    return result;
}
// _mm_floor_ss
/// <summary> Round the lower single-precision (32-bit) floating-point element in "b" down to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". </summary>
/// <summary> Round the lower single-precision (32-bit) floating-point element in "b" up to an integer value, store the result as a single-precision floating-point element in the lower element of "dst", and copy the upper 3 packed elements from "a" to the upper elements of "dst". </summary>
/// <summary> Find the smallest of the eight unsigned 16-bit integers in "a". The result holds that minimum in 16-bit element 0 and its index in 16-bit element 1; all other bits are zero. When the minimum occurs more than once, the lowest index is reported. </summary>
/// <param name="a">Vector a</param>
/// <returns>Vector holding the minimum and its index</returns>
[DebuggerStepThrough]
public static v128 minpos_epu16(v128 a)
{
    ushort* lanes = &a.UShort0;
    ushort smallest = lanes[0];
    int position = 0;

    int lane = 1;
    while (lane < 8)
    {
        ushort candidate = lanes[lane];
        // Strictly-less comparison keeps the first occurrence on ties.
        if (candidate < smallest)
        {
            smallest = candidate;
            position = lane;
        }
        lane++;
    }

    v128 result = default(v128);
    result.UShort0 = smallest;
    result.UShort1 = (ushort)position;
    return result;
}
// _mm_mpsadbw_epu8
/// <summary> Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in "a" compared to those in "b", and store the 16-bit results in "dst".</summary>
/// <remarks>Eight SADs are performed using one quadruplet from "b" and eight quadruplets from "a". One quadruplet is selected from "b" starting at the offset specified in "imm8". Eight quadruplets are formed from sequential 8-bit integers selected from "a" starting at the offset specified in "imm8".</remarks>