vector: use asm opcode mnemonics
There's no change in the binary output, just less mystery in the asm.

These mnemonics were introduced in Go 1.10:
https://golang.org/doc/go1.10#asm and https://golang.org/cl/75490

The current stable release (as of 2018-11-10) is Go 1.11, and
https://golang.org/doc/devel/release.html#policy says that Go 1.9 and
below are therefore no longer supported.

Change-Id: I1f9a63521bc8d5e8f8d395605f62bf7fb6a63bc5
Reviewed-on: https://go-review.googlesource.com/c/148997
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
This commit is contained in:
parent
249dc8530c
commit
46e4eb730a
|
@ -139,17 +139,9 @@ fxAccOpOverLoop4:
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
|
|
||||||
// z = convertToInt32(y)
|
// z = convertToInt32(y)
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -182,13 +174,10 @@ fxAccOpOverLoop4:
|
||||||
PSRLQ $32, X11
|
PSRLQ $32, X11
|
||||||
|
|
||||||
// Multiply by magic, shift by magic.
|
// Multiply by magic, shift by magic.
|
||||||
//
|
PMULULQ X10, X0
|
||||||
// pmuludq %xmm10,%xmm0
|
PMULULQ X10, X11
|
||||||
// pmuludq %xmm10,%xmm11
|
PSRLQ $47, X0
|
||||||
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
|
PSRLQ $47, X11
|
||||||
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
|
|
||||||
PSRLQ $47, X0
|
|
||||||
PSRLQ $47, X11
|
|
||||||
|
|
||||||
// Merge the two registers back to one, X11, and add maskA.
|
// Merge the two registers back to one, X11, and add maskA.
|
||||||
PSLLQ $32, X11
|
PSLLQ $32, X11
|
||||||
|
@ -223,17 +212,9 @@ fxAccOpOverLoop1:
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
|
|
||||||
// z = convertToInt32(y)
|
// z = convertToInt32(y)
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -346,17 +327,9 @@ fxAccOpSrcLoop4:
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
|
|
||||||
// z = convertToInt32(y)
|
// z = convertToInt32(y)
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -390,17 +363,9 @@ fxAccOpSrcLoop1:
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
|
|
||||||
// z = convertToInt32(y)
|
// z = convertToInt32(y)
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -492,17 +457,9 @@ fxAccMaskLoop4:
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
|
|
||||||
// z = convertToInt32(y)
|
// z = convertToInt32(y)
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -534,17 +491,9 @@ fxAccMaskLoop1:
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
|
|
||||||
// z = convertToInt32(y)
|
// z = convertToInt32(y)
|
||||||
// No-op.
|
// No-op.
|
||||||
|
@ -696,13 +645,10 @@ flAccOpOverLoop4:
|
||||||
PSRLQ $32, X11
|
PSRLQ $32, X11
|
||||||
|
|
||||||
// Multiply by magic, shift by magic.
|
// Multiply by magic, shift by magic.
|
||||||
//
|
PMULULQ X10, X0
|
||||||
// pmuludq %xmm10,%xmm0
|
PMULULQ X10, X11
|
||||||
// pmuludq %xmm10,%xmm11
|
PSRLQ $47, X0
|
||||||
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
|
PSRLQ $47, X11
|
||||||
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
|
|
||||||
PSRLQ $47, X0
|
|
||||||
PSRLQ $47, X11
|
|
||||||
|
|
||||||
// Merge the two registers back to one, X11, and add maskA.
|
// Merge the two registers back to one, X11, and add maskA.
|
||||||
PSLLQ $32, X11
|
PSLLQ $32, X11
|
||||||
|
|
|
@ -296,17 +296,9 @@ const (
|
||||||
// y = abs(x)
|
// y = abs(x)
|
||||||
// y >>= 2 // Shift by 2*ϕ - 16.
|
// y >>= 2 // Shift by 2*ϕ - 16.
|
||||||
// y = min(y, fxAlmost65536)
|
// y = min(y, fxAlmost65536)
|
||||||
//
|
PABSD X1, X2
|
||||||
// pabsd %xmm1,%xmm2
|
PSRLL $2, X2
|
||||||
// psrld $0x2,%xmm2
|
PMINUD X5, X2
|
||||||
// pminud %xmm5,%xmm2
|
|
||||||
//
|
|
||||||
// Hopefully we'll get these opcode mnemonics into the assembler for Go
|
|
||||||
// 1.8. https://golang.org/issue/16007 isn't exactly the same thing, but
|
|
||||||
// it's similar.
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x1e; BYTE $0xd1
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x72; BYTE $0xd2; BYTE $0x02
|
|
||||||
BYTE $0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x3b; BYTE $0xd5
|
|
||||||
`
|
`
|
||||||
flClampAndScale = `
|
flClampAndScale = `
|
||||||
// y = x & flSignMask
|
// y = x & flSignMask
|
||||||
|
@ -356,13 +348,10 @@ const (
|
||||||
MOVOU X0, X11
|
MOVOU X0, X11
|
||||||
PSRLQ $32, X11
|
PSRLQ $32, X11
|
||||||
// Multiply by magic, shift by magic.
|
// Multiply by magic, shift by magic.
|
||||||
//
|
PMULULQ X10, X0
|
||||||
// pmuludq %xmm10,%xmm0
|
PMULULQ X10, X11
|
||||||
// pmuludq %xmm10,%xmm11
|
PSRLQ $47, X0
|
||||||
BYTE $0x66; BYTE $0x41; BYTE $0x0f; BYTE $0xf4; BYTE $0xc2
|
PSRLQ $47, X11
|
||||||
BYTE $0x66; BYTE $0x45; BYTE $0x0f; BYTE $0xf4; BYTE $0xda
|
|
||||||
PSRLQ $47, X0
|
|
||||||
PSRLQ $47, X11
|
|
||||||
// Merge the two registers back to one, X11, and add maskA.
|
// Merge the two registers back to one, X11, and add maskA.
|
||||||
PSLLQ $32, X11
|
PSLLQ $32, X11
|
||||||
XORPS X0, X11
|
XORPS X0, X11
|
||||||
|
|
Loading…
Reference in New Issue
Block a user