[PATCH v5 3/8] x86: re-work insn/suffix recognition
H.J. Lu
hjl.tools@gmail.com
Mon Oct 31 16:59:03 GMT 2022
More information about the Binutils mailing list
Mon Oct 31 16:59:03 GMT 2022
- Previous message (by thread): [PATCH v5 3/8] x86: re-work insn/suffix recognition
- Next message (by thread): [PATCH v5 4/8] ix86: don't recognize/derive Q suffix in the common case
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
On Mon, Oct 31, 2022 at 4:40 AM Jan Beulich <jbeulich@suse.com> wrote: > > On 28.10.2022 18:12, H.J. Lu wrote: > > On Fri, Oct 28, 2022 at 2:00 AM Jan Beulich <jbeulich@suse.com> wrote: > >> > >> On 27.10.2022 19:21, H.J. Lu wrote: > >>> On Tue, Oct 25, 2022 at 12:26 AM Jan Beulich <jbeulich@suse.com> wrote: > >>>> @@ -989,13 +976,13 @@ emms, 0xf77, None, CpuMMX, No_bSuf|No_wS > >>>> // copying between Reg64/Mem64 and RegXMM/RegMMX, as is mandated by Intel's > >>>> // spec). AMD's spec, having been in existence for much longer, failed to > >>>> // recognize that and specified movd for 32- and 64-bit operations. > >>>> -movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Reg32|Unspecified|BaseIndex, RegXMM } > >>>> +movd, 0x666e, None, CpuAVX, D|Modrm|Vex128|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Reg32|Unspecified|BaseIndex, RegXMM } > >>>> movd, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|BaseIndex, RegXMM } > >>>> movd, 0x660f6e, None, CpuSSE2, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegXMM } > >>>> movd, 0x660f6e, None, CpuSSE2|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegXMM } > >>>> movd, 0xf6e, None, CpuMMX, D|Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, RegMMX } > >>>> movd, 0xf6e, None, CpuMMX|Cpu64, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg64|BaseIndex, RegMMX } > >>>> -movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >>>> +movq, 0xf37e, None, CpuAVX, Load|Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >>>> movq, 0x66d6, None, CpuAVX, Modrm|Vex=1|Space0F|VexWIG|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, Qword|Unspecified|BaseIndex|RegXMM } > >>>> movq, 0x666e, None, CpuAVX|Cpu64, D|Modrm|Vex=1|Space0F|VexW1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64|SSE2AVX, { Reg64|Unspecified|BaseIndex, RegXMM } > >>>> movq, 0xf30f7e, None, CpuSSE2, Load|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|Qword|BaseIndex|RegXMM, RegXMM } > >>>> @@ -1159,7 +1146,7 @@ andpd<sse2>, 0x660f54, None, <sse2:cpu>, > >>>> cmp<frel>pd<sse2>, 0x660fc2, <frel:imm>, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|<frel:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Unspecified|BaseIndex, RegXMM } > >>>> cmp<frel>sd<sse2>, 0xf20fc2, <frel:imm>, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|<frel:comm>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ImmExt, { RegXMM|Qword|Unspecified|BaseIndex, RegXMM } > >>>> cmppd<sse2>, 0x660fc2, None, <sse2:cpu>, Modrm|<sse2:attr>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM } > >>>> -cmpsd<sse2>, 0xf20fc2, None, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >>>> +cmpsd<sse2>, 0xf20fc2, None, <sse2:cpu>, Modrm|<sse2:scal>|<sse2:vvvv>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Pass2, { Imm8, Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >>>> comisd<sse2>, 0x660f2f, None, <sse2:cpu>, Modrm|<sse2:scal>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >>>> cvtpi2pd, 0x660f2a, None, CpuSSE2, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMMX, RegXMM } > >>>> cvtpi2pd, 0xf3e6, None, CpuAVX, Modrm|Vex|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM } > >>>> @@ -1184,7 +1171,7 @@ movlpd, 0x6613, None, CpuAVX, Modrm|Vex| > >>>> movlpd, 0x660f12, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex, RegXMM } > >>>> movmskpd<sse2>, 0x660f50, None, <sse2:cpu>, Modrm|<sse2:attr>|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 } > >>>> movntpd<sse2>, 0x660f2b, None, <sse2:cpu>, Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, Xmmword|Unspecified|BaseIndex } > >>>> -movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { Qword|Unspecified|BaseIndex, RegXMM } > >>>> +movsd, 0xf210, None, CpuAVX, D|Modrm|VexLIG|Space0F|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX|Pass2, { Qword|Unspecified|BaseIndex, RegXMM } > >>>> movsd, 0xf210, None, CpuAVX, D|Modrm|Vex=3|Space0F|VexVVVV=1|VexW=1|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|SSE2AVX, { RegXMM, RegXMM } > >>>> movsd, 0xf20f10, None, CpuSSE2, D|Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Qword|Unspecified|BaseIndex|RegXMM, RegXMM } > >>>> movupd<sse2>, 0x660f10, None, <sse2:cpu>, D|Modrm|<sse2:attr>|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|Unspecified|BaseIndex, RegXMM } > >>>> > >>> > >>> All these instructions with Pass2 have a vector register operand. Can you > >>> use it instead? > >> > >> I probably could, but that would go against your request of limiting as > >> much as possible the allocation of a copy of the input line: Far more > >> insns would then also undergo this setup for a possible 2nd parsing pass. > >> It may be possible to limit the set some by taking into account further > >> attributes (like, leaving aside move-with-sign-extend which you object > >> to being corrected, SSE2AVX), but the collection of those checks would > >> likely be clumsy and harder to maintain than the new attribute. For > >> example the consistency check in i386-gen would then not be possible > >> anymore, and hence the whole thing would become _silently_ dependent on > >> the ordering of templates in the file. > > > > Order dependency is the part of the implementation. > > > >> Furthermore keying this condition to insn attributes would be less > >> future proof. While it may not be very likely for new colliding > >> mnemonics to appear, if one would which then doesn't fit the chosen > >> pattern, the tweaking of the conditional would be more fragile than > >> the mere adding of Pass2 in the appropriate template. (Even if you > >> continue to object to it, simply consider the last patch of this series > >> an example.) > >> > > > > I like to avoid the second pass for many common instructions. > > The current assembler may not be ideal. But it works. > > So am I understanding correctly that you'd like me to drop Pass2 again, > accepting that a few too many insns would have the input buffer cloned > then? > Yes. It is too much to require the second pass for common instructions. -- H.J.
- Previous message (by thread): [PATCH v5 3/8] x86: re-work insn/suffix recognition
- Next message (by thread): [PATCH v5 4/8] ix86: don't recognize/derive Q suffix in the common case
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Binutils mailing list