试图了解ASM代码

编辑

我把 memcmp 换成了自己手写的 13 字节比较函数,手写版本生成的代码里没有那些多余的指令。所以我只能猜测,那些多余的汇编只是优化器的一个缺陷。

if (!EQ13(&ti, &m_ti)) {  // in 2014, memcmp was not being optimized here
000007FEF91B2CFE  mov         rdx,qword ptr [rsp]  
000007FEF91B2D02  movzx       eax,byte ptr [rsp+0Ch]  
000007FEF91B2D07  mov         ecx,dword ptr [rsp+8]  
000007FEF91B2D0B  cmp         rdx,qword ptr [r10+28h]  
000007FEF91B2D0F  jne         TSccIter::SetTi+9Dh (7FEF91B2D1Dh)  
000007FEF91B2D11  cmp         ecx,dword ptr [r10+30h]  
000007FEF91B2D15  jne         TSccIter::SetTi+9Dh (7FEF91B2D1Dh)  
000007FEF91B2D17  cmp         al,byte ptr [r10+34h]  
000007FEF91B2D1B  je          TSccIter::SetTi+0B1h (7FEF91B2D31h)  

我的手写版本在这里也并不完美:它一开始就执行了 3 条 mov,尽管比较通常不会越过第一条 mov 所加载的那部分数据。这部分我还需要改进。

原始问题

这里是 MSVC 2010 生成的汇编代码,展示了它如何优化一个小的、固定大小的 memcmp(本例中为 13 个字节)。这种优化我在我们的代码中经常见到,但从未见过带有最后 6 行的情况。谁能告诉我最后 6 行汇编为什么会出现在那里?TransferItem 是 13 个字节,这就解释了为什么依次是 QWORD、DWORD、BYTE 的 cmp。

// Fixed-layout key that SetTi compares and copies as raw bytes
// (memcmp/memcpy over sizeof(TransferItem)). Every member is char, so the
// size is 3+3+1+3+3 = 13 bytes.
// As filled by SetTi, each 3-byte field receives 2 bytes from the caller and
// its third byte keeps the space fill; the fields are not NUL-terminated there.
struct TransferItem {
  char m_szCxrMkt1[3];  // offset 0: 2 bytes copied from szCxrMkt1
  char m_szCxrOp1[3];   // offset 3: 2 bytes copied from szCxrOp1 when non-null and non-empty
  char m_chDelimiter;   // offset 6: '*' when fWildCard is true, otherwise ':'
  char m_szCxrMkt2[3];  // offset 7: 2 bytes copied from szCxrMkt2
  char m_szCxrOp2[3];   // offset 10: 2 bytes copied from szCxrOp2 when non-null and non-empty
};

...

if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E lea         rax,[rsp]  
2B92 mov         rdx,qword ptr [rax]  
2B95 cmp         rdx,qword ptr [r10+28h]  
2B99 jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2B9B mov         edx,dword ptr [rax+8]  
2B9E cmp         edx,dword ptr [r10+30h]  
2BA2 jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BA4 movzx       edx,byte ptr [rax+0Ch]  
2BA8 cmp         dl,byte ptr [r10+34h]  
2BAC jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  

2BAE xor         eax,eax  
2BB0 jmp         TSccIter::SetTi+0A7h (7FEF9302BB7h)  
2BB2 sbb         eax,eax  
2BB4 sbb         eax,0FFFFFFFFh  
2BB7 test        eax,eax  
2BB9 je          TSccIter::SetTi+0CCh (7FEF9302BDCh) 

另外,xor eax,eax 的结果我们已知为零,那为什么还要在 2BB7 处再去测试这个已知为零的 eax 呢?

这是整个函数

// Builds a candidate TransferItem from the arguments and stores it in m_ti
// only if it differs from the current one; always returns *this.
// m_iSid is reset on every call that is not skipped, whether or not the item
// changed; m_fQryChanged is set only when m_ti is actually overwritten.
// fWildCard means match certain fields to '**' in the db
// szCxrMkt1,2 are required and cannot be null, '  ', or ''.
// szCxrOp1,2 can be null, '  ', or ''.
TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
    // When m_fSkipSet is set the call is a no-op.
    if (m_fSkipSet) 
        return *this;
    m_iSid = -1; // resets the iterator to search from the start
    // Pad the struct to 16 bytes so we can clear it with 2 QWORDS
    // We use a temp, ti, to detect if the new transferitem has changed
    class TransferItemPadded : public TransferItem {
        char padding[16 - sizeof(TransferItem)]; // get us to 16 bytes
    } ti;
    // NOTE(review): U8(...) and BUMP(...) are defined outside this snippet;
    // presumably this writes the 8-space pattern to bytes 0-7 and 8-15 of ti
    // (the disassembly shows two qword stores) -- confirm against their definitions.
    U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020;  // 8 spaces
    // copy in the params
    // CPY2 copies exactly two bytes, so the third byte of each field stays a space.
    CPY2(ti.m_szCxrMkt1, szCxrMkt1);
    // Null or empty szCxrOp1 leaves the field as spaces.
    if (szCxrOp1 && *szCxrOp1)
        CPY2(ti.m_szCxrOp1, szCxrOp1);
    ti.m_chDelimiter = (fWildCard) ? '*' : ':'; // this controls wild card matching  
    CPY2(ti.m_szCxrMkt2, szCxrMkt2);
    // Null or empty szCxrOp2 leaves the field as spaces.
    if (szCxrOp2 && *szCxrOp2)
        CPY2(ti.m_szCxrOp2, szCxrOp2);
    // see if different
    // Only sizeof(TransferItem) bytes are compared and copied, so the padding
    // bytes of ti never take part.
    if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
        memcpy(&m_ti, &ti, sizeof(TransferItem));
        m_fQryChanged  = true;
    }
    return *this;
}

// 64-bit unsigned integer (MSVC-specific __int64 spelling).
// NOTE(review): SetTi uses U8(ptr) as an assignable expression, which this
// typedef alone cannot provide (a function-style cast is not an lvalue);
// presumably a function-like macro named U8 also exists elsewhere -- confirm.
typedef unsigned __int64 U8;
#define CPY2(a,b) ((*(WORD*)a) = (*(WORD*)b))

这是整个函数的完整汇编清单

TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
2B10  sub         rsp,18h  
    if (m_fSkipSet) 
2B14  cmp         byte ptr [rcx+0EAh],0  
2B1B  mov         r10,rcx  
        return *this;
2B1E  jne         TSccIter::SetTi+0CCh (7FEF9302BDCh)  
    m_iSid = -1; 
    class TransferItemPadded : public TransferItem {
        char padding[16 - sizeof(TransferItem)]; 
    } ti;
    U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020; 
2B24  mov         rax,2020202020202020h  
2B2E  mov         byte ptr [rcx+36h],0FFh  
2B32  mov         qword ptr [rsp],rax  
2B36  mov         qword ptr [rsp+8],rax  
    CPY2(ti.m_szCxrMkt1, szCxrMkt1);
2B3B  movzx       eax,word ptr [r8]  
2B3F  mov         word ptr [rsp],ax  
    if (szCxrOp1 && *szCxrOp1)
2B43  test        r9,r9  
2B46  je          TSccIter::SetTi+47h (7FEF9302B57h)  
2B48  cmp         byte ptr [r9],0  
2B4C  je          TSccIter::SetTi+47h (7FEF9302B57h)  
        CPY2(ti.m_szCxrOp1, szCxrOp1);
2B4E  movzx       eax,word ptr [r9]  
2B52  mov         word ptr [rsp+3],ax  
    ti.m_chDelimiter = (fWildCard) ? '*' : ':'; 
2B57  mov         eax,3Ah  
2B5C  mov         ecx,2Ah  
2B61  test        dl,dl  
2B63  cmovne      eax,ecx  
2B66  mov         byte ptr [rsp+6],al  
    CPY2(ti.m_szCxrMkt2, szCxrMkt2);
2B6A  mov         rax,qword ptr [szCxrMkt2]  
2B6F  movzx       ecx,word ptr [rax]  
    if (szCxrOp2 && *szCxrOp2)
2B72  mov         rax,qword ptr [szCxrOp2]  
2B77  mov         word ptr [rsp+7],cx  
2B7C  test        rax,rax  
2B7F  je          TSccIter::SetTi+7Eh (7FEF9302B8Eh)  
2B81  cmp         byte ptr [rax],0  
2B84  je          TSccIter::SetTi+7Eh (7FEF9302B8Eh)  
        CPY2(ti.m_szCxrOp2, szCxrOp2);
2B86  movzx       eax,word ptr [rax]  
2B89  mov         word ptr [rsp+0Ah],ax  
    if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E  lea         rax,[rsp]  
2B92  mov         rdx,qword ptr [rax]  
2B95  cmp         rdx,qword ptr [r10+28h]  
2B99  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2B9B  mov         edx,dword ptr [rax+8]  
2B9E  cmp         edx,dword ptr [r10+30h]  
2BA2  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BA4  movzx       edx,byte ptr [rax+0Ch]  
2BA8  cmp         dl,byte ptr [r10+34h]  
2BAC  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  
2BAE  xor         eax,eax  
2BB0  jmp         TSccIter::SetTi+0A7h (7FEF9302BB7h)  
2BB2  sbb         eax,eax  
2BB4  sbb         eax,0FFFFFFFFh  
2BB7  test        eax,eax  
2BB9  je          TSccIter::SetTi+0CCh (7FEF9302BDCh)  
        memcpy(&m_ti, &ti, sizeof(TransferItem));
2BBB  mov         rax,qword ptr [rsp]  
        m_fQryChanged  = true;
2BBF  mov         byte ptr [r10+0E9h],1  
2BC7  mov         qword ptr [r10+28h],rax  
2BCB  mov         eax,dword ptr [rsp+8]  
2BCF  mov         dword ptr [r10+30h],eax  
2BD3  movzx       eax,byte ptr [rsp+0Ch]  
2BD8  mov         byte ptr [r10+34h],al  
    }
    return *this;
2BDC  mov         rax,r10  
}

2BB7 可以通过不同的代码路径到达:一种是 2B99、2BA2、2BAC 处的条件跳转被触发(先跳到 2BB2,再顺序执行到 2BB7);另一种是三个条件跳转都没有发生,经由 2BB0 处的 jmp 直接到达。xor eax,eax 只在后一条路径上执行,用来确保 eax 为 0——而在其他路径上 eax 显然不是 0。


最后 6 行在 eax 中生成比较结果(匹配时 eax == 0),并相应地设置 SF 和 ZF 条件码。


test eax,eax 会检查 eax 与自身按位与(AND)的结果是否为 0,也就是检查 eax 是否为 0。

而 xor eax,eax 是实现“eax = 0”的一种常用写法,它比 mov eax,0 更高效。

编辑:最初误读了这个问题。 它看起来会发生在“TSccIter :: SetTi + 0A7h”应该改变值?

此外,用 SBB 指令复制进位标志的技巧(2BB2–2BB4)在这里有解释:

http://compgroups.net/comp.lang.asm.x86/trick-with-sbb-instruction/20164

链接地址: http://www.djcxy.com/p/2491.html

上一篇: Trying to understand ASM code

下一篇: How does loop address alignment affect the speed on Intel x86