Trying to understand ASM code

EDIT

I switched from memcmp to a home-brewed 13-byte compare function, and the homebrew doesn't have the extra instructions. So all I can guess is that the extra assembly is just a flaw in the optimizer.

if (!EQ13(&ti, &m_ti)) {  // in 2014, memcmp was not being optimized here
; Disassembly of the EQ13(&ti, &m_ti) test above: a 13-byte equality check done
; as one qword, one dword and one byte compare. All three pieces of the stack
; operand are loaded before the first compare is made.
000007FEF91B2CFE  mov         rdx,qword ptr [rsp]  ; bytes 0-7 of the stack operand
000007FEF91B2D02  movzx       eax,byte ptr [rsp+0Ch]  ; byte 12 of the stack operand
000007FEF91B2D07  mov         ecx,dword ptr [rsp+8]  ; bytes 8-11 of the stack operand
000007FEF91B2D0B  cmp         rdx,qword ptr [r10+28h]  ; compare bytes 0-7 with [r10+28h]
000007FEF91B2D0F  jne         TSccIter::SetTi+9Dh (7FEF91B2D1Dh)  ; differ -> 2D1Dh, the address right after the final je
000007FEF91B2D11  cmp         ecx,dword ptr [r10+30h]  ; compare bytes 8-11
000007FEF91B2D15  jne         TSccIter::SetTi+9Dh (7FEF91B2D1Dh)  ; differ -> 2D1Dh
000007FEF91B2D17  cmp         al,byte ptr [r10+34h]  ; compare byte 12
000007FEF91B2D1B  je          TSccIter::SetTi+0B1h (7FEF91B2D31h)  ; all 13 bytes equal -> jump past the fall-through code

My homebrew isn't perfect in this case since it does 3 movs at the start even though it is unlikely to ever check past the first mov. I need to work on that part.

ORIGINAL QUESTION

Here is asm code from MSVC 2010 showing how it can optimize a small, fixed-size memcmp (in this case, 13 bytes). I've seen this type of optimization a lot in our code, but never with the last 6 lines. Can anyone tell me why the last 6 lines of assembly are there? TransferItem is 13 bytes, so that explains the QWORD, DWORD, then BYTE cmps.

// Fixed-layout key made only of char members: 3 + 3 + 1 + 3 + 3 = 13 bytes.
// SetTi space-fills a padded copy of this struct and then writes 2 bytes into
// each 3-byte field, so the third byte of every field keeps the space fill.
struct TransferItem {
  char m_szCxrMkt1[3];  // first 2 bytes copied from SetTi's szCxrMkt1
  char m_szCxrOp1[3];   // first 2 bytes copied from szCxrOp1 when it is non-null and non-empty
  char m_chDelimiter;   // '*' when fWildCard is true, ':' otherwise
  char m_szCxrMkt2[3];  // first 2 bytes copied from SetTi's szCxrMkt2
  char m_szCxrOp2[3];   // first 2 bytes copied from szCxrOp2 when it is non-null and non-empty
};

...

if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
; Inlined 13-byte memcmp: qword + dword + byte compares of ti ([rsp]) against
; m_ti ([r10+28h]). Any mismatch jumps to 2BB2 with the flags of the failing cmp.
2B8E lea         rax,[rsp]  ; rax = &ti
2B92 mov         rdx,qword ptr [rax]  ; bytes 0-7 of ti
2B95 cmp         rdx,qword ptr [r10+28h]  ; vs bytes 0-7 of m_ti
2B99 jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  ; differ -> 2BB2
2B9B mov         edx,dword ptr [rax+8]  ; bytes 8-11 of ti
2B9E cmp         edx,dword ptr [r10+30h]  ; vs bytes 8-11 of m_ti
2BA2 jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  ; differ -> 2BB2
2BA4 movzx       edx,byte ptr [rax+0Ch]  ; byte 12 of ti
2BA8 cmp         dl,byte ptr [r10+34h]  ; vs byte 12 of m_ti
2BAC jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  ; differ -> 2BB2

; The remaining instructions build memcmp's int result in eax and then test it.
2BAE xor         eax,eax  ; all 13 bytes matched: result = 0
2BB0 jmp         TSccIter::SetTi+0A7h (7FEF9302BB7h)  ; skip the mismatch sequence
2BB2 sbb         eax,eax  ; mismatch: eax = 0 - CF = -1 if the cmp borrowed, else 0; CF is unchanged
2BB4 sbb         eax,0FFFFFFFFh  ; eax = eax + 1 - CF -> -1 when CF was set, +1 when it was clear
2BB7 test        eax,eax  ; sets ZF when the result is 0 (equal)
2BB9 je          TSccIter::SetTi+0CCh (7FEF9302BDCh) ; equal -> skip the if-body

Also, what is the point of xor eax,eax, which we know will produce zero, and then testing that known-zero value at 2BB7?

Here is the whole function

// Builds a candidate TransferItem from the arguments and, when it differs from
// the stored m_ti, copies it into m_ti and sets m_fQryChanged.
// Returns *this on every path; when m_fSkipSet is set nothing else is touched.
//
// fWildCard means match certain fields to '**' in the db
// szCxrMkt1,2 are required and cannot be null, '  ', or ''.
// szCxrOp1,2 can be null, '  ', or ''.
TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
    if (m_fSkipSet) 
        return *this;
    m_iSid = -1; // resets the iterator to search from the start
    // Pad the struct to 16 bytes so we can clear it with 2 QWORDS
    // We use a temp, ti, to detect if the new transferitem has changed
    class TransferItemPadded : public TransferItem {
        char padding[16 - sizeof(TransferItem)]; // get us to 16 bytes
    } ti;
    // NOTE(review): BUMP is not defined in this excerpt, and U8 is used as an
    // lvalue here; presumably both are macros yielding the 8-byte slots at
    // &ti and &ti + 8 -- confirm against their definitions.
    U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020;  // 8 spaces
    // copy in the params
    // CPY2 copies one WORD; the required market codes are dereferenced without a null check.
    CPY2(ti.m_szCxrMkt1, szCxrMkt1);
    if (szCxrOp1 && *szCxrOp1)
        CPY2(ti.m_szCxrOp1, szCxrOp1);
    ti.m_chDelimiter = (fWildCard) ? '*' : ':'; // this controls wild card matching  
    CPY2(ti.m_szCxrMkt2, szCxrMkt2);
    if (szCxrOp2 && *szCxrOp2)
        CPY2(ti.m_szCxrOp2, szCxrOp2);
    // see if different
    // Only the sizeof(TransferItem) leading bytes are compared and copied; the padding is ignored.
    if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
        memcpy(&m_ti, &ti, sizeof(TransferItem));
        m_fQryChanged  = true;
    }
    return *this;
}

// U8: unsigned 64-bit integer, spelled with the Microsoft-specific __int64 type.
typedef unsigned __int64 U8;
// CPY2(a,b): copy one WORD from the address b to the address a.
// Both parameters are parenthesized so that pointer expressions such as
// CPY2(p + 1, q + 1) cast the whole expression instead of only `p` / `q`.
#define CPY2(a,b) ((*(WORD*)(a)) = (*(WORD*)(b)))

And here's the whole asm

; Mixed source/disassembly of SetTi. Register use visible below: rcx = this
; (copied to r10), dl = fWildCard, r8 = szCxrMkt1, r9 = szCxrOp1; szCxrMkt2 and
; szCxrOp2 are loaded from memory. ti occupies [rsp]..[rsp+0Fh]; m_ti is at [r10+28h].
TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
2B10  sub         rsp,18h  ; reserve 24 bytes of stack for ti
    if (m_fSkipSet) 
2B14  cmp         byte ptr [rcx+0EAh],0  ; m_fSkipSet
2B1B  mov         r10,rcx  ; keep this in r10 (mov leaves the cmp flags intact)
        return *this;
2B1E  jne         TSccIter::SetTi+0CCh (7FEF9302BDCh)  ; m_fSkipSet != 0 -> return path at 2BDC
    m_iSid = -1; 
    class TransferItemPadded : public TransferItem {
        char padding[16 - sizeof(TransferItem)]; 
    } ti;
    U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020; 
2B24  mov         rax,2020202020202020h  ; eight ASCII spaces
2B2E  mov         byte ptr [rcx+36h],0FFh  ; byte store of -1: the m_iSid = -1 assignment, scheduled here
2B32  mov         qword ptr [rsp],rax  ; ti bytes 0-7 = spaces
2B36  mov         qword ptr [rsp+8],rax  ; ti bytes 8-15 = spaces
    CPY2(ti.m_szCxrMkt1, szCxrMkt1);
2B3B  movzx       eax,word ptr [r8]  ; 2 bytes from szCxrMkt1
2B3F  mov         word ptr [rsp],ax  ; -> ti.m_szCxrMkt1 (offset 0)
    if (szCxrOp1 && *szCxrOp1)
2B43  test        r9,r9  ; szCxrOp1 == null?
2B46  je          TSccIter::SetTi+47h (7FEF9302B57h)  ; null -> skip the copy
2B48  cmp         byte ptr [r9],0  ; empty string?
2B4C  je          TSccIter::SetTi+47h (7FEF9302B57h)  ; empty -> skip the copy
        CPY2(ti.m_szCxrOp1, szCxrOp1);
2B4E  movzx       eax,word ptr [r9]  ; 2 bytes from szCxrOp1
2B52  mov         word ptr [rsp+3],ax  ; -> ti.m_szCxrOp1 (offset 3)
    ti.m_chDelimiter = (fWildCard) ? '*' : ':'; 
2B57  mov         eax,3Ah  ; ':'
2B5C  mov         ecx,2Ah  ; '*'
2B61  test        dl,dl  ; fWildCard
2B63  cmovne      eax,ecx  ; fWildCard != 0 -> '*'
2B66  mov         byte ptr [rsp+6],al  ; -> ti.m_chDelimiter (offset 6)
    CPY2(ti.m_szCxrMkt2, szCxrMkt2);
2B6A  mov         rax,qword ptr [szCxrMkt2]  ; load the szCxrMkt2 argument from memory
2B6F  movzx       ecx,word ptr [rax]  ; 2 bytes from szCxrMkt2
    if (szCxrOp2 && *szCxrOp2)
2B72  mov         rax,qword ptr [szCxrOp2]  ; load the szCxrOp2 argument from memory
2B77  mov         word ptr [rsp+7],cx  ; -> ti.m_szCxrMkt2 (offset 7)
2B7C  test        rax,rax  ; szCxrOp2 == null?
2B7F  je          TSccIter::SetTi+7Eh (7FEF9302B8Eh)  ; null -> skip the copy
2B81  cmp         byte ptr [rax],0  ; empty string?
2B84  je          TSccIter::SetTi+7Eh (7FEF9302B8Eh)  ; empty -> skip the copy
        CPY2(ti.m_szCxrOp2, szCxrOp2);
2B86  movzx       eax,word ptr [rax]  ; 2 bytes from szCxrOp2
2B89  mov         word ptr [rsp+0Ah],ax  ; -> ti.m_szCxrOp2 (offset 10)
    if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E  lea         rax,[rsp]  ; rax = &ti
2B92  mov         rdx,qword ptr [rax]  ; bytes 0-7 of ti
2B95  cmp         rdx,qword ptr [r10+28h]  ; vs bytes 0-7 of m_ti
2B99  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  ; differ -> 2BB2
2B9B  mov         edx,dword ptr [rax+8]  ; bytes 8-11 of ti
2B9E  cmp         edx,dword ptr [r10+30h]  ; vs bytes 8-11 of m_ti
2BA2  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  ; differ -> 2BB2
2BA4  movzx       edx,byte ptr [rax+0Ch]  ; byte 12 of ti
2BA8  cmp         dl,byte ptr [r10+34h]  ; vs byte 12 of m_ti
2BAC  jne         TSccIter::SetTi+0A2h (7FEF9302BB2h)  ; differ -> 2BB2
2BAE  xor         eax,eax  ; all 13 bytes matched: memcmp result = 0
2BB0  jmp         TSccIter::SetTi+0A7h (7FEF9302BB7h)  ; skip the mismatch sequence
2BB2  sbb         eax,eax  ; mismatch: eax = -1 if the failing cmp set CF, else 0; CF is unchanged
2BB4  sbb         eax,0FFFFFFFFh  ; eax = eax + 1 - CF -> -1 or +1
2BB7  test        eax,eax  ; ZF set when the result is 0
2BB9  je          TSccIter::SetTi+0CCh (7FEF9302BDCh)  ; equal -> skip the if-body
        memcpy(&m_ti, &ti, sizeof(TransferItem));
2BBB  mov         rax,qword ptr [rsp]  ; inlined 13-byte memcpy: bytes 0-7
        m_fQryChanged  = true;
2BBF  mov         byte ptr [r10+0E9h],1  ; m_fQryChanged = true, interleaved with the copy
2BC7  mov         qword ptr [r10+28h],rax  ; m_ti bytes 0-7
2BCB  mov         eax,dword ptr [rsp+8]  ; bytes 8-11
2BCF  mov         dword ptr [r10+30h],eax  ; m_ti bytes 8-11
2BD3  movzx       eax,byte ptr [rsp+0Ch]  ; byte 12
2BD8  mov         byte ptr [r10+34h],al  ; m_ti byte 12
    }
    return *this;
2BDC  mov         rax,r10  ; return value = this
}

2BB7 can be reached by different code paths: via the taken jumps at 2B99, 2BA2 and 2BAC (which land on 2BB2 and fall through the two sbb instructions), as well as directly when none of the conditional jumps is taken. The xor eax,eax is executed only on that last path, and it ensures that eax is 0 - which is apparently not the case otherwise.


The last 6 lines return the memcmp result in eax (eax == 0 means a match) and set the SF and ZF condition codes.


test eax, eax will test whether eax AND eax == 0. The following je will jump if zero.

And xor eax, eax is an efficient way to encode "eax = 0". It is more efficient than mov eax, 0

EDIT: Initially misread the question. It looks like something will happen at "TSccIter::SetTi+0A7h" which should change the value?

Also, the SBB trick to replicate the carry (2BB2-2BB4) is explained here:

http://compgroups.net/comp.lang.asm.x86/trick-with-sbb-instruction/20164

链接地址: http://www.djcxy.com/p/2492.html

上一篇: 编译用于高放射性环境的应用程序

下一篇: 试图了解ASM代码