/* ###
 * IP: GHIDRA
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "constseq.hh"
#include "funcdata.hh"

namespace ghidra {

const int4 ArraySequence::MINIMUM_SEQUENCE_LENGTH = 4;
const int4 ArraySequence::MAXIMUM_SEQUENCE_LENGTH = 0x20000;

/// Anchor a new, initially empty, sequence at the \b root operation, which writes the earliest
/// character in the memory region.  The basic block holding the root is recorded and the
/// element count starts at zero.
/// \param fdata is the function containing the sequence
/// \param ct is the data-type of an element in the array
/// \param root is the PcodeOp to be interpreted as the root, copying the earliest element
ArraySequence::ArraySequence(Funcdata &fdata,Datatype *ct,PcodeOp *root)
  : data(fdata)
{
  charType = ct;
  rootOp = root;
  numElements = 0;
  block = root->getParent();	// Same op as rootOp, so same parent block
}

/// The output Varnodes themselves should be verified to only be read outside of the basic block.
/// So effectively only LOADs, STOREs, and CALLs can really interfere.  Check for these between the given ops.
/// \param startOp is the starting op to check /// \param endOp is the ending op /// \return \b true if there is no interference, \b false if there is possible interference bool ArraySequence::interfereBetween(PcodeOp *startOp,PcodeOp *endOp) { startOp = startOp->nextOp(); while(startOp != endOp) { if (startOp->getEvalType() == PcodeOp::special) { OpCode opc = startOp->code(); if (opc != CPUI_INDIRECT && opc != CPUI_CALLOTHER && opc != CPUI_SEGMENTOP && opc != CPUI_CPOOLREF && opc != CPUI_NEW) return false; } startOp = startOp->nextOp(); } return true; } /// Sort the ops based on block order. Starting with the root op, walk backward until an interfering /// gap is found or until the earliest op is reached. Similarly, walk forward until an interfering gap is found. /// Truncate the op array to be this smaller set. If too many were truncated, return \b false. /// \return \b true if a maximal set of ops is found containing at the least the minimum number required bool ArraySequence::checkInterference(void) { sort(moveOps.begin(),moveOps.end()); // Sort ops based on basic block order int4 pos; for(pos=0;pos=0;--startingPos) { PcodeOp *prevOp = moveOps[startingPos].op; if (!interfereBetween(prevOp,curOp)) break; curOp = prevOp; } startingPos += 1; curOp = moveOps[pos].op; for(endingPos=pos+1;endingPos < moveOps.size();++endingPos) { PcodeOp *nextOp = moveOps[endingPos].op; if (!interfereBetween(curOp,nextOp)) break; curOp = nextOp; } if (endingPos- startingPos < MINIMUM_SEQUENCE_LENGTH) return false; if (startingPos > 0) { for(int4 i=startingPos;i used(sz,0); int4 elSize = charType->getSize(); for(int4 i=0;i sz) continue; uint8 val = moveOps[i].op->getIn(slot)->getOffset(); used[bytePos] = (val == 0) ? 
2 : 1; // Mark byte as used, a 2 indicates a null terminator if (bigEndian) { for(int4 j=0;j> (elSize-1-j)*8) & 0xff; byteArray[bytePos+j] = b; } } else { for(int4 j=0;j>= 8; } } } int4 bigElSize = charType->getAlignSize(); int4 maxEl = used.size() / bigElSize; int4 count; for(count=0;count finalOps; for(int4 i=0;itypes; if (charType == types->getTypeChar(types->getSizeOfChar())) { index = numElements; return UserPcodeOp::BUILTIN_STRNCPY; } else if (charType == types->getTypeChar(types->getSizeOfWChar())) { index = numElements; return UserPcodeOp::BUILTIN_WCSNCPY; } index = numElements * charType->getAlignSize(); return UserPcodeOp::BUILTIN_MEMCPY; } /// \brief Set-up for recovering COPY ops into a memory range, given a Symbol and an Address being COPYed into /// /// The SymbolEntry and Address are passed in, with an expected data-type. Check if there is an array /// of the data-type within the Symbol, and if so, initialize the memory range for the the sequence. /// Follow on with gathering PcodeOps and testing if the sequence is viable. If not, the the size the memory /// range will be set to zero. 
/// \param fdata is the function containing the root COPY /// \param ct is the specific data-type for which there should be an array /// \param ent is the given Symbol /// \param root is the COPY holding the constant /// \param addr is the Address being COPYed into StringSequence::StringSequence(Funcdata &fdata,Datatype *ct,SymbolEntry *ent,PcodeOp *root,const Address &addr) : ArraySequence(fdata,ct,root) { rootAddr = addr; entry = ent; if (entry->getAddr().getSpace() != addr.getSpace()) return; int8 off = rootAddr.getOffset() - entry->getFirst(); if (off >= entry->getSize()) return; if (rootOp->getIn(0)->getOffset() == 0) return; Datatype *parentType = entry->getSymbol()->getType(); Datatype *arrayType = (Datatype *)0; int8 lastOff = 0; do { if (parentType == ct) break; arrayType = parentType; lastOff = off; parentType = parentType->getSubType(off, &off); } while(parentType != (Datatype *)0); if (parentType != ct || arrayType == (Datatype *)0 || arrayType->getMetatype() != TYPE_ARRAY) return; startAddr = rootAddr - lastOff; if (!collectCopyOps(arrayType->getSize())) return; if (!checkInterference()) return; int4 arrSize = arrayType->getSize() - (int4)(rootAddr.getOffset() - startAddr.getOffset()); numElements = formByteArray(arrSize,0,rootAddr.getOffset(),rootAddr.isBigEndian()); } /// The COPYs must be in the same basic block. /// If any COPY size does not match the \b copyType, return \b false. /// If there is a COPY to the array entry before rootVn, return \b false. /// Otherwise earlier COPYs are skipped. No COPYs are collected after the first gap (entry with no COPY to it). /// \param size is the number of bytes in the memory region /// \return \b true to indicate legal COPY ops of constants were recovered. 
bool StringSequence::collectCopyOps(int4 size)

{
  const int4 elemStride = charType->getAlignSize();
  Address endAddr = startAddr + (size - 1);		// startAddr - endAddr bounds the formal array
  Address beginAddr = startAddr;			// Start search for COPYs at the start of the array
  if (startAddr != rootAddr)
    beginAddr = rootAddr - elemStride;	// or the first address before the root address (whichever is later)

  VarnodeLocSet::const_iterator iter = data.beginLoc(beginAddr);
  VarnodeLocSet::const_iterator enditer = data.endLoc(endAddr);
  int4 diff = rootAddr.getOffset() - startAddr.getOffset();	// Offset of the last accepted element
  for(;iter != enditer;++iter) {
    Varnode *vn = *iter;
    if (!vn->isWritten()) continue;
    PcodeOp *op = vn->getDef();
    // Only COPYs of a constant, within the root's basic block, are candidates
    if (op->code() != CPUI_COPY || op->getParent() != block || !op->getIn(0)->isConstant())
      continue;
    if (vn->getSize() != charType->getSize())
      return false;		// COPY is the wrong size (has yet to be split)
    int4 tmpDiff = vn->getOffset() - startAddr.getOffset();
    if (tmpDiff < diff) {
      if (tmpDiff + elemStride == diff)
	return false;		// COPY to previous element, rootVn is not the first in sequence
      continue;
    }
    if (tmpDiff > diff) {
      int4 gap = tmpDiff - diff;
      if (gap < elemStride)
	continue;		// Lands inside the current element
      if (gap > elemStride)
	break;			// Gap in COPYs
      diff = tmpDiff;		// Advanced by one character
    }
    moveOps.emplace_back(vn->getOffset(),op,-1);
  }
  return (moveOps.size() >= MINIMUM_SEQUENCE_LENGTH);
}

/// \brief Construct a Varnode, with data-type, that acts as a pointer (in)to the Symbol to the root Address
///
/// First, a PTRSUB is built from the base register to the Symbol. Then depending on its data-type, additional
/// PTRSUBs and PTRADDs are built to get from the start of the Symbol to the memory region holding the character data.
/// All the new Varnodes have the appropriate pointer data-type set. The final Varnode holding the pointer to
/// the memory region is returned.
/// \param insertPoint is the point before which all new PTRSUBs and PTRADDs are inserted Varnode *StringSequence::constructTypedPointer(PcodeOp *insertPoint) { Varnode *spacePtr; AddrSpace *spc = rootAddr.getSpace(); TypeFactory *types = data.getArch()->types; if (spc->getType() == IPTR_SPACEBASE) spacePtr = data.constructSpacebaseInput(spc); else spacePtr = data.constructConstSpacebase(spc); Datatype *baseType = entry->getSymbol()->getType(); PcodeOp *ptrsub = data.newOp(2, insertPoint->getAddr()); data.opSetOpcode(ptrsub, CPUI_PTRSUB); data.opSetInput(ptrsub,spacePtr,0); uintb baseOff = AddrSpace::byteToAddress(entry->getFirst(),spc->getWordSize()); // Convert to address units data.opSetInput(ptrsub,data.newConstant(spacePtr->getSize(), baseOff),1); spacePtr = data.newUniqueOut(spacePtr->getSize(), ptrsub); data.opInsertBefore(ptrsub, insertPoint); TypePointer *curType = types->getTypePointerStripArray(spacePtr->getSize(), baseType, spc->getWordSize()); spacePtr->updateType(curType, false, false); int8 curOff = rootAddr.getOffset() - entry->getFirst(); while(baseType != charType) { int4 elSize = -1; if (baseType->getMetatype() == TYPE_ARRAY) elSize = ((TypeArray *)baseType)->getBase()->getAlignSize(); int8 newOff; baseType = baseType->getSubType(curOff, &newOff ); if (baseType == (Datatype *)0) break; curOff -= newOff; baseOff = AddrSpace::byteToAddress(curOff, spc->getWordSize()); if (elSize >= 0) { if (curOff == 0) { // Don't create a PTRADD( #0, ...) 
// spacePtr already has data-type with ARRAY stripped // baseType is already updated continue; } ptrsub = data.newOp(3, insertPoint->getAddr()); data.opSetOpcode(ptrsub, CPUI_PTRADD); int8 numEl = curOff / elSize; data.opSetInput(ptrsub,data.newConstant(4, numEl),1); data.opSetInput(ptrsub,data.newConstant(4,elSize),2); } else { ptrsub = data.newOp(2, insertPoint->getAddr()); data.opSetOpcode(ptrsub, CPUI_PTRSUB); data.opSetInput(ptrsub,data.newConstant(spacePtr->getSize(), baseOff), 1); } data.opSetInput(ptrsub,spacePtr,0); spacePtr = data.newUniqueOut(spacePtr->getSize(), ptrsub); data.opInsertBefore(ptrsub, insertPoint); curType = types->getTypePointerStripArray(spacePtr->getSize(), baseType, spc->getWordSize()); spacePtr->updateType(curType, false, false); curOff = newOff; } if (curOff != 0) { PcodeOp *addOp = data.newOp(2, insertPoint->getAddr()); data.opSetOpcode(addOp, CPUI_INT_ADD); data.opSetInput(addOp, spacePtr, 0); baseOff = AddrSpace::byteToAddress(curOff, spc->getWordSize()); data.opSetInput(addOp, data.newConstant(spacePtr->getSize(), baseOff), 1); spacePtr = data.newUniqueOut(spacePtr->getSize(), addOp); data.opInsertBefore(addOp, insertPoint); curType = types->getTypePointer(spacePtr->getSize(), charType, spc->getWordSize()); spacePtr->updateType(curType, false, false); } return spacePtr; } /// A built-in user-op that copies string data is created. Its first (destination) parameter is constructed /// as a pointer to the array holding the character data, which may be nested in other arrays or structures. /// The second (source) parameter is an \e internal \e string constructed from the \b byteArray. The /// third parameter is the constant indicating the length of the string. The \e user-op is inserted just before /// the last PcodeOp moving a character into the memory region. 
/// \return the constructed PcodeOp representing the \b memcpy PcodeOp *StringSequence::buildStringCopy(void) { PcodeOp *insertPoint = moveOps[0].op; // Earliest COPY in the block int4 numBytes = moveOps.size() * charType->getSize(); Architecture *glb = data.getArch(); TypeFactory *types = glb->types; Datatype *charPtrType = types->getTypePointer(types->getSizeOfPointer(),charType,rootAddr.getSpace()->getWordSize()); Varnode *srcPtr = data.getInternalString(byteArray.data(), numBytes, charPtrType, insertPoint); if (srcPtr == (Varnode *)0) return (PcodeOp *)0; int4 index; uint4 builtInId = selectStringCopyFunction(index); glb->userops.registerBuiltin(builtInId); PcodeOp *copyOp = data.newOp(4,insertPoint->getAddr()); data.opSetOpcode(copyOp, CPUI_CALLOTHER); data.opSetInput(copyOp, data.newConstant(4, builtInId), 0); Varnode *destPtr = constructTypedPointer(insertPoint); data.opSetInput(copyOp, destPtr, 1); data.opSetInput(copyOp, srcPtr, 2); Varnode *lenVn = data.newConstant(4,index); lenVn->updateType(copyOp->inputTypeLocal(3), false, false); data.opSetInput(copyOp, lenVn, 3); data.opInsertBefore(copyOp, insertPoint); return copyOp; } /// \brief Analyze output descendants of the given PcodeOp being removed /// /// Record any \b points where the output is being read, for later replacement. /// Keep track of CPUI_PIECE ops whose input is from a PcodeOp being removed, and if both inputs are /// visited, remove the input \e points and add the CPUI_PIECE to the list of PcodeOps being removed. 
/// \param curNode is the given PcodeOp being removed /// \param xref are the set of CPUI_PIECE ops with one input visited /// \param points is the set of input points whose PcodeOp is being removed /// \param deadOps is the current collection of PcodeOps being removed void StringSequence::removeForward(const WriteNode &curNode,map::iterator> &xref, list &points,vector &deadOps) { Varnode *vn = curNode.op->getOut(); list::const_iterator iter; for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { PcodeOp *op = *iter; map::iterator>::iterator miter = xref.find(op); if (miter != xref.end()) { // We have seen the PIECE twice uint8 off = (*(*miter).second).offset; if (curNode.offset < off) off = curNode.offset; points.erase((*miter).second); deadOps.emplace_back(off,op,-1); } else { int4 slot = op->getSlot(vn); points.emplace_back(curNode.offset,op,slot); if (op->code() == CPUI_PIECE) { list::iterator xrefIter = points.end(); --xrefIter; xref[op] = xrefIter; } } } } /// The COPY ops are removed. Any descendants of the COPY output are redefined with an INDIRECT around /// the a CALLOTHER op. If the COPYs feed into a PIECE op (as part of a CONCAT stack), the PIECE is removed /// as well, which may cascade into removal of other PIECE ops in the stack. 
/// \param replaceOp is the CALLOTHER op creating the INDIRECT effect void StringSequence::removeCopyOps(PcodeOp *replaceOp) { map::iterator> concatSet; list points; vector deadOps; for(int4 i=0;i::iterator iter=points.begin();iter!=points.end();++iter) { PcodeOp *op = (*iter).op; Varnode *vn = op->getIn((*iter).slot); if (vn->getDef()->code() != CPUI_INDIRECT) { Varnode *newIn = data.newConstant(vn->getSize(),0); PcodeOp *indOp = data.newOp(2, replaceOp->getAddr()); data.opSetOpcode(indOp,CPUI_INDIRECT); data.opSetInput(indOp,newIn,0); data.opSetInput(indOp,data.newVarnodeIop(replaceOp),1); data.opSetOutput(indOp, vn); data.markIndirectCreation(indOp, false); data.opInsertBefore(indOp,replaceOp); } } for(int4 i=0;iisWritten()) { PcodeOp *op = basePointer->getDef(); OpCode opc = op->code(); if (opc == CPUI_PTRADD) { int8 sz = op->getIn(2)->getOffset(); if (sz != ptrAddMult) break; } else if (opc != CPUI_COPY) break; basePointer = op->getIn(0); } } /// Back-track from \b basePointer through PTRSUBs, PTRADDs, and INT_ADDs to an earlier root, keeping track /// of any offsets. If an earlier root exists, trace forward, through ops trying to match the offsets. /// For trace of ops whose offsets match exactly, the resulting Varnode is added to the list of duplicates. 
/// \param duplist will hold the list of duplicate Varnodes (including \b basePointer) void HeapSequence::findDuplicateBases(vector &duplist) { if (!basePointer->isWritten()) { duplist.push_back(basePointer); return; } PcodeOp *op = basePointer->getDef(); OpCode opc = op->code(); if ((opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) || !op->getIn(1)->isConstant()) { duplist.push_back(basePointer); return; } Varnode *copyRoot = basePointer; vector offset; do { uintb off = op->getIn(1)->getOffset(); if (opc == CPUI_PTRADD) off *= op->getIn(2)->getOffset(); offset.push_back(off); copyRoot = op->getIn(0); if (!copyRoot->isWritten()) break; op = copyRoot->getDef(); opc = op->code(); if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRSUB) break; } while(op->getIn(1)->isConstant()); duplist.push_back(copyRoot); vector midlist; for(int4 i=offset.size()-1;i>=0;--i) { duplist.swap(midlist); duplist.clear(); for(int4 j=0;j::const_iterator iter = vn->beginDescend(); while(iter != vn->endDescend()) { op = *iter; ++iter; opc = op->code(); if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRSUB) continue; if (op->getIn(0) != vn || !op->getIn(1)->isConstant()) continue; uintb off = op->getIn(1)->getOffset(); if (opc == CPUI_PTRADD) off *= op->getIn(2)->getOffset(); if (off != offset[i]) continue; duplist.push_back(op->getOut()); } } } } /// Find STOREs with pointers derived from the \b basePointer and that are in the same /// basic block as the root STORE. The root STORE is \e not included in the resulting set. 
/// \param stores holds the collected STOREs void HeapSequence::findInitialStores(vector &stores) { vector ptradds; findDuplicateBases(ptradds); int4 pos = 0; while(pos < ptradds.size()) { Varnode *vn = ptradds[pos]; pos += 1; list::const_iterator iter; for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { PcodeOp *op = *iter; OpCode opc = op->code(); if (opc == CPUI_PTRADD) { if (op->getIn(0) != vn) continue; // We only check array element size here, if we checked the data-type, we would // need to take into account different pointer styles to the same element data-type if (op->getIn(2)->getOffset() != ptrAddMult) continue; ptradds.push_back(op->getOut()); } else if (opc == CPUI_COPY) { ptradds.push_back(op->getOut()); } else if (opc == CPUI_STORE && op->getParent() == block && op != rootOp) { if (op->getIn(1) != vn) continue; stores.push_back(op); } } } } /// \brief Recursively walk an ADD tree from a given root, collecting offsets and non-constant elements /// /// The constant offsets are returned as a final summed offset. Any non-constant Varnodes encountered are /// passed back in a list. Recursion is depth limited. /// \param vn is the given root of ADD tree /// \param nonConst will hold the list of non-constant Varnodes in the tree /// \param maxDepth is the maximum recursion depth /// \return the sum of all constant offsets uint8 HeapSequence::calcAddElements(Varnode *vn,vector &nonConst,int4 maxDepth) { if (vn->isConstant()) return vn->getOffset(); if (!vn->isWritten()||vn->getDef()->code() != CPUI_INT_ADD || maxDepth == 0) { nonConst.push_back(vn); return 0; } uint8 res = calcAddElements(vn->getDef()->getIn(0),nonConst,maxDepth-1); res += calcAddElements(vn->getDef()->getIn(1),nonConst,maxDepth-1); return res; } /// \brief Calculate the byte offset and any non-constant additive elements between the given Varnode and the \b basePointer /// /// Walk backward from the given Varnode thru PTRADDs and COPYs, summing any offsets encountered. 
/// Any non-constant Varnodes encountered in the path, that are not themselves a pointer, are passed back in a list. /// \param vn is the given Varnode to trace back to the \b basePointer /// \param nonConst will hold the list of non-constant Varnodes being passed back /// \return the sum off constant offsets on the path in byte units uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector &nonConst) { uint8 res = 0; while(vn->isWritten()) { PcodeOp *op = vn->getDef(); OpCode opc = op->code(); if (opc == CPUI_PTRADD) { uint8 mult = op->getIn(2)->getOffset(); if (mult != ptrAddMult) break; uint8 off = calcAddElements(op->getIn(1),nonConst,3); off *= mult; res += off; vn = op->getIn(0); } else if (opc == CPUI_COPY) { vn = op->getIn(0); } else break; } return AddrSpace::addressToByteInt(res, storeSpace->getWordSize()); } /// \brief Determine if two sets of Varnodes are equal /// /// The sets are passed in as arrays that are assumed sorted. If the sets contain the /// exact same Varnodes, \b true is returned, \b false otherwise. /// \param op1 is the first set /// \param op2 is the second set /// \return \b true if and only if the sets are equal bool HeapSequence::setsEqual(const vector &op1,const vector &op2) { if (op1.size() != op2.size()) return false; for(int4 i=0;igetIn(2); if (!vn->isConstant()) return false; if (vn->getSize() != charType->getSize()) return false; return true; } /// Walk forward from the base pointer to all STORE ops from that pointer, keeping track of the offset. /// The final set of STOREs will all be in the same basic block as the root STORE and have /// a greater than or equal offset. If the minimum sequence size is reached, \b true is returned. /// \return \b true if the minimum number of STOREs is collected. 
bool HeapSequence::collectStoreOps(void) { vector initStores; findInitialStores(initStores); if (initStores.size() + 1 < MINIMUM_SEQUENCE_LENGTH) return false; uint8 maxSize = MAXIMUM_SEQUENCE_LENGTH * charType->getAlignSize(); // Maximum bytes uint8 wrapMask = calc_mask(storeSpace->getAddrSize()); baseOffset = calcPtraddOffset(rootOp->getIn(1), nonConstAdds); vector nonConstComp; for(int4 i=0;igetIn(1), nonConstComp); uint8 diff = (curOffset - baseOffset) & wrapMask; // Allow wrapping relative to base pointer if (setsEqual(nonConstAdds, nonConstComp)) { if (diff >= maxSize) return false; // Root is not the earliest STORE, or offsets span range larger then maxSize if (!testValue(op)) return false; moveOps.emplace_back(diff,op,-1); } } moveOps.emplace_back(0,rootOp,-1); return true; } /// A built-in user-op that copies string data is created. Its first (destination) parameter is /// the base pointer of the STOREs. with the base offset added to it. /// The second (source) parameter is an \e internal \e string constructed from the \b byteArray. The /// third parameter is the constant indicating the length of the string. The \e user-op is inserted just before /// the last PcodeOp moving a character into the memory region. 
/// \return the constructed PcodeOp representing the \b memcpy PcodeOp *HeapSequence::buildStringCopy(void) { PcodeOp *insertPoint = moveOps[0].op; // Earliest STORE in the block Datatype *charPtrType = rootOp->getIn(1)->getTypeReadFacing(rootOp); int4 numBytes = numElements * charType->getSize(); Architecture *glb = data.getArch(); Varnode *srcPtr = data.getInternalString(byteArray.data(), numBytes, charPtrType, insertPoint); if (srcPtr == (Varnode *)0) return (PcodeOp *)0; Varnode *destPtr = basePointer; if (baseOffset != 0 || !nonConstAdds.empty()) { // Create the index Varnode Varnode *indexVn = (Varnode *)0; Datatype *intType = glb->types->getBase(basePointer->getSize(), TYPE_INT); if (nonConstAdds.size() > 0) { // Add in any non-constant Varnodes indexVn = nonConstAdds[0]; for(int4 i=1;igetAddr()); data.opSetOpcode(addOp, CPUI_INT_ADD); data.opSetInput(addOp, indexVn, 0); data.opSetInput(addOp, nonConstAdds[i],1); indexVn = data.newUniqueOut(indexVn->getSize(), addOp); indexVn->updateType(intType, false, false); data.opInsertBefore(addOp, insertPoint); } } if (baseOffset != 0) { // Add in any non-zero constant uint8 numEl = baseOffset / charType->getAlignSize(); Varnode *cvn = data.newConstant(basePointer->getSize(), numEl); cvn->updateType(intType, false, false); if (indexVn == (Varnode *)0) indexVn = cvn; else { PcodeOp *addOp = data.newOp(2,insertPoint->getAddr()); data.opSetOpcode(addOp, CPUI_INT_ADD); data.opSetInput(addOp, indexVn, 0); data.opSetInput(addOp, cvn,1); indexVn = data.newUniqueOut(indexVn->getSize(), addOp); indexVn->updateType(intType, false, false); data.opInsertBefore(addOp, insertPoint); } } PcodeOp *ptrAdd = data.newOp(3,insertPoint->getAddr()); data.opSetOpcode(ptrAdd, CPUI_PTRADD); destPtr = data.newUniqueOut(basePointer->getSize(), ptrAdd); data.opSetInput(ptrAdd,basePointer,0); data.opSetInput(ptrAdd,indexVn,1); data.opSetInput(ptrAdd,data.newConstant(basePointer->getSize(), charType->getAlignSize()),2); 
destPtr->updateType(charPtrType, false, false); data.opInsertBefore(ptrAdd, insertPoint); } int4 index; uint4 builtInId = selectStringCopyFunction(index); glb->userops.registerBuiltin(builtInId); PcodeOp *copyOp = data.newOp(4,insertPoint->getAddr()); data.opSetOpcode(copyOp, CPUI_CALLOTHER); data.opSetInput(copyOp, data.newConstant(4, builtInId), 0); data.opSetInput(copyOp, destPtr, 1); data.opSetInput(copyOp, srcPtr, 2); Varnode *lenVn = data.newConstant(4,index); lenVn->updateType(copyOp->inputTypeLocal(3), false, false); data.opSetInput(copyOp, lenVn, 3); data.opInsertBefore(copyOp, insertPoint); return copyOp; } /// \brief Gather INDIRECT ops attached to the final sequence STOREs and their input/output Varnode pairs /// /// There may be chained INDIRECTs for a single storage location as it crosses multiple STORE ops. Only /// the initial input and final output are gathered. /// \param indirects will hold the INDIRECT ops attached to sequence STOREs /// \param pairs will hold Varnode pairs where the first in the pair is the input and the second is the output void HeapSequence::gatherIndirectPairs(vector &indirects,vector &pairs) { for(int4 i=0;ipreviousOp(); while(op != (PcodeOp *)0) { if (op->code() != CPUI_INDIRECT) break; op->setMark(); indirects.push_back(op); op = op->previousOp(); } } for(int4 i=0;igetOut(); bool hasUse = false; list::const_iterator iter; for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { PcodeOp *useOp = *iter; if (!useOp->isMark()) { // Look for read of outvn that is not by another STORE INDIRECT hasUse = true; break; } } if (hasUse) { // If it has another use Varnode *invn = op->getIn(0); while(invn->isWritten()) { PcodeOp *defOp = invn->getDef(); // Trace back to input Varnode that is not defined by a STORE INDIRECT if (!defOp->isMark()) break; invn = defOp->getIn(0); } pairs.push_back(invn); pairs.push_back(outvn); data.opUnsetOutput(op); } } for(int4 i=0;iclearMark(); } /// \brief Remove the given PcodeOp and any 
other ops that uniquely produce its inputs /// /// The given PcodeOp is always removed. PcodeOps are recursively removed, if the only data-flow /// path of their output is to the given op, and they are not a CALL or are otherwise special. /// \param op is the given PcodeOp to remove /// \param scratch is scratch space for holding void HeapSequence::removeRecursive(PcodeOp *op,vector &scratch) { scratch.clear(); scratch.push_back(op); int4 pos = 0; while(pos < scratch.size()) { op = scratch[pos]; pos += 1; for(int4 i=0;inumInput();++i) { Varnode *vn = op->getIn(i); if (!vn->isWritten() || vn->isAutoLive()) continue; if (vn->loneDescend() == (PcodeOp *)0) continue; PcodeOp *defOp = vn->getDef(); if (defOp->isCall() || defOp->isIndirectSource()) continue; scratch.push_back(defOp); } data.opDestroy(op); } } /// If the STORE pointer no longer has any other uses, remove the PTRADD producing it, recursively, /// up to the base pointer. INDIRECT ops surrounding any STORE that is removed are replaced with /// INDIRECTs around the user-op replacing the STOREs. /// \param replaceOp is the user-op replacement for the STOREs void HeapSequence::removeStoreOps(PcodeOp *replaceOp) { vector indirects; vector indirectPairs; vector scratch; gatherIndirectPairs(indirects, indirectPairs); for(int4 i=0;igetAddr()); data.opSetOpcode(newInd, CPUI_INDIRECT); data.opSetOutput(newInd,outvn); data.opSetInput(newInd,invn,0); data.opSetInput(newInd,data.newVarnodeIop(replaceOp),1); data.opInsertBefore(newInd, replaceOp); } } /// \brief Constructor for the sequence of STORE ops /// /// From a given STORE op, construct the sequence of STOREs off of the same root pointer. /// The STOREs must be in the same basic block. They can be out of order but must fill out a contiguous /// region of memory with a minimum number of character elements. The values being stored are accumulated /// in a byte array. The initial STORE must have the earliest offset in the sequence. 
If a sequence /// matching these conditions isn't found, the constructed object will be in an invalid state, and /// isInvalid() will return \b true. /// \param fdata is the function containing the sequence /// \param ct is the character data-type being STOREd /// \param root is the given (putative) initial STORE in the sequence HeapSequence::HeapSequence(Funcdata &fdata,Datatype *ct,PcodeOp *root) : ArraySequence(fdata,ct,root) { baseOffset = 0; storeSpace = root->getIn(0)->getSpaceFromConst(); ptrAddMult = AddrSpace::byteToAddressInt(charType->getAlignSize(), storeSpace->getWordSize()); findBasePointer(rootOp->getIn(1)); if (!collectStoreOps()) return; if (!checkInterference()) return; int4 arrSize = moveOps.size() * charType->getAlignSize(); bool bigEndian = storeSpace->isBigEndian(); numElements = formByteArray(arrSize, 2, 0, bigEndian); } /// The user-op representing the string move is created and all the STORE ops are removed. /// If successful \b true is returned. The transform fails (only) if the accumulated bytes do not /// represent a legal unicode string. /// \return \b true if STOREs are successfully converted to a user-op with a string representation bool HeapSequence::transform(void) { PcodeOp *memCpyOp = buildStringCopy(); if (memCpyOp == (PcodeOp *)0) return false; removeStoreOps(memCpyOp); return true; } void RuleStringCopy::getOpList(vector &oplist) const { oplist.push_back(CPUI_COPY); } /// \class RuleStringCopy /// \brief Replace a sequence of COPY ops moving single characters with a CALLOTHER copying a whole string /// /// Given a root COPY of a constant character, search for other COPYs in the same basic block that form a sequence /// of characters that can be interpreted as a single string. Replace the sequence of COPYs with a single /// \b memcpy or \b wcsncpy user-op. 
int4 RuleStringCopy::applyOp(PcodeOp *op,Funcdata &data) { if (!op->getIn(0)->isConstant()) return 0; // Constant Varnode *outvn = op->getOut(); Datatype *ct = outvn->getType(); if (!ct->isCharPrint()) return 0; // Copied to a "char" data-type Varnode if (ct->isOpaqueString()) return 0; if (!outvn->isAddrTied()) return 0; SymbolEntry *entry = data.getScopeLocal()->queryContainer(outvn->getAddr(), outvn->getSize(), op->getAddr()); if (entry == (SymbolEntry *)0) return 0; StringSequence sequence(data,ct,entry,op,outvn->getAddr()); if (!sequence.isValid()) return 0; if (!sequence.transform()) return 0; return 1; } void RuleStringStore::getOpList(vector &oplist) const { oplist.push_back(CPUI_STORE); } /// \class RuleStringStore /// \brief Replace a sequence of STORE ops moving single characters with a CALLOTHER copying a whole string /// /// Given a root STORE of a constant character, search for other STOREs in the same basic block off of the /// same base pointer that form a sequence a sequence that can be interpreted as a single string. Replace /// the STOREs with a single \b strncpy or \b wcsncpy user-op. int4 RuleStringStore::applyOp(PcodeOp *op,Funcdata &data) { if (!op->getIn(2)->isConstant()) return 0; // Constant Varnode *ptrvn = op->getIn(1); Datatype *ct = ptrvn->getTypeReadFacing(op); if (ct->getMetatype() != TYPE_PTR) return 0; ct = ((TypePointer *)ct)->getPtrTo(); if (!ct->isCharPrint()) return 0; // Copied to a "char" data-type Varnode if (ct->isOpaqueString()) return 0; HeapSequence sequence(data,ct,op); if (!sequence.isValid()) return 0; if (!sequence.transform()) return 0; return 1; } } // End namespace ghidra