From de842dbd32b0f27d9d7f222f039f6c8ebfa87598 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Wed, 9 Jul 2025 21:14:59 +0000 Subject: [PATCH] GP-5816 Fix return recovery for AARCH64 and ARM --- .../Decompiler/src/decompile/cpp/Doxyfile | 10 +-- .../src/decompile/cpp/inject_sleigh.cc | 76 +++++++++++++++---- .../src/decompile/cpp/inject_sleigh.hh | 68 +++++++++++------ .../src/decompile/cpp/ruleaction.cc | 24 ++++-- .../Decompiler/src/decompile/cpp/varnode.cc | 6 ++ .../src/decompile/datatests/retstruct.xml | 2 +- .../AARCH64/data/languages/AARCH64.cspec | 3 + .../AARCH64/data/languages/AARCH64_win.cspec | 3 + 8 files changed, 135 insertions(+), 57 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile b/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile index 6b2dce549e..d0808edede 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile @@ -457,7 +457,7 @@ RECURSIVE = NO # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. -EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc inject_sleigh.hh inject_sleigh.cc context.hh context.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc +EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc context.hh context.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc # The EXCLUDE_SYMLINKS tag can be used select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded @@ -589,12 +589,6 @@ VERBATIM_HEADERS = NO ALPHABETICAL_INDEX = NO -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that @@ -744,7 +738,7 @@ COMPACT_LATEX = NO # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. -PAPER_TYPE = a4wide +PAPER_TYPE = a4 # The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX # packages that should be included in the LaTeX output. diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc index e3abd636f3..d1c550e535 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -33,6 +33,10 @@ InjectPayloadSleigh::~InjectPayloadSleigh(void) delete tpl; } +/// Create an empty payload in preparation for parsing the injection from a stream +/// \param src is a name or other description of the document to be parsed +/// \param nm is the name of the injection +/// \param tp is the type of injection InjectPayloadSleigh::InjectPayloadSleigh(const string &src,const string &nm,int4 tp) : InjectPayload(nm,tp) { @@ -63,8 +67,8 @@ void InjectPayloadSleigh::inject(InjectContext &context,PcodeEmit &emit) const con.cacher.emit(con.baseaddr,&emit); } -/// The content is read as raw p-code source -/// \param decoder is the XML stream decoder +/// The content is read as raw p-code source. +/// \param decoder is the stream decoder void InjectPayloadSleigh::decodeBody(Decoder &decoder) { @@ -95,11 +99,18 @@ void InjectPayloadSleigh::printTemplate(ostream &s) const tpl->encode(encoder,-1); } +/// \brief Verify that storage locations passed in -con- match the restrictions set for a given payload +/// +/// If the parsed injection does not match the restrictions, an exception is thrown. +/// \param con is the SLEIGH context established after parsing the injection +/// \param inputlist is the list of input parameters specified for the given payload +/// \param output is the list of output parameters specified for the given payload +/// \param source is a description or name for the payload document void InjectPayloadSleigh::checkParameterRestrictions(InjectContextSleigh &con, const vector &inputlist, const vector &output, const string &source) -{ // Verify that the storage locations passed in -con- match the restrictions set for this payload +{ if (inputlist.size() != con.inputlist.size()) throw LowlevelError("Injection parameter list has different number of parameters than p-code operation: "+source); for(int4 i=0;i &inputlist, const vector &output, const string &source) -{ // Set-up operands in the parser state so that they pick up storage locations in InjectContext +{ checkParameterRestrictions(con,inputlist,output,source); ParserContext *pos = walker.getParserContext(); for(int4 i=0;iencode(encoder,-1); } +/// \brief Constructor for use with decode +/// +/// \param g is the Architecture +/// \param base is original InjectPayload object whose dynamic payloads are being cached +InjectPayloadDynamic::InjectPayloadDynamic(Architecture *g,InjectPayload *base) + : InjectPayload(base->getName(),base->getType()) +{ + glb = g; + dynamic = true; + + // Clone basic properties of the original payload + incidentalCopy = base->isIncidentalCopy(); + paramshift = base->getParamShift(); + for(int4 i=0;isizeInput();++i) + inputlist.push_back(base->getInput(i)); + for(int4 i=0;isizeOutput();++i) + output.push_back(base->getOutput(i)); +} + InjectPayloadDynamic::~InjectPayloadDynamic(void) { @@ -260,6 +302,10 @@ InjectPayloadDynamic::~InjectPayloadDynamic(void) delete (*iter).second; } +/// \brief Decode a specific p-code sequence and the context in which it applied +/// +/// Decode the Address for a specific context and then elements for the specific p-code ops. +/// \param decoder is the stream to pull from void InjectPayloadDynamic::decodeEntry(Decoder &decoder) { @@ -301,14 +347,6 @@ PcodeInjectLibrarySleigh::PcodeInjectLibrarySleigh(Architecture *g) contextCache.glb = g; } -int4 PcodeInjectLibrarySleigh::registerDynamicInject(InjectPayload *payload) - -{ - int4 id = injection.size(); - injection.push_back(payload); - return id; -} - /// \brief Force a payload to be dynamic for debug purposes /// /// Debug information may include inject information for payloads that aren't dynamic. @@ -320,12 +358,18 @@ InjectPayloadDynamic *PcodeInjectLibrarySleigh::forceDebugDynamic(int4 injectid) { InjectPayload *oldPayload = injection[injectid]; - InjectPayloadDynamic *newPayload = new InjectPayloadDynamic(glb,oldPayload->getName(),oldPayload->getType()); + InjectPayloadDynamic *newPayload = new InjectPayloadDynamic(glb,oldPayload); delete oldPayload; injection[injectid] = newPayload; return newPayload; } +/// \brief Convert SLEIGH syntax to p-code templates for the given InjectPayload +/// +/// The payload \b parsestring must be populated with SLEIGH synatax. +/// The SLEIGH translator is used to parse the syntax and produce the +/// p-code templates that are then ready to be injected via InjectPayload::inject. +/// \param payload is the given InjectPayload void PcodeInjectLibrarySleigh::parseInject(InjectPayload *payload) { @@ -391,7 +435,7 @@ void PcodeInjectLibrarySleigh::registerInject(int4 injectid) { InjectPayload *payload = injection[injectid]; if (payload->isDynamic()) { - InjectPayload *sub = new InjectPayloadDynamic(glb,payload->getName(),payload->getType()); + InjectPayload *sub = new InjectPayloadDynamic(glb,payload); delete payload; payload = sub; injection[injectid] = payload; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.hh index 2de9eaae66..5b240bb1be 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.hh @@ -4,15 +4,18 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ +/// \file inject_sleigh.hh +/// \brief Implementation of p-code injection using the internal SLEIGH engine to build the p-code ops + #ifndef __INJECT_SLEIGH_HH__ #define __INJECT_SLEIGH_HH__ @@ -21,24 +24,30 @@ namespace ghidra { +/// \brief Context for performing injection using the SLEIGH engine class InjectContextSleigh : public InjectContext { public: - PcodeCacher cacher; - ParserContext *pos; - InjectContextSleigh(void) { pos = (ParserContext *)0; } + PcodeCacher cacher; ///< Cache of p-code data accumulated just prior to injection + ParserContext *pos; ///< Context for final parsing and emitting of p-code during injection + InjectContextSleigh(void) { pos = (ParserContext *)0; } ///< Constructor virtual ~InjectContextSleigh(void); - virtual void encode(Encoder &encoder) const {} // We don't need this functionality for sleigh + virtual void encode(Encoder &encoder) const {} ///< We don't need this functionality for sleigh }; +/// \brief An injection payload built by the SLEIGH engine +/// +/// The p-code ops for the injection are described using SLEIGH syntax. +/// This object can hold both the SLEIGH syntax as a string or the p-code templates +/// (VarnodeTpl and OpTpl) that are prepared for emitting the p-code for the injection. class InjectPayloadSleigh : public InjectPayload { friend class PcodeInjectLibrarySleigh; - ConstructTpl *tpl; - string parsestring; - string source; + ConstructTpl *tpl; ///< The VarnodeTpl and OpTpl objects prepared for injection + string parsestring; ///< SLEIGH syntax describing the injection p-code + string source; ///< A description of the document containing the SLEIGH syntax protected: - void decodeBody(Decoder &decoder); ///< Parse the tag + void decodeBody(Decoder &decoder); ///< Parse the \ tag public: - InjectPayloadSleigh(const string &src,const string &nm,int4 tp); + InjectPayloadSleigh(const string &src,const string &nm,int4 tp); ///< Constructor for use with decode virtual ~InjectPayloadSleigh(void); virtual void inject(InjectContext &context,PcodeEmit &emit) const; virtual void decode(Decoder &decoder); @@ -52,24 +61,27 @@ public: const string &source); }; +/// \brief An injection payload, described by SLEIGH, for replacing CALL ops to specific functions class InjectPayloadCallfixup : public InjectPayloadSleigh { - vector targetSymbolNames; + vector targetSymbolNames; ///< Names (symbols) of specific functions to replace with \b this payload public: - InjectPayloadCallfixup(const string &sourceName); + InjectPayloadCallfixup(const string &sourceName); ///< Constructor virtual void decode(Decoder &decoder); }; +/// \brief An injection payload, described by SLEIGH, for replacing specific user (CALLOTHER) ops class InjectPayloadCallother : public InjectPayloadSleigh { public: - InjectPayloadCallother(const string &sourceName); + InjectPayloadCallother(const string &sourceName); ///< Constructor virtual void decode(Decoder &decoder); }; +/// \brief A p-code snippet, described by SLEIGH, that can be executed as a script class ExecutablePcodeSleigh : public ExecutablePcode { friend class PcodeInjectLibrarySleigh; protected: - string parsestring; - ConstructTpl *tpl; + string parsestring; ///< SLEIGH syntax describing the p-code snippet + ConstructTpl *tpl; ///< Parsed template objects (VarnodeTpl and OpTpl) ready for injection public: ExecutablePcodeSleigh(Architecture *g,const string &src,const string &nm); virtual ~ExecutablePcodeSleigh(void); @@ -78,11 +90,16 @@ protected: virtual void printTemplate(ostream &s) const; }; +/// \brief A debugging placeholder for a payload that changes depending on context +/// +/// Implemented as a simple map from an Address to an XML description of the p-code sequence to inject. +/// This is used internally by PcodeInjectLibrarySleigh in a debug environment to hold multiple payloads +/// for objects where InjectPayload::isDynamic() returns \b true. class InjectPayloadDynamic : public InjectPayload { - Architecture *glb; - map addrMap; // Map from address to specific inject + Architecture *glb; ///< The architecture owning \b this payload + map addrMap; ///< Map from address to specific inject public: - InjectPayloadDynamic(Architecture *g,const string &nm,int4 tp) : InjectPayload(nm,tp) { glb = g; dynamic = true; } + InjectPayloadDynamic(Architecture *g,InjectPayload *base); virtual ~InjectPayloadDynamic(void); void decodeEntry(Decoder &decoder); virtual void inject(InjectContext &context,PcodeEmit &emit) const; @@ -91,18 +108,21 @@ public: virtual string getSource(void) const { return "dynamic"; } }; +/// \brief An implementation of an injection library using the internal SLEIGH engine to build payloads +/// +/// Payloads from compiler specs and other sources are parsed as SLEIGH syntax and stored +/// internally as InjectPayloadSleigh objects. class PcodeInjectLibrarySleigh : public PcodeInjectLibrary { - const SleighBase *slgh; - vector inst; - InjectContextSleigh contextCache; - int4 registerDynamicInject(InjectPayload *payload); + const SleighBase *slgh; ///< The SLEIGH engine for parsing payloads + vector inst; ///< P-code behaviors used for p-code scripts + InjectContextSleigh contextCache; ///< Reusable context for emitting p-code payloads InjectPayloadDynamic *forceDebugDynamic(int4 injectid); void parseInject(InjectPayload *payload); protected: virtual int4 allocateInject(const string &sourceName,const string &name,int4 type); virtual void registerInject(int4 injectid); public: - PcodeInjectLibrarySleigh(Architecture *g); + PcodeInjectLibrarySleigh(Architecture *g); ///< Constructor virtual void decodeDebug(Decoder &decoder); virtual int4 manualCallFixup(const string &name,const string &snippetstring); virtual int4 manualCallOtherFixup(const string &name,const string &outname,const vector &inname, diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 2b4a9474e5..8df64ca548 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -9602,12 +9602,12 @@ bool RuleIgnoreNan::isAnotherNan(Varnode *vn) /// /// The given PcodeOp takes input from a NaN operation through a specific slot. We look for a floating-point comparison /// PcodeOp (FLOAT_LESS, FLOAT_LESSEQUAL, FLOAT_EQUAL, or FLOAT_NOTEQUAL) that is combined with the given PcodeOp and -/// has the same input Varnode as the NaN. The data-flow must be combined either through a BOOL_OR or BOOL_AND -/// operation, or the given PcodeOp must be a CBRANCH that protects immediate control-flow to another CBRANCH -/// taking the result of the comparison as input. If a matching comparison is found, the NaN input to the given -/// PcodeOp is removed, assuming the output of the NaN operation is always \b false. -/// Input from an unmodified NaN result must be combined through a BOOL_OR, but a NaN result that has been negated -/// must combine through a BOOL_AND. +/// has the same input Varnode as the NaN. The data-flow must be combined either through a BOOL_OR, BOOL_AND, +/// INT_EQUAL, or INT_NOTEQUAL operation, or the given PcodeOp must be a CBRANCH that protects immediate control-flow +/// to another CBRANCH taking the result of the comparison as input. If a matching comparison is found, the NaN input +/// to the given PcodeOp is removed, assuming the output of the NaN operation is always \b false. +/// If a NaN result is combined through a BOOL_OR, it must be unmodified, but a NaN result combined +/// through a BOOL_AND must be negated. /// \param floatVar is the input Varnode to NaN operation /// \param op is the given PcodeOp to test /// \param slot is the input index of the NaN operation @@ -9618,7 +9618,8 @@ bool RuleIgnoreNan::isAnotherNan(Varnode *vn) Varnode *RuleIgnoreNan::testForComparison(Varnode *floatVar,PcodeOp *op,int4 slot,OpCode matchCode,int4 &count,Funcdata &data) { - if (op->code() == matchCode) { + OpCode opc = op->code(); + if (opc == matchCode) { Varnode *vn = op->getIn(1 - slot); if (checkBackForCompare(floatVar,vn)) { data.opSetOpcode(op,CPUI_COPY); @@ -9630,7 +9631,14 @@ Varnode *RuleIgnoreNan::testForComparison(Varnode *floatVar,PcodeOp *op,int4 slo return op->getOut(); } } - else if (op->code() == CPUI_CBRANCH) { + else if (opc == CPUI_INT_EQUAL || opc == CPUI_INT_NOTEQUAL) { + Varnode *vn = op->getIn(1 - slot); + if (checkBackForCompare(floatVar,vn)) { + data.opSetInput(op,data.newConstant(1,(matchCode == CPUI_BOOL_OR) ? 0 : 1),slot); + count += 1; + } + } + else if (opc == CPUI_CBRANCH) { BlockBasic *parent = op->getParent(); PcodeOp *lastOp; int4 outDir = (matchCode == CPUI_BOOL_OR) ? 0 : 1; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc index 25cd787008..147eff5fd9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc @@ -2004,6 +2004,12 @@ bool TraverseNode::isAlternatePathValid(const Varnode *vn,uint4 flags) if (vn->loneDescend() == (PcodeOp*)0) return false; const PcodeOp *op = vn->getDef(); if (op == (PcodeOp*)0) return true; + while(op->isIncidentalCopy() && op->code() == CPUI_COPY) { // Skip any incidental COPY + vn = op->getIn(0); + if (vn->loneDescend() == (PcodeOp *)0) return false; + op = vn->getDef(); + if (op == (PcodeOp *)0) return true; + } return !op->isMarker(); // MULTIEQUAL or INDIRECT indicates multiple values } diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/retstruct.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/retstruct.xml index 4cc656af77..bbccfe65ee 100644 --- a/Ghidra/Features/Decompiler/src/decompile/datatests/retstruct.xml +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/retstruct.xml @@ -1,5 +1,5 @@ - + diff --git a/Ghidra/Processors/AARCH64/data/languages/AARCH64.cspec b/Ghidra/Processors/AARCH64/data/languages/AARCH64.cspec index 98879ad29d..5c3c5b0b0d 100644 --- a/Ghidra/Processors/AARCH64/data/languages/AARCH64.cspec +++ b/Ghidra/Processors/AARCH64/data/languages/AARCH64.cspec @@ -213,6 +213,9 @@ + + + diff --git a/Ghidra/Processors/AARCH64/data/languages/AARCH64_win.cspec b/Ghidra/Processors/AARCH64/data/languages/AARCH64_win.cspec index df92794582..873f0bbe86 100644 --- a/Ghidra/Processors/AARCH64/data/languages/AARCH64_win.cspec +++ b/Ghidra/Processors/AARCH64/data/languages/AARCH64_win.cspec @@ -241,6 +241,9 @@ + + +