以前段时间 XCTF 抗疫赛中的 hardphp 为例,该题目最终只有一支战队解出,题目作者 zsx 也在随后的文章中点了下解混淆的思路,不过在各 writeup 中没有找到有关解混淆的完整的代码,于是学习一下通过修改 AST 节点解混淆的方法
0x01 php-parser
php-parser 是一个 PHP 库,可以将 PHP 5 或 PHP 7 代码解析为抽象语法树(AST)
https://github.com/nikic/php-parser/releases
安装
1
| composer require nikic/php-parser
|
安装好之后就可以直接引用了, require dirname(__FILE__).'/vendor/autoload.php';
官方文档中有说 XDebug 容易使 php-parser 的运行速度慢五倍以上,最好还是禁用吧
解析代码时首先创建一个解析器 ( parser instance )
1 2
use PhpParser\ParserFactory;

// Obtain a parser from the factory, asking it to prefer PHP 7 syntax.
$parser = (new ParserFactory())->create(ParserFactory::PREFER_PHP7);
|
使用 PREFER_PHP7 优先解析 PHP7 代码
使用解析器($parser)的 parse 方法解析代码,得到一个 statement 节点数组,语法错误可以通过 PhpParser\Error
来捕获
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
<?php

use PhpParser\Error;
use PhpParser\ParserFactory;

require __DIR__ . '/vendor/autoload.php';

// Sample input; a nowdoc keeps the snippet verbatim (no interpolation).
$code = <<<'CODE'
<?php

echo 'Hello PHP';
CODE;

$parser = (new ParserFactory())->create(ParserFactory::PREFER_PHP7);

try {
    // Parse the sample and dump the resulting array of statement nodes.
    $stmts = $parser->parse($code);
    var_dump($stmts);
} catch (Error $e) {
    // Syntax errors in the input are reported through PhpParser\Error.
    echo 'Parse Error: ', $e->getMessage();
}
|
可以看到 statement 节点数组如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
| array(1) { [0]=> object(PhpParser\Node\Stmt\Echo_) ["exprs"]=> array(1) { [0]=> object(PhpParser\Node\Scalar\String_) ["value"]=> string(9) "Hello PHP" ["attributes":protected]=> array(3) { ["startLine"]=> int(3) ["endLine"]=> int(3) ["kind"]=> int(1) } } } ["attributes":protected]=> array(2) { ["startLine"]=> int(3) ["endLine"]=> int(3) } } }
|
使用 NodeDumper 更直观地查看 AST
1 2 3 4
use PhpParser\NodeDumper;

// Render the statement array as NodeDumper's text form, followed by a newline.
$nodeDumper = new NodeDumper();
echo $nodeDumper->dump($stmts) . "\n";
|
输出:
1 2 3 4 5 6 7 8 9
| array( 0: Stmt_Echo( exprs: array( 0: Scalar_String( value: Hello PHP ) ) ) )
|
通过 AST 可以方便查看或者修改代码中的某些值,比如通过 ($stmts[0]->exprs)[0]->value = 'Hello Word';
即可修改 Hello PHP 为 Hello Word。但是这是在代码结构已知的情况下做的修改,效率并不高,php-parser 提供了一种用于遍历和访问节点树的组件 PhpParser\NodeTraverser
例如下面代码就可以将原本代码中的 echo 'Hello PHP';
换成 print 'Hello Word';
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
<?php

use PhpParser\Error;
use PhpParser\ParserFactory;
use PhpParser\NodeDumper;
use PhpParser\NodeTraverser;
use PhpParser\Node;
use PhpParser\NodeVisitorAbstract;
use PhpParser\PrettyPrinter;

require dirname(__FILE__).'/vendor/autoload.php';

// Sample input that the two visitors below rewrite.
$code = <<<'CODE'
<?php

echo 'Hello PHP';

CODE;

/**
 * Overwrites the value of every string literal with 'Hello Word'.
 */
class MyNodeVisitor extends NodeVisitorAbstract
{
    public function leaveNode(Node $node)
    {
        if ($node instanceof Node\Scalar\String_) {
            // Mutating in place is enough; returning nothing keeps the node.
            $node->value = 'Hello Word';
        }
    }
}

/**
 * Replaces `echo <expr>;` statements with `print <expr>;`.
 */
class PrintNodeVisitor extends NodeVisitorAbstract
{
    public function leaveNode(Node $node)
    {
        // `print` takes exactly one operand, so an echo with several operands
        // is left untouched instead of silently dropping all but the first.
        if (!$node instanceof Node\Stmt\Echo_ || count($node->exprs) !== 1) {
            return null;
        }

        // Reuse the operand node itself rather than rebuilding a string literal
        // from its `value`, so non-literal operands (variables, calls, ...)
        // survive the rewrite.
        return new Node\Stmt\Expression(new Node\Expr\Print_($node->exprs[0]));
    }
}

$parser = (new ParserFactory)->create(ParserFactory::PREFER_PHP7);

try {
    $stmts = $parser->parse($code);

    // Visitors run in the order they are added.
    $traverser = new NodeTraverser;
    $traverser->addVisitor(new MyNodeVisitor);
    $traverser->addVisitor(new PrintNodeVisitor);
    $new_stmts = $traverser->traverse($stmts);

    // Turn the modified AST back into PHP source.
    $prettyPrinter = new PrettyPrinter\Standard;
    $new_code = $prettyPrinter->prettyPrintFile($new_stmts);

    echo $code.PHP_EOL;
    echo "--After parser:--\n\n".$new_code;
} catch (Error $e) {
    echo 'Parse Error: ', $e->getMessage();
}
|
0x02 解混淆 (Deobfuscate)
熟悉 php-parser 对 AST 节点的操作就可以解混淆了
比如这个 index.php,可以看出混淆后的基本代码格式为
unserialize + base64_decode 的方式赋值给 GLOBALS 数组,后面全都是基于 GLOBALS 数组的取值、运算和嵌套操作
思路也很简单,就是首先获取 $GLOBALS = unserialize(base64_decode("xxxx"))
模式的 PHP 代码得到的变量(数组),后面在发现调用该数组值时进行替换,然后运算表达式,将得到的字符串进行拼接,换一下变量名。
解变量名
通过正则判断变量名,然后替换节点为新变量名即可
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/**
 * Renames variables whose names contain characters outside [a-zA-Z0-9_]
 * to sequential, readable names (var_0, var_1, ...).
 *
 * The same original name always maps to the same replacement.
 */
class reNameVar extends NodeVisitorAbstract
{
    /** Next sequence number to hand out. */
    public $varCount = 0;

    /** Map of original variable name => assigned sequence number. */
    public $varReName = [];

    /**
     * @param Node $node Node being left by the traverser.
     * @return Node\Expr\Variable|null Replacement variable, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Variable) {
            return null;
        }

        $name = $node->name;

        // Variable-variables ($$x, ${expr}) carry an expression node as their
        // name; only plain string names can be pattern-matched and renamed.
        // An empty name is left alone as well.
        if (!is_string($name) || $name === '') {
            return null;
        }

        // Already readable: nothing to do.
        if (preg_match('/^[a-zA-Z0-9_]+$/', $name)) {
            return null;
        }

        // First sighting of this name: allocate the next number.
        if (!isset($this->varReName[$name])) {
            $this->varReName[$name] = $this->varCount++;
        }

        return new Node\Expr\Variable('var_' . $this->varReName[$name]);
    }
}
|
解 unserialize + base64_decode 混淆
在作者 zsx 的 开发简单的PHP混淆器与解混淆器 这篇文章中有一个简单的 demo,不过要改下,直接拿来是不行的,后面数组的值会获取不到,因为这里有两次 unserialize + base64_decode 混淆,而且后面是 ('unserialize')(('base64_decode')('xxx'))
这种代码形式,还要控制获取 GLOBALS 数组内容为 NULL 时的返回结果,所以要加这两个判断
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
/**
 * Inlines values that were hidden behind
 * `$x = unserialize(base64_decode('...'))` (or the string-callee form
 * `('unserialize')(('base64_decode')('...'))`).
 *
 * On entering such an assignment the payload is decoded, remembered, and the
 * assignment's right-hand side is replaced by `0`. On leaving an array fetch
 * on the remembered target, the fetch is replaced by the decoded value.
 */
class ArrayToConstant extends NodeVisitorAbstract
{
    /** Name of the most recent assignment target ('' until one is seen). */
    public $_variableName = '';

    /** Decoded array from the most recent matching assignment. */
    public $_constants = [];

    /**
     * @param Node $node Node being entered.
     * @return Node\Expr\Assign|null Neutralised assignment, or null to keep the node.
     */
    public function enterNode(Node $node)
    {
        if (!$node instanceof Node\Expr\Assign) {
            return null;
        }

        $encoded = $this->encodedPayload($node->expr);
        if ($encoded === null) {
            return null;
        }

        $raw = base64_decode($encoded);
        if (!is_string($raw)) {
            return null;
        }

        // The payload comes from untrusted, obfuscated code: never let
        // unserialize() instantiate classes (and run their magic methods).
        $table = unserialize($raw, ['allowed_classes' => false]);

        // If the payload is not an array, leave the original code untouched
        // instead of replacing it with 0 and losing the data.
        if (!is_array($table)) {
            return null;
        }

        $this->_variableName = $this->nameOf($node->var);
        $this->_constants = $table;

        return new Node\Expr\Assign($node->var, Node\Scalar\LNumber::fromString('0'));
    }

    /**
     * @param Node $node Node being left.
     * @return Node\Expr|null Literal replacing the array fetch, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if ($this->_variableName === '') {
            return null;
        }

        if (!$node instanceof Node\Expr\ArrayDimFetch
            || $this->nameOf($node->var) !== $this->_variableName
        ) {
            return null;
        }

        // Only literal int/string offsets can be looked up; `$a[]` has a null
        // dim and `$a[$i]` has no literal value.
        $dim = $node->dim;
        $key = ($dim instanceof Node\Scalar && isset($dim->value)) ? $dim->value : null;
        if (!is_int($key) && !is_string($key)) {
            return null;
        }

        // Missing keys and null values both leave the fetch as it is.
        $val = $this->_constants[$key] ?? null;
        if ($val === null) {
            return null;
        }

        if (is_string($val)) {
            return new Node\Scalar\String_($val);
        }
        if (is_float($val)) {
            return new Node\Scalar\DNumber($val);
        }
        if (is_int($val)) {
            return new Node\Scalar\LNumber($val);
        }
        if (is_bool($val)) {
            return new Node\Expr\ConstFetch(
                new Node\Name\FullyQualified($val ? 'true' : 'false')
            );
        }

        // Arrays/objects cannot be expressed as a constant name; keep the fetch.
        return null;
    }

    /**
     * Returns the base64 string literal inside `unserialize(base64_decode('...'))`.
     *
     * @param Node $expr Right-hand side of an assignment.
     * @return string|null The literal, or null when the expression has another shape.
     */
    private function encodedPayload(Node $expr)
    {
        if (!$this->isCallTo($expr, 'unserialize') || count($expr->args) !== 1) {
            return null;
        }

        $outerArg = $expr->args[0];
        if (!$outerArg instanceof Node\Arg) {
            return null;
        }

        $inner = $outerArg->value;
        if (!$this->isCallTo($inner, 'base64_decode') || count($inner->args) < 1) {
            return null;
        }

        $innerArg = $inner->args[0];
        if (!$innerArg instanceof Node\Arg || !$innerArg->value instanceof Node\Scalar\String_) {
            return null;
        }

        return $innerArg->value->value;
    }

    /**
     * Tells whether $expr is a call to $function, written either as a plain
     * name (`f(...)`) or as a string callee (`('f')(...)`).
     *
     * @param Node   $expr     Candidate call expression.
     * @param string $function Lower-case function name to match.
     * @return bool
     */
    private function isCallTo(Node $expr, $function)
    {
        if (!$expr instanceof Node\Expr\FuncCall) {
            return false;
        }

        $callee = $expr->name;
        if ($callee instanceof Node\Name) {
            $name = $callee->toString();
        } elseif ($callee instanceof Node\Scalar\String_) {
            $name = $callee->value;
        } else {
            return false;
        }

        // PHP function names are case-insensitive.
        return strtolower($name) === $function;
    }

    /**
     * Reads the `name` of an expression node. Nodes without a `name` property
     * yield null, the same value a direct property read evaluates to, but
     * without the undefined-property diagnostic.
     *
     * @param Node $expr Assignment target or array-fetch base.
     * @return mixed
     */
    private function nameOf(Node $expr)
    {
        return isset($expr->name) ? $expr->name : null;
    }
}
|
解 GLOBALS 键名混淆
这个其实不是很必要,对逻辑没啥影响,不过看起来会工整些
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
/**
 * Renames unreadable string keys in `$array['<key>']` fetches to
 * key_0, key_1, ... with an independent counter per array variable.
 */
class reNameArr extends NodeVisitorAbstract
{
    /** Map of array variable name => next sequence number. */
    private $arrCount = [];

    /** Map of array variable name => [original key => sequence number]. */
    private $arrReName = [];

    /**
     * @param Node $node Node being left.
     * @return Node\Expr\ArrayDimFetch|null Fetch with the renamed key, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\ArrayDimFetch) {
            return null;
        }

        $base = $node->var;
        $dim = $node->dim;

        // Only `$name['literal']` is handled. Nested fetches, non-variable
        // bases, `$a[]` and computed offsets such as `$a[$i]` have no literal
        // key to rename and must not be rewritten.
        if (!$base instanceof Node\Expr\Variable
            || !is_string($base->name)
            || !$dim instanceof Node\Scalar\String_
        ) {
            return null;
        }

        $arrName = $base->name;
        $key = $dim->value;

        // Empty or already-readable keys stay as they are.
        if ($key === '' || preg_match('/^[a-zA-Z0-9_]+$/', $key)) {
            return null;
        }

        // First sighting of this key for this array: allocate the next number.
        if (!isset($this->arrReName[$arrName][$key])) {
            if (!isset($this->arrCount[$arrName])) {
                $this->arrCount[$arrName] = 0;
            }
            $this->arrReName[$arrName][$key] = $this->arrCount[$arrName]++;
        }

        return new Node\Expr\ArrayDimFetch(
            new Node\Expr\Variable($arrName),
            new Node\Scalar\String_('key_' . $this->arrReName[$arrName][$key])
        );
    }
}
|
计算数字表达式
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/**
 * Folds constant expressions of the shape `a + (b - c)` into one number.
 *
 * `a` may be an integer literal, a numeric string literal, or a unary minus
 * applied to one of those; `b` and `c` may be integer or numeric string
 * literals. Any other shape is left untouched.
 */
class ExpressionToNumber extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node being left.
     * @return Node\Scalar|null Folded number, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Plus
            || !$node->right instanceof Node\Expr\BinaryOp\Minus
        ) {
            return null;
        }

        if ($node->left instanceof Node\Expr\UnaryMinus) {
            // The negated operand must itself be a literal; `-$x` cannot be folded.
            $a = $this->literalNumber($node->left->expr);
            if ($a !== null) {
                $a = -$a;
            }
        } else {
            $a = $this->literalNumber($node->left);
        }

        $b = $this->literalNumber($node->right->left);
        $c = $this->literalNumber($node->right->right);

        if ($a === null || $b === null || $c === null) {
            return null;
        }

        // Keep the grouping of the source expression.
        $result = $a + ($b - $c);

        // Numeric strings or integer overflow can produce a float.
        if (is_int($result)) {
            return new Node\Scalar\LNumber($result);
        }

        return new Node\Scalar\DNumber($result);
    }

    /**
     * Extracts the numeric value of a literal operand.
     *
     * @param Node $expr Operand node.
     * @return int|float|null The number, or null when the operand is not a numeric literal.
     */
    private function literalNumber(Node $expr)
    {
        if ($expr instanceof Node\Scalar\LNumber) {
            return $expr->value;
        }

        // Non-numeric strings are rejected: arithmetic on them warns on PHP 7
        // and throws TypeError on PHP 8.
        if ($expr instanceof Node\Scalar\String_ && is_numeric($expr->value)) {
            return $expr->value + 0;
        }

        return null;
    }
}
|
此时通过再次使用 ArrayToConstant 遍历一下各节点就可以很好的去掉 unserialize + base64_decode 和 GLOBALS 混淆了
解 chr 函数混淆
遍历 AST 找到 (chr)(101)
这类节点使用 chr 函数获取字符串值然后替换该节点
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Replaces `('chr')(<int literal>)` calls with the string literal that
 * chr() returns for that integer.
 */
class chr2Str extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node being left.
     * @return Node\Scalar\String_|null Resulting string literal, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall) {
            return null;
        }

        // The callee must be a string literal. Name and expression callees
        // have no `value` property, so they are rejected before it is read.
        $callee = $node->name;
        if (!$callee instanceof Node\Scalar\String_ || $callee->value !== 'chr') {
            return null;
        }

        if (count($node->args) !== 1) {
            return null;
        }

        $arg = $node->args[0];
        if (!$arg instanceof Node\Arg || !$arg->value instanceof Node\Scalar\LNumber) {
            return null;
        }

        return new Node\Scalar\String_(chr($arg->value->value));
    }
}
|
此时再计算一次数字表达式然后去 chr 函数,看看效果
解字符拼接混淆
找到 concat 节点,拼接左右的字符串
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/**
 * Merges `'left' . 'right'` into a single string literal.
 */
class concat2Str extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node being left.
     * @return Node\Scalar\String_|null Joined literal, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\BinaryOp\Concat) {
            return null;
        }

        $lhs = $node->left;
        $rhs = $node->right;

        // Both operands must be string literals holding string values.
        $bothLiterals = $lhs instanceof Node\Scalar\String_
            && is_string($lhs->value)
            && $rhs instanceof Node\Scalar\String_
            && is_string($rhs->value);

        if (!$bothLiterals) {
            return null;
        }

        return new Node\Scalar\String_($lhs->value . $rhs->value);
    }
}
|
解 str_rot13 函数混淆
和上面解 chr 函数混淆差不多
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/**
 * Replaces `('str_rot13')('<literal>')` calls with the decoded string literal.
 */
class rot13Decoder extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node being left.
     * @return Node\Scalar\String_|null Decoded literal, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall) {
            return null;
        }

        // Callee must be the string literal 'str_rot13'.
        $callee = $node->name;
        if (!$callee instanceof Node\Scalar\String_
            || !is_string($callee->value)
            || $callee->value !== 'str_rot13'
        ) {
            return null;
        }

        if (count($node->args) !== 1) {
            return null;
        }

        // The single argument must be a string literal.
        $only = $node->args[0];
        if (!$only instanceof Node\Arg) {
            return null;
        }

        $literal = $only->value;
        if (!$literal instanceof Node\Scalar\String_ || !is_string($literal->value)) {
            return null;
        }

        return new Node\Scalar\String_(str_rot13($literal->value));
    }
}
|
解 call_user_func_array 函数混淆
最后如果觉得 ('call_user_func_array')('call_user_func_array', $var_1)
这种样子不太直观也可以替换一下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/**
 * Rewrites `('call_user_func_array')('call_user_func_array', <arg>)` into
 * the plain call `call_user_func_array('call_user_func_array', <arg>)`.
 */
class callUserFuncDecoder extends NodeVisitorAbstract
{
    /**
     * @param Node $node Node being left.
     * @return Node\Expr\FuncCall|null Call with a plain name callee, or null to keep the node.
     */
    public function leaveNode(Node $node)
    {
        if (!$node instanceof Node\Expr\FuncCall || count($node->args) !== 2) {
            return null;
        }

        // Callee must be the string literal 'call_user_func_array'.
        $callee = $node->name;
        if (!$callee instanceof Node\Scalar\String_
            || $callee->value !== 'call_user_func_array'
        ) {
            return null;
        }

        $first = $node->args[0];
        $second = $node->args[1];
        if (!$first instanceof Node\Arg || !$second instanceof Node\Arg) {
            return null;
        }

        if (!$first->value instanceof Node\Scalar\String_
            || $first->value->value !== 'call_user_func_array'
        ) {
            return null;
        }

        // Reuse the original Arg nodes. Only the callee changes, so the second
        // argument may be any expression (not just a variable) and the new
        // call keeps a well-formed Arg[] list for later visitors.
        return new Node\Expr\FuncCall(
            new Node\Name('call_user_func_array'),
            $node->args
        );
    }
}
|
此时已经基本没啥阅读障碍了
Reference
https://blog.zsxsoft.com/post/42
https://github.com/nikic/PHP-Parser/blob/master/doc