我们只进行一些总结,基础理解我参考了这篇文章
Cython编写
系统环境:windows,python开发环境(IDE)pycharm(python虚拟环境.venv),c/c++编译器visual studio 2022
1.在python中安装cython和setuptools包
建议给Cython单独开一个python虚拟环境,有时候包多了导入pyd会报错。
pycharm可以直接使用包管理器搜索安装。
pip install cython
pip install setuptools
2.安装c\c++编译器
我这里用的是visual studio,也可以用MinGW。
3.编写cython拓展
可以参考用户指南 - 《Cython 3.0 中文文档》 - 书栈网 · BookStack
随意编写一个source.pyx作为拓展模块(代码里用到了cdef,所以必须是.pyx文件,下面的编译脚本编译的也是source.pyx),代码如下
def sayhello():
    # Print a fixed greeting. A comment is used instead of a docstring so that
    # no extra string gets embedded into the compiled extension module.
    a = "hello world"
    print(a)
cdef int add(int n, int m):
    # C-level helper with C int arguments and return value. Being a `cdef`
    # function it is only callable from Cython code inside this module.
    return m+n
def aadd(n, m):
    # Python-visible wrapper: forwards to the cdef function add() and prints
    # the sum instead of returning it.
    print(add(n, m))
还需要一个编译文件,这里我们用myc.py,代码如下
from setuptools import setup
from Cython.Build import cythonize

# Build script for the extension module.
# - cythonize() translates the Cython source into a C file; its first argument
#   is the .pyx (or .py) file to compile, located next to this script.
# - setup() then compiles that C file into an importable extension module.
# - language_level=3 makes Cython compile the source with Python 3 semantics.
#   NOTE: the default depends on the Cython version (Python 2 semantics in
#   Cython 0.29, Python 3 semantics in Cython 3.x), so it is set explicitly.
setup(ext_modules=cythonize("source.pyx", language_level=3))
用cmd编译拓展模块
在上面文件所在目录打开cmd并启动我们的python虚拟环境(不是虚拟环境的跳过),输入的命令如下,我这里是”E:\python projects\cython\.venv\Scripts\activate”
"项目根目录\.venv\Scripts\activate"
再输入以下命令进行编译,我这里是python myc.py build
python [编译文件] build
python [编译文件] build_ext --inplace #这个指令直接把拓展模块放到当前目录而不是build目录
当前目录下的build文件夹中的lib文件夹里就是我们的库文件.pyd,我们直接使用这个库文件就行,其他的可以删除。把pyd文件放到工作目录下,直接按名字导入即可使用。
cython还可以导入c文件,这个先后面再说吧,好像有点问题
from setuptools import Extension, setup
from Cython.Build import cythonize

# Extension that combines the Cython source with a hand-written C++ file.
# NOTE(review): the article itself reports this variant as not fully working;
# the cause is not visible here, so the arguments are kept exactly as written.
mixed_extension = Extension(
    name="myc",
    sources=["source.pyx", "rc4enc.cpp"],
)

setup(ext_modules=cythonize(mixed_extension, language_level=3))
Cython逆向
我们自己编译一份cython文件分析,这里我实现了一个RC4加密
source.pyx
def sayhello():
    # Print a fixed greeting. A comment is used instead of a docstring so that
    # no extra string gets embedded into the compiled extension module.
    a = "hello world"
    print(a)
cdef int add(int n, int m):
    # C-level helper with C int arguments and return value. Being a `cdef`
    # function it is not exported to Python (help(source) does not list it).
    return m+n
def aadd(n, m):
    # Python-visible wrapper: forwards to the cdef function add() and prints
    # the sum instead of returning it.
    print(add(n, m))
def rc4():
    # RC4-style decryption of the embedded 25-byte ciphertext with the key
    # 'key!key!'; the recovered text is printed once, after the loop.
    # The expressions are intentionally left exactly as written (including the
    # redundant `% 256`), because the decompiled output is compared against them.
    x=[0x1c,0x05,0x96,0xd3,0xf1,0x62,0x90,0x7d,0x17,0x86,0xd6,0x63,0xd8,0xe2,0x9a,0x93,0x44,0x6b,0xee,0xeb,0x4d,0x6c,0xcc,0x6c,0x38]
    key='key!key!'
    flag = ''
    j = 0
    c = x  # keep a handle on the ciphertext: the name `x` is reused as an index below
    s = list(range(256))
    # Key scheduling: permute the 256-entry table using the key bytes.
    for i in range(256):
        j = ((j + s[i]) + ord(key[i % len(key)])) % 256
        s[i], s[j] = s[j], s[i]
    j = 0
    i = 0
    # Keystream generation: one keystream byte per ciphertext byte, XORed in.
    for r in c:
        i = (i + 1) % 256
        j = (j + s[i]) % 256
        s[i], s[j] = s[j], s[i]
        x = (s[i] + s[j] % 256) % 256
        # `%` binds tighter than `^`, so this is r ^ (s[x] % 256).
        flag += chr(r ^ s[x] % 256)
    print(flag)
imp.py
import source
source.sayhello()
source.aadd(10,10)
source.rc4()
# hello world
# 20
# flag{yes!_this_is_cython}
恢复符号
用help函数可以看到模块的一些信息
import source
help(source)
"""
返回结果
Help on module source:
NAME #模块名
source
FUNCTIONS #包含的函数
aadd(n, m)
rc4()
sayhello()
DATA
__test__ = {}
FILE #源文件
e:\python projects\cython\source.cp313-win_amd64.pyd
"""
我们把得到的pyd文件拖入ida分析。如果是.so文件函数的符号是被去掉了,我们可以在linux下随便编译一个python版本相同且内容最好要相似的cython文件,用bindiff来恢复符号表。(python版本在ida的Imports窗口可以看到”我这里是python313,即为3.13”)
根据上面的信息我们用linux编译一个差不多的cython模块。这里我用linux的pycharm部署了python3.13的cython环境。
sudo apt install python3.13
sudo apt-get install python3.13-dev
source.pyx
def sayhello():
    # Stub: only the name and signature need to match the target module so
    # that BinDiff can pair the functions; the body is irrelevant.
    return 1
def aadd(n, m):
    # Stub with the same name and parameters as the target's aadd(); the
    # arguments are deliberately ignored.
    return 1
def rc4():
    # Stub with the same name as the target's rc4(); used only for symbol
    # matching, so it does no real work.
    return 1
myc.py同上
在目录下用cython编译“./.venv/bin/python3.13 myc.py build_ext --inplace”
把so文件复制出来,用ida打开一次获得idb数据库,bindiff导入恢复符号。
这里有一个细节,就是windows下的cython文件默认是会去掉符号的,而linux却不会。所以如果是.pyd文件的话,我们要用下面的脚本进行编译来保留符号。
mycython.py
from setuptools import Extension, setup
from Cython.Build import cythonize

# Windows (MSVC) build script that keeps debug symbols, so a .pdb is produced
# next to the .pyd and can be loaded in IDA.
# The module name and source match the stub source.pyx that this script is
# meant to build (the original listing pointed at an unrelated "MyPyd.py").
module = Extension(
    "source",
    sources=["source.pyx"],
    extra_compile_args=["/Zi"],  # compiler: generate debug information
    extra_link_args=["/DEBUG"],  # linker: generate the PDB file
)

setup(
    # annotate=True additionally makes Cython write its HTML annotation report.
    ext_modules=cythonize(module, annotate=True),
)
source.pyx同上面linux的
编译后会生成.pyd文件和.pdb(在build文件夹里面),用ida打开.pyd就可以导入.pdb文件,再用源文件与ida生成的.idb文件用bindiff插件即可恢复符号(ctrl+6)。
重建结构体
_Pyx_CreateStringTabAndInitStrings()保存了要使用的字符串常量,用于加载字符串。
_Pyx_InitConstants()保存了要使用的数字常量。
为了更容易阅读代码中对常量的引用我们要重建一下结构体。
我们先在off_18000B6A8按”Y“把类型设置为”__int64*”
我们先定义一个结构体,结构体的大小最好大于等于n(off_18000B6A8 + n),我这里是27,我们就创建一个有27个__int64元素的结构体。
//一个结构体模板_Pyx_CreateStringTabAndInitStrings()
typedef struct __strings {
__int64* str_0;
__int64* str_1;
__int64* str_2;
__int64* str_3;
__int64* str_4;
__int64* str_5;
__int64* str_6;
__int64* str_7;
__int64* str_8;
__int64* str_9;
__int64* str_10;
__int64* str_11;
__int64* str_12;
__int64* str_13;
__int64* str_14;
__int64* str_15;
__int64* str_16;
__int64* str_17;
__int64* str_18;
__int64* str_19;
__int64* str_20;
__int64* str_21;
__int64* str_22;
__int64* str_23;
__int64* str_24;
__int64* str_25;
__int64* str_26;
__int64* str_27;
__int64* str_28;
__int64* str_29;
__int64* str_30;
__int64* str_31;
__int64* str_32;
__int64* str_33;
__int64* str_34;
__int64* str_35;
__int64* str_36;
__int64* str_37;
__int64* str_38;
__int64* str_39;
__int64* str_40;
__int64* str_41;
__int64* str_42;
__int64* str_43;
__int64* str_44;
__int64* str_45;
__int64* str_46;
__int64* str_47;
__int64* str_48;
__int64* str_49;
__int64* str_50;
__int64* str_51;
__int64* str_52;
__int64* str_53;
__int64* str_54;
__int64* str_55;
__int64* str_56;
__int64* str_57;
__int64* str_58;
__int64* str_59;
__int64* str_60;
__int64* str_61;
__int64* str_62;
__int64* str_63;
} str;
//一个结构体模板__Pyx_InitConstants()
typedef struct _iint {
__int64* int_0;
__int64* int_1;
__int64* int_2;
__int64* int_3;
__int64* int_4;
__int64* int_5;
__int64* int_6;
__int64* int_7;
__int64* int_8;
__int64* int_9;
__int64* int_10;
__int64* int_11;
__int64* int_12;
__int64* int_13;
__int64* int_14;
__int64* int_15;
__int64* int_16;
__int64* int_17;
__int64* int_18;
__int64* int_19;
__int64* int_20;
__int64* int_21;
__int64* int_22;
__int64* int_23;
__int64* int_24;
__int64* int_25;
__int64* int_26;
__int64* int_27;
__int64* int_28;
__int64* int_29;
__int64* int_30;
__int64* int_31;
__int64* int_32;
__int64* int_33;
__int64* int_34;
__int64* int_35;
__int64* int_36;
__int64* int_37;
__int64* int_38;
__int64* int_39;
__int64* int_40;
__int64* int_41;
__int64* int_42;
__int64* int_43;
__int64* int_44;
__int64* int_45;
__int64* int_46;
__int64* int_47;
__int64* int_48;
__int64* int_49;
__int64* int_50;
__int64* int_51;
__int64* int_52;
__int64* int_53;
__int64* int_54;
__int64* int_55;
__int64* int_56;
__int64* int_57;
__int64* int_58;
__int64* int_59;
__int64* int_60;
__int64* int_61;
__int64* int_62;
__int64* int_63;
} iint;
_Pyx_InitConstants()中数字常量结构体创建的脚本,看情况修改xxxxxxxxx处的值
import re
# 输入_Pyx_InitConstants()的内容
content = '''
'''
def modify_struct(content, limit=64):
    """Print one struct-member declaration per ``PyLong_FromLong`` constant.

    ``content`` is the decompiled text of the constant-initialisation
    function. Every ``PyLong_FromLong(<N>LL);`` call found in it produces one
    printed line ``__int64* int_<N>``, in order of appearance, so the lines can
    be pasted into the struct definition slot by slot.

    Args:
        content: Pseudo-code text to scan.
        limit: Maximum number of constants to emit (the struct size budget).

    Returns:
        None. The declarations are written to stdout.
    """
    # The sign is optional: a negative constant occupies a slot too, and
    # skipping it would silently shift every following member by one slot.
    pattern = re.compile(r'PyLong_FromLong\((-?\d+)LL\);')
    for match, _ in zip(pattern.finditer(content), range(limit)):
        num = int(match.group(1))
        # '-' is not valid in a C identifier, so -1 is spelled int_neg_1.
        suffix = f'neg_{-num}' if num < 0 else str(num)
        print(f'__int64* int_{suffix}')
modify_struct(content)
先修改字符串的。按偏移量把元素名字修改成好阅读的
typedef struct __strings {
__int64* str_0;
__int64* str_1;
__int64* str_2;
__int64* str_3;
__int64* str_4;
__int64* str_5;
__int64* str_6;
__int64* str_7;
__int64* str_wenhao;
__int64* str_unk_180009078;
__int64* str_aadd;
__int64* str_asyncio_coroutines;
__int64* str_unk_1800090B0;
__int64* str_chr;
__int64* str___class_getitem__;
__int64* str_cline_in_traceback;
__int64* str_flag;
__int64* str_hellow_orld;
__int64* str_i;
__int64* str__is_coroutine;
__int64* str_j;
__int64* str_key;
__int64* str_keykey;
__int64* str_m;
__int64* str___main__;
__int64* str_n;
__int64* str___name__;
__int64* str_print;
__int64* str_r;
__int64* str_range;
__int64* str_rc4;
__int64* str_s;
__int64* str_sayhello;
__int64* str_source;
__int64* str_sourcedpyx;
__int64* str___test__;
__int64* str_x;
} str;
同样再重建一下_Pyx_InitConstants()内的结构体,数字常量的开头索引就是37,我们就截取从37开始的结构体,用脚本进行重命名,中间要是有不连续的索引记得看情况补上一个成员。
typedef struct _iint {
__int64* int_37;
__int64* int_38;
__int64* int_39;
__int64* int_40;
__int64* int_41;
__int64* int_42;
__int64* int_43;
__int64* int_44;
__int64* int_45;
__int64* int_46;
__int64* int_47;
__int64* int_48;
__int64* int_49;
__int64* int_50;
__int64* int_51;
__int64* int_52;
__int64* int_53;
__int64* int_54;
__int64* int_55;
__int64* int_56;
__int64* int_57;
__int64* int_58;
__int64* int_59;
__int64* int_60;
__int64* int_61;
__int64* int_62;
__int64* int_63;
} iint;
import re
content = '''v0 = PyLong_FromLong(0LL);
*((_QWORD *)_pyx + 37) = v0;
if ( !v0 )
return 0xFFFFFFFFLL;
v1 = PyLong_FromLong(1LL);
*((_QWORD *)_pyx + 38) = v1;
if ( !v1 )
return 0xFFFFFFFFLL;
v2 = PyLong_FromLong(5LL);
*((_QWORD *)_pyx + 39) = v2;
if ( !v2 )
return 0xFFFFFFFFLL;
v3 = PyLong_FromLong(23LL);
*((_QWORD *)_pyx + 40) = v3;
if ( !v3 )
return 0xFFFFFFFFLL;
v4 = PyLong_FromLong(28LL);
*((_QWORD *)_pyx + 41) = v4;
if ( !v4 )
return 0xFFFFFFFFLL;
v5 = PyLong_FromLong(56LL);
*((_QWORD *)_pyx + 42) = v5;
if ( !v5 )
return 0xFFFFFFFFLL;
v6 = PyLong_FromLong(68LL);
*((_QWORD *)_pyx + 43) = v6;
if ( !v6 )
return 0xFFFFFFFFLL;
v7 = PyLong_FromLong(77LL);
*((_QWORD *)_pyx + 44) = v7;
if ( !v7 )
return 0xFFFFFFFFLL;
v8 = PyLong_FromLong(98LL);
*((_QWORD *)_pyx + 45) = v8;
if ( !v8 )
return 0xFFFFFFFFLL;
v9 = PyLong_FromLong(99LL);
*((_QWORD *)_pyx + 46) = v9;
if ( !v9 )
return 0xFFFFFFFFLL;
v10 = PyLong_FromLong(107LL);
*((_QWORD *)_pyx + 47) = v10;
if ( !v10 )
return 0xFFFFFFFFLL;
v11 = PyLong_FromLong(108LL);
*((_QWORD *)_pyx + 48) = v11;
if ( !v11 )
return 0xFFFFFFFFLL;
v12 = PyLong_FromLong(125LL);
*((_QWORD *)_pyx + 49) = v12;
if ( !v12 )
return 0xFFFFFFFFLL;
v13 = PyLong_FromLong(134LL);
*((_QWORD *)_pyx + 50) = v13;
if ( !v13 )
return 0xFFFFFFFFLL;
v14 = PyLong_FromLong(144LL);
*((_QWORD *)_pyx + 51) = v14;
if ( !v14 )
return 0xFFFFFFFFLL;
v15 = PyLong_FromLong(147LL);
*((_QWORD *)_pyx + 52) = v15;
if ( !v15 )
return 0xFFFFFFFFLL;
v16 = PyLong_FromLong(150LL);
*((_QWORD *)_pyx + 53) = v16;
if ( !v16 )
return 0xFFFFFFFFLL;
v17 = PyLong_FromLong(154LL);
*((_QWORD *)_pyx + 54) = v17;
if ( !v17 )
return 0xFFFFFFFFLL;
v18 = PyLong_FromLong(204LL);
*((_QWORD *)_pyx + 55) = v18;
if ( !v18 )
return 0xFFFFFFFFLL;
v19 = PyLong_FromLong(211LL);
*((_QWORD *)_pyx + 56) = v19;
if ( !v19 )
return 0xFFFFFFFFLL;
v20 = PyLong_FromLong(214LL);
*((_QWORD *)_pyx + 57) = v20;
if ( !v20 )
return 0xFFFFFFFFLL;
v21 = PyLong_FromLong(216LL);
*((_QWORD *)_pyx + 58) = v21;
if ( !v21 )
return 0xFFFFFFFFLL;
v22 = PyLong_FromLong(226LL);
*((_QWORD *)_pyx + 59) = v22;
if ( !v22 )
return 0xFFFFFFFFLL;
v23 = PyLong_FromLong(235LL);
*((_QWORD *)_pyx + 60) = v23;
if ( !v23 )
return 0xFFFFFFFFLL;
v24 = PyLong_FromLong(238LL);
*((_QWORD *)_pyx + 61) = v24;
if ( !v24 )
return 0xFFFFFFFFLL;
v25 = PyLong_FromLong(241LL);
*((_QWORD *)_pyx + 62) = v25;
if ( !v25 )
return 0xFFFFFFFFLL;
v26 = PyLong_FromLong(256LL);
*((_QWORD *)_pyx + 63) = v26;
'''
def modify_struct(content, limit=64):
    """Print one struct-member declaration per ``PyLong_FromLong`` constant.

    ``content`` is the decompiled text of the constant-initialisation
    function. Every ``PyLong_FromLong(<N>LL);`` call found in it produces one
    printed line ``__int64* int_<N>``, in order of appearance, so the lines can
    be pasted into the struct definition slot by slot.

    Args:
        content: Pseudo-code text to scan.
        limit: Maximum number of constants to emit (the struct size budget).

    Returns:
        None. The declarations are written to stdout.
    """
    # The sign is optional: a negative constant occupies a slot too, and
    # skipping it would silently shift every following member by one slot.
    pattern = re.compile(r'PyLong_FromLong\((-?\d+)LL\);')
    for match, _ in zip(pattern.finditer(content), range(limit)):
        num = int(match.group(1))
        # '-' is not valid in a C identifier, so -1 is spelled int_neg_1.
        suffix = f'neg_{-num}' if num < 0 else str(num)
        print(f'__int64* int_{suffix}')
modify_struct(content)
typedef struct _iint {
__int64* int_0;
__int64* int_1;
__int64* int_5;
__int64* int_23;
__int64* int_28;
__int64* int_56;
__int64* int_68;
__int64* int_77;
__int64* int_98;
__int64* int_99;
__int64* int_107;
__int64* int_108;
__int64* int_125;
__int64* int_134;
__int64* int_144;
__int64* int_147;
__int64* int_150;
__int64* int_154;
__int64* int_204;
__int64* int_211;
__int64* int_214;
__int64* int_216;
__int64* int_226;
__int64* int_235;
__int64* int_238;
__int64* int_241;
__int64* int_256;
} iint;
如果_Pyx_CreateStringTabAndInitStrings()和_Pyx_InitConstants()使用的是相同的基地址(都是off_18000B6A8),我们把两个结构体合并,两个结构体之间连接的索引要连续。如果不相同,就分别修复结构体就行。
// Merged constant table for the module: string constants first, then the
// integer constants, in slot order. Each member is one 8-byte slot, so the
// member position equals the QWORD index used in the pseudo-code
// (e.g. `*((_QWORD *)_pyx + 37)` is the member int_0).
typedef struct data {
// --- slots 0..36: string constants ---
__int64* str_0;
__int64* str_1;
__int64* str_2;
__int64* str_3;
__int64* str_4;
__int64* str_5;
__int64* str_6;
__int64* str_7; // used as the initial value of `flag` in the rc4 pseudo-code
__int64* str_wenhao;
__int64* str_unk_180009078;
__int64* str_aadd;
__int64* str_asyncio_coroutines;
__int64* str_unk_1800090B0;
__int64* str_chr;
__int64* str___class_getitem__;
__int64* str_cline_in_traceback;
__int64* str_flag;
__int64* str_hellow_orld;
__int64* str_i;
__int64* str__is_coroutine;
__int64* str_j;
__int64* str_key;
__int64* str_keykey; // loaded as the RC4 key in the rc4 pseudo-code
__int64* str_m;
__int64* str___main__;
__int64* str_n;
__int64* str___name__;
__int64* str_print;
__int64* str_r;
__int64* str_range;
__int64* str_rc4;
__int64* str_s;
__int64* str_sayhello;
__int64* str_source;
__int64* str_sourcedpyx;
__int64* str___test__;
__int64* str_x;
// --- slots 37..63: integer constants; int_N is the object created by
// PyLong_FromLong(N) ---
__int64* int_0; // slot 37
__int64* int_1;
__int64* int_5;
__int64* int_23;
__int64* int_28;
__int64* int_56;
__int64* int_68;
__int64* int_77;
__int64* int_98;
__int64* int_99;
__int64* int_107;
__int64* int_108;
__int64* int_125;
__int64* int_134;
__int64* int_144;
__int64* int_147;
__int64* int_150;
__int64* int_154;
__int64* int_204;
__int64* int_211;
__int64* int_214;
__int64* int_216;
__int64* int_226;
__int64* int_235;
__int64* int_238;
__int64* int_241;
__int64* int_256; // slot 63
} data;
在local type窗口添加这个结构体。再按Y把off_18000B6A8的类型改为结构体的名字(这里是data),再按n把off_18000B6A8重命名一下。
记得检查一下结果是否对应正确。
逆向分析
不认识的函数可以查抽象对象层
_Pyx_AddTraceback()用于记录调用栈信息,会写入当前函数在模块中的名字,一般为”模块名.函数名”,这使得我们可以在字符串表中找到我们需要逆向的函数。
首先我们分析一下source.sayhello
①处的处理逻辑(gpt辅助)
在 Cython 编译成 C 代码后,Python 对象在底层是通过 PyObject
结构体来表示的,而这个结构体包含了对象的多种信息,其中就包括引用计数。
typedef struct {
PyObject_VAR_HEAD
PyObject *ob_type; // 类型信息
Py_ssize_t ob_refcnt; // 引用计数器
} PyObject;
异常捕捉:
第一部分的 if (v3) 检查了一个条件,v3 可能是一个对象指针。接下来的代码是在做对象的引用计数管理(相当于 Py_DECREF / Py_INCREF):
- 检查 *v3 >= 0:这可能是在检查指针指向的对象的状态(Python 3.12 及以后,引用计数的低 32 位按有符号数看为负表示“永生对象”,不需要减引用计数)。
- v5 = (*(_QWORD *)v3)-- == 1LL;:把 v3 强制类型转换为指向 64 位整型的指针,直接访问 v3 指向的 PyObject 结构体中的 ob_refcnt 字段,判断减一之前的引用计数是否为 1,如果是(减完为 0)就调用 Py_Dealloc 来释放对象。
- v4 = Py_NoneStruct; 和 if (Py_NoneStruct != -1) ++Py_NoneStruct;:这部分与 None 对象的引用计数有关。Py_NoneStruct 就是 Python 的 None 对象本身(对象开头就是引用计数),这里通常是函数返回 None 之前给它的引用计数加一。
异常回溯:
如果 v3 为 NULL 或者出现其他异常情况,代码会调用 _Pyx_AddTraceback 记录调用栈信息:
_Pyx_AddTraceback("source.sayhello", 2562LL, 3LL, "source.pyx"); //依次是:函数名、生成的C代码中的行号、pyx源码中的行号、源文件名称
这表示在 source.pyx 文件的第 3 行附近发生了问题。第三个参数是 pyx 的行号而不是栈帧深度,逆向时可以用它把伪代码和源码的行对应起来。
source.aadd
// write access to const memory has been detected, the output may be wrong!
__int64 __fastcall _pyx_pw_6source_3aadd(__int64 a1, __int64 *a2, __int64 a3, __int64 a4)
{
......
*(_QWORD *)&input = _Pyx_GetKwValue_FASTCALL(a4, v5, _pyx->str_n);// 以字典获取关键字参数,a4是指向字典(kwargs)的指针,v5是存储返回结果的变量,n是参数名,结果返回到input[0]
input_b = input;
if ( !(_QWORD)input )
{
if ( PyErr_Occurred() )
{
v11 = 2684LL;
goto LABEL_26;
}
goto LABEL_25;
}
v7 = _pyx;
--v10;
LABEL_13:
*((_QWORD *)&input + 1) = _Pyx_GetKwValue_FASTCALL(a4, v5, v7->str_m);// 同上,返回参数m的值到input[1]中
if ( !*((_QWORD *)&input + 1) ) // 一些异常处理
{
if ( PyErr_Occurred() )
{
v11 = 2692LL;
}
else
{
_Pyx_RaiseArgtupleInvalid(v13, v12, v14, v15, 1LL);// 检查函数参数个数
v11 = 2694LL;
}
goto LABEL_26;
}
--v10;
LABEL_15:
if ( v10 > 0 )
{
if ( (int)_Pyx_ParseOptionalKeywords(a4, (_DWORD)v5, (unsigned int)v26, a4, (__int64)&input, a3) < 0 )// 处理函数调用中的可选关键字参数把值解析到对应的函数参数中
{
v11 = 2699LL;
LABEL_26:
_Pyx_AddTraceback("source.aadd", v11, 8LL, "source.pyx");
return 0LL;
}
input_b = input; // 赋值
}
LABEL_28:
input0 = _Pyx_PyInt_As_int(input_b); // 从python对象里提出一个值转为c的int类型
if ( input0 == -1 && PyErr_Occurred() )
{
v18 = 2759LL;
}
else
{
input1 = _Pyx_PyInt_As_int(*((_QWORD *)&input + 1));
if ( input1 == -1 && PyErr_Occurred() )
{
v18 = 2760LL;
}
else
{
input_add = input0 + input1; // m + n
if ( input_add == -1 && PyErr_Occurred() )
{
v18 = 2761LL;
}
else
{
input_add_pyint = PyLong_FromLong(input_add);// 结果转为python的数字对象
v22 = (int *)input_add_pyint;
if ( input_add_pyint )
{
v23 = (int *)_Pyx_PyObject_CallOneArg(return, input_add_pyint);// 调用内置函数print输出相加结果(对应源码print(add(n,m))),这里被命名为return的变量实际保存的是print
......
source.rc4
// write access to const memory has been detected, the output may be wrong!
__int64 _pyx_pf_6source_4rc4()
{
......
s2 = 0LL;
sj_y256 = 0LL;
v2 = 0LL;
count3 = 0LL;
v4 = 25;
key_index = 0LL;
v80 = 0LL;
key = 0LL;
m1 = 0LL;
sbox2 = 0LL;
i2 = 0LL;
m5 = 0LL;
m = PyList_New(25LL); // 创建了一个大小为25的列表
if ( !m )
{
v4 = 12;
v7 = 2845;
goto LABEL_290;
}
v8 = _pyx; // 赋值
int_28 = _pyx->int_28; //把密文加载到列表m中
if ( *(_DWORD *)int_28 != -1 )
++*(_DWORD *)int_28;
**(_QWORD **)(m + 24) = v8->int_28;
......
if ( *(_DWORD *)int_56 != -1 )
++*(_DWORD *)int_56;
v80 = (int *)m;
*(_QWORD *)(*(_QWORD *)(m + 24) + 192LL) = v8->int_56;// 上面都是在给列表赋值,值就是一些数字常量,这是密文
str_keykey = v8->str_keykey;
if ( *(_DWORD *)str_keykey != -1 )
++*(_DWORD *)str_keykey;
str_7 = v8->str_7;
key = v8->str_keykey; // 加载密钥
if ( *(_DWORD *)str_7 != -1 )
++*(_DWORD *)str_7;
int_0 = v8->int_0;
flag = v8->str_7;
if ( *(_DWORD *)int_0 != -1 )
++*(_DWORD *)int_0;
j = (int *)v8->int_0;
v83 = j;
if ( *(_DWORD *)m != -1 )
++*(_DWORD *)m;
m1 = (int *)m;
s = _Pyx_PyObject_Call(qword_18000BF48, v8[1].str_0);// 调用一个对象,参数分别是调用对象名和参数,猜测返回值是256的那个表
if ( !s )
{
v4 = 17;
v7 = 2972;
goto LABEL_289;
}
if ( *(_QWORD *)(s + 8) == PyList_Type && *(_QWORD *)s == 1LL )// 判断返回值是否是列表,引用计数是否为1
{
*(_DWORD *)s = 2;
s2 = s; // 把列表赋值给v0
}
else
{
s2 = PySequence_List(s); // 把python对象转成列表,赋值给v0
if ( !s2 )
{
v4 = 17;
v7 = 2974;
LABEL_268:
if ( *(int *)s >= 0 ) // 判断是否释放对象
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
if ( !s2 )
{
LABEL_277:
if ( key_index )
goto LABEL_278;
goto LABEL_281;
}
LABEL_274:
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
goto LABEL_277;
}
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
sbox1 = s2; // 表
sbox2 = (int *)s2; // 表,暂时分不清类型是什么,感觉一个是地址,一个是元素
do
{
count1 = PyLong_FromLong((unsigned int)count3);// v3的值是0,赋值给count1
i = (int *)count1; // 赋值count1给i,i由下面推出
if ( !count1 )
{
v4 = 18;
v7 = 2988;
goto LABEL_287;
}
v43 = i2;
i2 = (int *)count1; // 赋值count1到count2
if ( v43 )
{
if ( *v43 >= 0 )
{
v39 = (*(_QWORD *)v43)-- == 1LL;
if ( v39 )
Py_Dealloc(v43);
}
}
Item = _Pyx_PyObject_GetItem(sbox1, i); // 从盒中获取元素s[i]
s2 = Item;
if ( !Item )
{
v4 = 19;
v7 = 3000;
goto LABEL_287;
}
s = PyNumber_Add(j, Item); // j的初始值是0,这里实现了j+s[i]
if ( !s )
{
v4 = 19;
v7 = 3002;
goto LABEL_274;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
s2 = 0LL;
if ( key[2] == -1 ) // 这里使用了key,下面的操作可能有关于key
{
v7 = 3005;
goto LABEL_267;
}
keylen = PyLong_FromSsize_t(); // 由下面可以更确定i,同时得知这里在获取key的大小
s2 = keylen;
if ( !keylen )
{
v7 = 3006;
goto LABEL_267;
}
key_index = (int *)PyNumber_Remainder(i, keylen);// 取余,实现了i%keylen
if ( !key_index )
{
v7 = 3008;
goto LABEL_267;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
s2 = _Pyx_PyObject_GetItem(key, key_index); // 按下标从key中取出元素,这里保存到了s2中
if ( !s2 )
{
v7 = 3011;
goto LABEL_267;
}
if ( *key_index >= 0 )
{
v39 = (*(_QWORD *)key_index)-- == 1LL;
if ( v39 )
Py_Dealloc(key_index);
}
v46 = *(_QWORD *)(s2 + 8);
key_index = 0LL;
v47 = *(_DWORD *)(v46 + 168);
if ( (v47 & 0x10000000) != 0 )
{
intkey = _Pyx_PyUnicode_AsPy_UCS4(s2); // 这里把s2按ascii码转为数字
goto LABEL_104;
}
if ( (v47 & 0x8000000) != 0 )
{
v49 = *(_QWORD *)(s2 + 16);
if ( v49 == 1 )
{
intkey = *(unsigned __int8 *)(s2 + 32);
goto LABEL_104;
}
LABEL_102:
v50 = "ord() expected a character, but string of length %zd found";// 熟悉的ord()函数,证实了上面的操作
goto LABEL_103;
}
if ( v46 == PyByteArray_Type || (unsigned int)PyType_IsSubtype() )
{
v49 = *(_QWORD *)(s2 + 16);
if ( v49 == 1 )
{
intkey = **(unsigned __int8 **)(s2 + 40);
goto LABEL_104;
}
goto LABEL_102;
}
v50 = "ord() expected string of length 1, but %.200s found";
v49 = *(_QWORD *)(*(_QWORD *)(s2 + 8) + 24LL);
LABEL_103:
PyErr_Format(PyExc_TypeError, v50, v49);
intkey = -1;
LABEL_104:
if ( intkey == -1 )
{
v7 = 3014;
goto LABEL_267;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
intkey1 = PyLong_FromLong(intkey); // 把获取的ascii数转为转为python对象
s2 = intkey1; // 赋值给s2
if ( !intkey1 )
{
v7 = 3016;
goto LABEL_267;
}
key_index = (int *)PyNumber_Add(s, intkey1);// 加法运算,s是上面的j+s[i],这里实现了操作(j + s[i]) + ord(key[i % len(key)])
if ( !key_index )
{
v7 = 3018;
LABEL_267:
v4 = 19;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
result = _Pyx_PyInt_RemainderObjC(key_index, _pyx->int_256);// 把加来的结果对256取余
if ( !result )
{
v4 = 19;
v7 = 3022;
goto LABEL_278;
}
if ( *key_index >= 0 )
{
v39 = (*(_QWORD *)key_index)-- == 1LL;
if ( v39 )
Py_Dealloc(key_index);
}
v53 = v83;
j = (int *)result; // 赋值给j,这里一起实现了j = ((j + s[i]) + ord(key[i % len(key)])) % 256
v54 = (int *)v83;
v83 = (_QWORD *)result;
if ( *v54 >= 0 )
{
v39 = (*v53)-- == 1LL;
if ( v39 )
Py_Dealloc(v54);
}
sbox1 = (__int64)sbox2;
s2 = _Pyx_PyObject_GetItem(sbox2, result); // 获取s[result]
if ( !s2 )
{
v4 = 20;
v7 = 3035;
count3 = sbox2;
goto LABEL_288;
}
key_index = (int *)_Pyx_PyObject_GetItem(sbox2, i);// 获取s[i]
if ( !key_index )
{
v4 = 20;
v7 = 3037;
goto LABEL_274;
}
if ( (int)PyObject_SetItem(sbox2, i, s2) < 0 )// s[i] = s2
{
v4 = 20;
v7 = 3039;
goto LABEL_274;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
if ( (int)PyObject_SetItem(sbox2, j, key_index) < 0 )// s[j] = key_index,这里实现了s[i]和s[j]的互换
{
v4 = 20;
v7 = 3041;
goto LABEL_278;
}
if ( *key_index >= 0 )
{
v39 = (*(_QWORD *)key_index)-- == 1LL;
if ( v39 )
Py_Dealloc(key_index);
}
LODWORD(count3) = (_DWORD)count3 + 1; // 循环计数加一
key_index = 0LL;
}
while ( (int)count3 < 256 ); // 循环256次(count3从0到255),对应for i in range(256)
v55 = _pyx;
v56 = _pyx->int_0;
if ( *(_DWORD *)v56 != -1 )
++*(_DWORD *)v56;
v2 = v55->int_0;
v83 = v2;
if ( *j >= 0 )
{
v39 = (*(_QWORD *)j)-- == 1LL;
if ( v39 )
Py_Dealloc(j);
}
v57 = _pyx;
v58 = _pyx->int_0;
if ( *(_DWORD *)v58 != -1 )
++*(_DWORD *)v58;
iint0 = v57->int_0;
i2 = (int *)iint0; // 这里是i,初始化i=0(取的是int_0),往下看可以推出
if ( *i >= 0 )
{
v39 = (*(_QWORD *)i)-- == 1LL;
if ( v39 )
Py_Dealloc(i);
}
v60 = m1;
key_index = m1;
if ( *m1 != -1 )
++*m1;
count3 = sbox2;
s2 = 0LL;
count4 = 0LL;
if ( *((__int64 *)m1 + 2) <= 0 )
{
LABEL_226:
if ( *v60 >= 0 )
{
v39 = (*(_QWORD *)v60)-- == 1LL;
if ( v39 )
Py_Dealloc(v60);
}
sj_y256 = (int *)flag;
v77 = (int *)_Pyx_PyObject_CallOneArg(callable, flag);
if ( !v77 )
{
v4 = 29;
v7 = 3202;
goto LABEL_290;
}
if ( *v77 >= 0 )
{
v39 = (*(_QWORD *)v77)-- == 1LL;
if ( v39 )
Py_Dealloc(v77);
}
s2 = Py_NoneStruct;
if ( Py_NoneStruct != -1 )
++Py_NoneStruct;
}
else
{
j2 = (int *)v2; // 由下面的判断这里是第二个循环的j,初始化j=0
m2 = m1; // 密文
while ( 1 )
{
m4 = *(int **)(*((_QWORD *)m2 + 3) + 8 * count4);// 取密文列表的第count4个元素:m2+24处是列表的元素指针数组,8*count4是按64位指针大小计算的偏移(不是取8位数据)
if ( *m4 != -1 )
++*m4;
v65 = m5;
++count4; // 下一个密文的索引
m5 = m4; // 赋值密文
if ( v65 )
{
if ( *v65 >= 0 )
{
v39 = (*(_QWORD *)v65)-- == 1LL;
if ( v39 )
Py_Dealloc(v65);
}
}
i_add1 = _Pyx_PyInt_AddObjC(iint0, _pyx->int_1);// 这里的iint0在上面赋值给了i2,也就是i,在下面又会把i的值赋值给iint0,所以这个变量也是i,这里实现了i+1
s2 = i_add1;
if ( !i_add1 )
break;
iY256 = _Pyx_PyInt_RemainderObjC(i_add1, _pyx->int_256);// (i+1)%256
if ( !iY256 )
{
sj_y256 = 0LL;
v4 = 24;
v7 = 3100;
goto LABEL_274;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
v68 = (int *)iint0;
i2 = (int *)iY256; // 可以确定i2是i,因为(i+1)%256被赋值回了i
iY256_ = iY256;
if ( *v68 >= 0 )
{
v39 = (*(_QWORD *)v68)-- == 1LL;
if ( v39 )
Py_Dealloc(v68);
}
s_i = _Pyx_PyObject_GetItem(sbox2, iY256);// s[i]
s = s_i;
if ( !s_i )
{
sj_y256 = 0LL;
v7 = 3113;
goto LABEL_278;
}
s2 = PyNumber_Add(j2, s_i); // j+s[i]
if ( !s2 )
{
sj_y256 = 0LL;
v7 = 3115;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
j_add_si = _Pyx_PyInt_RemainderObjC(s2, _pyx->int_256);// (j+s[i])%256
if ( !j_add_si )
{
sj_y256 = 0LL;
v7 = 3118;
goto LABEL_274;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
v72 = j2;
v83 = (_QWORD *)j_add_si;
j2 = (int *)j_add_si; // 赋值回j
if ( *v72 >= 0 )
{
v39 = (*(_QWORD *)v72)-- == 1LL;
if ( v39 )
Py_Dealloc(v72);
}
s = _Pyx_PyObject_GetItem(sbox2, j_add_si);// 在盒中查找值s[j]
if ( !s )
{
sj_y256 = 0LL;
v4 = 26;
v7 = 3131;
goto LABEL_278;
}
s2 = _Pyx_PyObject_GetItem(sbox2, iY256_);// s[i]
if ( !s2 )
{
sj_y256 = 0LL;
v4 = 26;
v7 = 3133;
goto LABEL_268;
}
if ( (int)PyObject_SetItem(sbox2, iY256_, s) < 0 )
{
sj_y256 = 0LL;
v4 = 26;
v7 = 3135;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
if ( (int)PyObject_SetItem(sbox2, j2, s2) < 0 )// 交换s[i],s[j]
{
sj_y256 = 0LL;
v4 = 26;
v7 = 3137;
goto LABEL_274;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
s2 = _Pyx_PyObject_GetItem(sbox2, iY256_);// s2 = s[i]
if ( !s2 )
{
sj_y256 = 0LL;
v4 = 27;
v7 = 3147;
goto LABEL_278;
}
s_j2 = _Pyx_PyObject_GetItem(sbox2, j2); // s[j]
s = s_j2;
if ( !s_j2 )
{
sj_y256 = 0LL;
v4 = 27;
v7 = 3149;
goto LABEL_274;
}
sj_y256 = (int *)_Pyx_PyInt_RemainderObjC(s_j2, _pyx->int_256);// s[j]%256
if ( !sj_y256 )
{
v4 = 27;
v7 = 3151;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
s = PyNumber_Add(s2, sj_y256); // s = s[i] + s[j] %256
if ( !s )
{
v4 = 27;
v7 = 3154;
goto LABEL_274;
}
if ( *(int *)s2 >= 0 )
{
v39 = (*(_QWORD *)s2)-- == 1LL;
if ( v39 )
Py_Dealloc(s2);
}
s2 = 0LL;
if ( *sj_y256 >= 0 )
{
v39 = (*(_QWORD *)sj_y256)-- == 1LL;
if ( v39 )
Py_Dealloc(sj_y256);
}
sj_y256 = (int *)_Pyx_PyInt_RemainderObjC(s, _pyx->int_256);// s %256
if ( !sj_y256 )
{
v4 = 27;
v7 = 3158;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
v74 = v80;
v80 = sj_y256;
if ( *v74 >= 0 )
{
v39 = (*(_QWORD *)v74)-- == 1LL;
if ( v39 )
Py_Dealloc(v74);
}
s_j3 = _Pyx_PyObject_GetItem(sbox2, sj_y256);// s[sj_y256],这里实现了s[(s[i] + s[j] % 256) % 256]
sj_y256 = (int *)s_j3;
if ( !s_j3 )
{
v4 = 28;
v7 = 3170;
goto LABEL_278;
}
s = _Pyx_PyInt_RemainderObjC(s_j3, _pyx->int_256);// s[x]%256,其中x = (s[i] + s[j] % 256) % 256
if ( !s )
{
v4 = 28;
v7 = 3172;
goto LABEL_278;
}
if ( *sj_y256 >= 0 )
{
v39 = (*(_QWORD *)sj_y256)-- == 1LL;
if ( v39 )
Py_Dealloc(sj_y256);
}
sj_y256 = (int *)PyNumber_Xor(m5, s); // 密文与得到的结果异或,即r ^ s[x] % 256
if ( !sj_y256 )
{
v4 = 28;
v7 = 3175;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
s = _Pyx_PyObject_CallOneArg(qword_18000BF40, sj_y256);// 猜这里是把异或的密文转成字符串
if ( !s )
{
v4 = 28;
v7 = 3178;
goto LABEL_278;
}
if ( *sj_y256 >= 0 )
{
v39 = (*(_QWORD *)sj_y256)-- == 1LL;
if ( v39 )
Py_Dealloc(sj_y256);
}
sj_y256 = (int *)PyNumber_InPlaceAdd(flag, s);// flag += s,这里实现了字符串加法
if ( !sj_y256 )
{
v4 = 28;
v7 = 3181;
goto LABEL_268;
}
if ( *(int *)s >= 0 )
{
v39 = (*(_QWORD *)s)-- == 1LL;
if ( v39 )
Py_Dealloc(s);
}
v76 = (int *)flag;
flag = (__int64 *)sj_y256; // 结果保存到flag中
if ( *v76 >= 0 )
{
v39 = (*(_QWORD *)v76)-- == 1LL;
if ( v39 )
Py_Dealloc(v76);
}
m2 = m1;
iint0 = (__int64 *)i2; // 赋值i到iint0中进行准备下一轮循环
if ( count4 >= *((_QWORD *)m1 + 2) )
{
v2 = (__int64 *)j2;
v60 = m1; // 跳转到上面实现循环,进行下一轮加密
goto LABEL_226;
}
}
......
_Pyx_AddTraceback("source.rc4", v7, v4, "source.pyx");
......
return s2;
}
dir导出密文
python中的dir()函数可以查看模块包含的对象(dis模块是字节码反汇编工具,对编译好的pyd/so不起作用),有时候可能会有密文之类的,可以直接print打印。
总结
我们发现一个cython函数里面其实有很多我们不需要分析的东西,像一些有许多跳转的if其实我们根本就不需要管,这些代码把一个操作分成了许多步,有一些操作其实我们心里大概有数就行了。对一个cython函数的分析,我们需要大胆地猜,对变量进行追踪,弄清关键数据操作的作用就行了,数据操作比较复杂时可以自己写伪代码记录一下。