LOADING

加载过慢请开启缓存 浏览器默认开启

Cython逆向初步学习

我们只进行一些总结,基础理解我参考了这篇文章

Cython编写

系统环境:Windows,Python IDE为PyCharm(Python虚拟环境.venv),C/C++编译器为Visual Studio 2022

1.在python中安装cython和setuptools包

建议给Cython单独开一个python虚拟环境,有时候包多了导入pyd会报错。

pycharm可以直接使用包管理器搜索安装。

pip install cython
pip install setuptools

2.安装C/C++编译器

我这里用的是Visual Studio,也可以用MinGW。

3.编写cython拓展

可以参考用户指南 - 《Cython 3.0 中文文档》 - 书栈网 · BookStack

随意编写一个source.pyx作为拓展模块(代码中用到了cdef,所以必须保存为.pyx文件),代码如下

# Print the fixed greeting "hello world" to stdout.
def sayhello():
    a = "hello world"
    print(a)
    
# C-level helper declared with cdef: takes two C ints and returns their sum as a C int.
cdef int add(int n,int m):
    return m+n

# Python-visible wrapper around the cdef helper: prints add(n, m).
def aadd(n,m):
    print(add(n,m))

还需要一个编译文件,这里我们用myc.py,代码如下

from setuptools import setup
from Cython.Build import cythonize

# Translate source.pyx to C with Cython (Python 3 language semantics) and
# hand the resulting extension to setuptools to build.
setup(ext_modules=cythonize("source.pyx",language_level=3))

"""
cythonize函数用于把cython编译成c文件
setup把c文件生成拓展模块
cythonize第一个参数是要编译的pyx或者py文件(与本文件在同一目录下),
language_level=3表示在python3环境下运行,默认python2和3通用
"""

用cmd编译拓展模块

在上面文件所在目录打开cmd并启动我们的python虚拟环境(不是虚拟环境的跳过),输入的命令如下,我这里是”E:\python projects\cython\.venv\Scripts\activate”

"项目根目录\.venv\Scripts\activate"

再输入以下命令进行编译,我这里是python myc.py build

python [编译文件] build

python [编译文件] build_ext --inplace #这个指令直接把拓展模块放到当前目录而不是build目录

当前目录下的build文件夹中的lib文件夹里就是我们的库文件.pyd,我们直接使用这个库文件就行,其他的可以删除。把pyd文件放到工作目录下,直接按名字导入即可使用。

image-20250116220849678

cython还可以导入c文件,这个先后面再说吧,好像有点问题

from setuptools import setup, Extension
from Cython.Build import cythonize

# Build one extension named "myc" from the Cython source plus a C++ source file.
# NOTE(review): rc4enc.cpp is C++ — confirm whether Extension(..., language="c++")
# is required; the surrounding text reports this build as not yet working.
setup(ext_modules=cythonize(Extension(name="myc", sources=["source.pyx", "rc4enc.cpp"]),language_level=3))

Cython逆向

我们自己编译一份cython文件分析,这里我实现了一个RC4加密

source.pyx

# Print the fixed greeting "hello world" to stdout.
def sayhello():
    a = "hello world"
    print(a)

# C-level helper declared with cdef: takes two C ints and returns their sum as a C int.
cdef int add(int n,int m):
    return m+n

# Python-visible wrapper around the cdef helper: prints add(n, m).
def aadd(n,m):
    print(add(n,m))

# Decrypt a hard-coded 25-byte ciphertext with RC4 under the key 'key!key!'
# and print the resulting string. Takes no arguments and returns None.
def rc4():
    # Ciphertext bytes.
    x=[0x1c,0x05,0x96,0xd3,0xf1,0x62,0x90,0x7d,0x17,0x86,0xd6,0x63,0xd8,0xe2,0x9a,0x93,0x44,0x6b,0xee,0xeb,0x4d,0x6c,0xcc,0x6c,0x38]
    key='key!key!'
    flag = ''
    j = 0
    # Keep a second reference to the ciphertext list: the name x is rebound
    # to an integer index inside the decryption loop below.
    c = x
    # Key scheduling: start from the identity permutation 0..255 and shuffle
    # it using the key characters (cycled with i % len(key)).
    s = list(range(256))
    for i in range(256):
        j = ((j + s[i]) + ord(key[i % len(key)])) % 256
        s[i], s[j] = s[j], s[i]
    # Keystream generation: both indices restart at 0.
    j = 0
    i = 0
    for r in c:
        i = (i + 1) % 256
        j = (j + s[i]) % 256
        s[i], s[j] = s[j], s[i]
        # % binds tighter than +, so this is (s[i] + (s[j] % 256)) % 256.
        x = (s[i] + s[j] % 256) % 256
        # % binds tighter than ^, so this is r ^ (s[x] % 256).
        flag += chr(r ^ s[x] % 256)
    print(flag)

imp.py

import source

source.sayhello()
source.aadd(10,10)
source.rc4()

# hello world
# 20
# flag{yes!_this_is_cython}

恢复符号

用help函数可以看到模块的一些信息

import source
help(source)

"""
返回结果
Help on module source:

NAME  #模块名
    source

FUNCTIONS #包含的函数
    aadd(n, m)

    rc4()

    sayhello()

DATA
    __test__ = {}

FILE #源文件
    e:\python projects\cython\source.cp313-win_amd64.pyd

"""

我们把得到的pyd文件拖入ida分析。如果是.so文件函数的符号是被去掉了,我们可以在linux下随便编译一个python版本相同且内容最好要相似的cython文件,用bindiff来恢复符号表。(python版本在ida的Imports窗口可以看到”我这里是python313,即为3.13”)

根据上面的信息我们用linux编译一个差不多的cython模块。这里我用linux的pycharm部署了python3.13的cython环境。

sudo apt install python3.13

sudo apt-get install python3.13-dev 

source.pyx

# Stub module for symbol recovery: defines the same three function names
# (sayhello, aadd, rc4) with placeholder bodies that just return 1.
def sayhello():
    return 1
def aadd(n, m):
    return 1
def rc4():
    return 1

myc.py同上

在目录下用cython编译”./.venv/bin/python3.13 myc.py build_ext --inplace”

image-20250121153722273

把so文件复制出来,用ida打开一次获得idb数据库,bindiff导入恢复符号。

这里有一个细节,就是windows下的cython文件默认是会去掉符号的,而linux却不会。所以如果是.pyd文件的话,我们要用下面的脚本进行编译来保留符号。

mycython.py

from setuptools import setup,Extension
from Cython.Build import cythonize
 
# Extension definition that passes MSVC flags so the build keeps debug symbols.
module = Extension(
    "MyPyd",
    sources=["MyPyd.py"],
    extra_compile_args=["/Zi"],  # emit debug information
    extra_link_args=["/DEBUG"],  # produce a PDB file
)
# annotate=True is forwarded to cythonize along with the extension.
setup(
    ext_modules = cythonize(module,annotate=True)
)

source.pyx同上面linux的

编译后会生成.pyd文件和.pdb(在build文件夹里面),用ida打开.pyd就可以导入.pdb文件,再用源文件与ida生成的.idb文件用bindiff插件即可恢复符号(ctrl+6)。

重建结构体

_Pyx_CreateStringTabAndInitStrings()保存了要使用的字符串常量,用于加载字符串。

_Pyx_InitConstants()保存了要使用的数字常量。

为了更容易阅读代码中对常量的引用我们要重建一下结构体。

我们先在off_18000B6A8按”Y“把类型设置为”__int64*”

image-20250121203045027

我们先定义一个结构体,结构体的大小最好大于等于n(off_18000B6A8 + n),我这里是27,我们就创建一个有27个__int64元素的结构体。

image-20250121203541840

//一个结构体模板_Pyx_CreateStringTabAndInitStrings()
typedef struct __strings {
    __int64* str_0;
    __int64* str_1;
    __int64* str_2;
    __int64* str_3;
    __int64* str_4;
    __int64* str_5;
    __int64* str_6;
    __int64* str_7;
    __int64* str_8;
    __int64* str_9;
    __int64* str_10;
    __int64* str_11;
    __int64* str_12;
    __int64* str_13;
    __int64* str_14;
    __int64* str_15;
    __int64* str_16;
    __int64* str_17;
    __int64* str_18;
    __int64* str_19;
    __int64* str_20;
    __int64* str_21;
    __int64* str_22;
    __int64* str_23;
    __int64* str_24;
    __int64* str_25;
    __int64* str_26;
    __int64* str_27;
    __int64* str_28;
    __int64* str_29;
    __int64* str_30;
    __int64* str_31;
    __int64* str_32;
    __int64* str_33;
    __int64* str_34;
    __int64* str_35;
    __int64* str_36;
    __int64* str_37;
    __int64* str_38;
    __int64* str_39;
    __int64* str_40;
    __int64* str_41;
    __int64* str_42;
    __int64* str_43;
    __int64* str_44;
    __int64* str_45;
    __int64* str_46;
    __int64* str_47;
    __int64* str_48;
    __int64* str_49;
    __int64* str_50;
    __int64* str_51;
    __int64* str_52;
    __int64* str_53;
    __int64* str_54;
    __int64* str_55;
    __int64* str_56;
    __int64* str_57;
    __int64* str_58;
    __int64* str_59;
    __int64* str_60;
    __int64* str_61;
    __int64* str_62;
    __int64* str_63;
} str;
//一个结构体模板__Pyx_InitConstants()
typedef struct _iint {
    __int64* int_0;
    __int64* int_1;
    __int64* int_2;
    __int64* int_3;
    __int64* int_4;
    __int64* int_5;
    __int64* int_6;
    __int64* int_7;
    __int64* int_8;
    __int64* int_9;
    __int64* int_10;
    __int64* int_11;
    __int64* int_12;
    __int64* int_13;
    __int64* int_14;
    __int64* int_15;
    __int64* int_16;
    __int64* int_17;
    __int64* int_18;
    __int64* int_19;
    __int64* int_20;
    __int64* int_21;
    __int64* int_22;
    __int64* int_23;
    __int64* int_24;
    __int64* int_25;
    __int64* int_26;
    __int64* int_27;
    __int64* int_28;
    __int64* int_29;
    __int64* int_30;
    __int64* int_31;
    __int64* int_32;
    __int64* int_33;
    __int64* int_34;
    __int64* int_35;
    __int64* int_36;
    __int64* int_37;
    __int64* int_38;
    __int64* int_39;
    __int64* int_40;
    __int64* int_41;
    __int64* int_42;
    __int64* int_43;
    __int64* int_44;
    __int64* int_45;
    __int64* int_46;
    __int64* int_47;
    __int64* int_48;
    __int64* int_49;
    __int64* int_50;
    __int64* int_51;
    __int64* int_52;
    __int64* int_53;
    __int64* int_54;
    __int64* int_55;
    __int64* int_56;
    __int64* int_57;
    __int64* int_58;
    __int64* int_59;
    __int64* int_60;
    __int64* int_61;
    __int64* int_62;
    __int64* int_63;
} iint;

_Pyx_InitConstants()中数字常量结构体创建的脚本,看情况修改xxxxxxxxx处的值

import re

# 输入_Pyx_InitConstants()的内容
content = '''

'''

def modify_struct(content, start=37, max_count=64):
    """Print a struct member line for each integer constant found in *content*.

    *content* is the decompiled text of ``_Pyx_InitConstants()``. Every
    ``PyLong_FromLong(<value>LL);`` call is matched in order of appearance
    and one line ``__int64* int_<value>`` is printed per distinct value.

    Args:
        content: Decompiler output to scan.
        start: Slot index assigned to the first matched constant.
        max_count: Maximum number of matches to process.

    Returns:
        A dict mapping each constant value to the slot index assigned to it
        (``start`` for the first match, ``start + 1`` for the second, ...).
        Matches are numbered consecutively; gaps in the real slot indices
        are not detected and must be padded by hand.
    """
    pattern = re.compile(r'PyLong_FromLong\((\d+)LL\);')

    # value -> slot index. A dict keeps first-seen order and collapses a
    # repeated value into a single member.
    mapping = {}
    for offset, match in zip(range(max_count), pattern.finditer(content)):
        mapping[int(match.group(1))] = start + offset

    for num in mapping:
        print(f'__int64* int_{num}')

    return mapping

modify_struct(content)

先修改字符串的。按偏移量把元素名字修改成好阅读的

typedef struct __strings {
    __int64* str_0;
    __int64* str_1;
    __int64* str_2;
    __int64* str_3;
    __int64* str_4;
    __int64* str_5;
    __int64* str_6;
    __int64* str_7;
    __int64* str_wenhao;
    __int64* str_unk_180009078;
    __int64* str_aadd;
    __int64* str_asyncio_coroutines;
    __int64* str_unk_1800090B0;
    __int64* str_chr;
    __int64* str___class_getitem__;
    __int64* str_cline_in_traceback;
    __int64* str_flag;
    __int64* str_hellow_orld;
    __int64* str_i;
    __int64* str__is_coroutine;
    __int64* str_j;
    __int64* str_key;
    __int64* str_keykey;
    __int64* str_m;
    __int64* str___main__;
    __int64* str_n;
    __int64* str___name__;
    __int64* str_print;
    __int64* str_r;
    __int64* str_range;
    __int64* str_rc4;
    __int64* str_s;
    __int64* str_sayhello;
    __int64* str_source;
    __int64* str_sourcedpyx;
    __int64* str___test__;
    __int64* str_x;
} str;

同样再重建一下_Pyx_InitConstants()内的结构体,数字常量的开头索引就是37,我们就截取从37开始的结构体,用脚本进行重命名,中间要是有不连续的索引记得看情况补上一个成员。

typedef struct _iint {
    __int64* int_37;
    __int64* int_38;
    __int64* int_39;
    __int64* int_40;
    __int64* int_41;
    __int64* int_42;
    __int64* int_43;
    __int64* int_44;
    __int64* int_45;
    __int64* int_46;
    __int64* int_47;
    __int64* int_48;
    __int64* int_49;
    __int64* int_50;
    __int64* int_51;
    __int64* int_52;
    __int64* int_53;
    __int64* int_54;
    __int64* int_55;
    __int64* int_56;
    __int64* int_57;
    __int64* int_58;
    __int64* int_59;
    __int64* int_60;
    __int64* int_61;
    __int64* int_62;
    __int64* int_63;
} iint;
import re

content = '''v0 = PyLong_FromLong(0LL);
  *((_QWORD *)_pyx + 37) = v0;
  if ( !v0 )
    return 0xFFFFFFFFLL;
  v1 = PyLong_FromLong(1LL);
  *((_QWORD *)_pyx + 38) = v1;
  if ( !v1 )
    return 0xFFFFFFFFLL;
  v2 = PyLong_FromLong(5LL);
  *((_QWORD *)_pyx + 39) = v2;
  if ( !v2 )
    return 0xFFFFFFFFLL;
  v3 = PyLong_FromLong(23LL);
  *((_QWORD *)_pyx + 40) = v3;
  if ( !v3 )
    return 0xFFFFFFFFLL;
  v4 = PyLong_FromLong(28LL);
  *((_QWORD *)_pyx + 41) = v4;
  if ( !v4 )
    return 0xFFFFFFFFLL;
  v5 = PyLong_FromLong(56LL);
  *((_QWORD *)_pyx + 42) = v5;
  if ( !v5 )
    return 0xFFFFFFFFLL;
  v6 = PyLong_FromLong(68LL);
  *((_QWORD *)_pyx + 43) = v6;
  if ( !v6 )
    return 0xFFFFFFFFLL;
  v7 = PyLong_FromLong(77LL);
  *((_QWORD *)_pyx + 44) = v7;
  if ( !v7 )
    return 0xFFFFFFFFLL;
  v8 = PyLong_FromLong(98LL);
  *((_QWORD *)_pyx + 45) = v8;
  if ( !v8 )
    return 0xFFFFFFFFLL;
  v9 = PyLong_FromLong(99LL);
  *((_QWORD *)_pyx + 46) = v9;
  if ( !v9 )
    return 0xFFFFFFFFLL;
  v10 = PyLong_FromLong(107LL);
  *((_QWORD *)_pyx + 47) = v10;
  if ( !v10 )
    return 0xFFFFFFFFLL;
  v11 = PyLong_FromLong(108LL);
  *((_QWORD *)_pyx + 48) = v11;
  if ( !v11 )
    return 0xFFFFFFFFLL;
  v12 = PyLong_FromLong(125LL);
  *((_QWORD *)_pyx + 49) = v12;
  if ( !v12 )
    return 0xFFFFFFFFLL;
  v13 = PyLong_FromLong(134LL);
  *((_QWORD *)_pyx + 50) = v13;
  if ( !v13 )
    return 0xFFFFFFFFLL;
  v14 = PyLong_FromLong(144LL);
  *((_QWORD *)_pyx + 51) = v14;
  if ( !v14 )
    return 0xFFFFFFFFLL;
  v15 = PyLong_FromLong(147LL);
  *((_QWORD *)_pyx + 52) = v15;
  if ( !v15 )
    return 0xFFFFFFFFLL;
  v16 = PyLong_FromLong(150LL);
  *((_QWORD *)_pyx + 53) = v16;
  if ( !v16 )
    return 0xFFFFFFFFLL;
  v17 = PyLong_FromLong(154LL);
  *((_QWORD *)_pyx + 54) = v17;
  if ( !v17 )
    return 0xFFFFFFFFLL;
  v18 = PyLong_FromLong(204LL);
  *((_QWORD *)_pyx + 55) = v18;
  if ( !v18 )
    return 0xFFFFFFFFLL;
  v19 = PyLong_FromLong(211LL);
  *((_QWORD *)_pyx + 56) = v19;
  if ( !v19 )
    return 0xFFFFFFFFLL;
  v20 = PyLong_FromLong(214LL);
  *((_QWORD *)_pyx + 57) = v20;
  if ( !v20 )
    return 0xFFFFFFFFLL;
  v21 = PyLong_FromLong(216LL);
  *((_QWORD *)_pyx + 58) = v21;
  if ( !v21 )
    return 0xFFFFFFFFLL;
  v22 = PyLong_FromLong(226LL);
  *((_QWORD *)_pyx + 59) = v22;
  if ( !v22 )
    return 0xFFFFFFFFLL;
  v23 = PyLong_FromLong(235LL);
  *((_QWORD *)_pyx + 60) = v23;
  if ( !v23 )
    return 0xFFFFFFFFLL;
  v24 = PyLong_FromLong(238LL);
  *((_QWORD *)_pyx + 61) = v24;
  if ( !v24 )
    return 0xFFFFFFFFLL;
  v25 = PyLong_FromLong(241LL);
  *((_QWORD *)_pyx + 62) = v25;
  if ( !v25 )
    return 0xFFFFFFFFLL;
  v26 = PyLong_FromLong(256LL);
  *((_QWORD *)_pyx + 63) = v26;
'''

def modify_struct(content, start=37, max_count=64):
    """Print a struct member line for each integer constant found in *content*.

    *content* is the decompiled text of ``_Pyx_InitConstants()``. Every
    ``PyLong_FromLong(<value>LL);`` call is matched in order of appearance
    and one line ``__int64* int_<value>`` is printed per distinct value.

    Args:
        content: Decompiler output to scan.
        start: Slot index assigned to the first matched constant.
        max_count: Maximum number of matches to process.

    Returns:
        A dict mapping each constant value to the slot index assigned to it
        (``start`` for the first match, ``start + 1`` for the second, ...).
        Matches are numbered consecutively; gaps in the real slot indices
        are not detected and must be padded by hand.
    """
    pattern = re.compile(r'PyLong_FromLong\((\d+)LL\);')

    # value -> slot index. A dict keeps first-seen order and collapses a
    # repeated value into a single member.
    mapping = {}
    for offset, match in zip(range(max_count), pattern.finditer(content)):
        mapping[int(match.group(1))] = start + offset

    for num in mapping:
        print(f'__int64* int_{num}')

    return mapping

modify_struct(content)
typedef struct _iint {
    __int64* int_0;
    __int64* int_1;
    __int64* int_5;
    __int64* int_23;
    __int64* int_28;
    __int64* int_56;
    __int64* int_68;
    __int64* int_77;
    __int64* int_98;
    __int64* int_99;
    __int64* int_107;
    __int64* int_108;
    __int64* int_125;
    __int64* int_134;
    __int64* int_144;
    __int64* int_147;
    __int64* int_150;
    __int64* int_154;
    __int64* int_204;
    __int64* int_211;
    __int64* int_214;
    __int64* int_216;
    __int64* int_226;
    __int64* int_235;
    __int64* int_238;
    __int64* int_241;
    __int64* int_256;
} iint;

如果_Pyx_CreateStringTabAndInitStrings()和_Pyx_InitConstants()使用的是相同的基地址(都是off_18000B6A8),我们把两个结构体合并,两个结构体之间连接的索引要连续。如果不相同,就分别修复结构体就行。

typedef struct data {
    __int64* str_0;
    __int64* str_1;
    __int64* str_2;
    __int64* str_3;
    __int64* str_4;
    __int64* str_5;
    __int64* str_6;
    __int64* str_7;
    __int64* str_wenhao;
    __int64* str_unk_180009078;
    __int64* str_aadd;
    __int64* str_asyncio_coroutines;
    __int64* str_unk_1800090B0;
    __int64* str_chr;
    __int64* str___class_getitem__;
    __int64* str_cline_in_traceback;
    __int64* str_flag;
    __int64* str_hellow_orld;
    __int64* str_i;
    __int64* str__is_coroutine;
    __int64* str_j;
    __int64* str_key;
    __int64* str_keykey;
    __int64* str_m;
    __int64* str___main__;
    __int64* str_n;
    __int64* str___name__;
    __int64* str_print;
    __int64* str_r;
    __int64* str_range;
    __int64* str_rc4;
    __int64* str_s;
    __int64* str_sayhello;
    __int64* str_source;
    __int64* str_sourcedpyx;
    __int64* str___test__;
    __int64* str_x;
    __int64* int_0;
    __int64* int_1;
    __int64* int_5;
    __int64* int_23;
    __int64* int_28;
    __int64* int_56;
    __int64* int_68;
    __int64* int_77;
    __int64* int_98;
    __int64* int_99;
    __int64* int_107;
    __int64* int_108;
    __int64* int_125;
    __int64* int_134;
    __int64* int_144;
    __int64* int_147;
    __int64* int_150;
    __int64* int_154;
    __int64* int_204;
    __int64* int_211;
    __int64* int_214;
    __int64* int_216;
    __int64* int_226;
    __int64* int_235;
    __int64* int_238;
    __int64* int_241;
    __int64* int_256;
} data;

在local type窗口添加这个结构体。再按Y把off_18000B6A8的类型改为结构体的名字(这里是data),再按n把off_18000B6A8重命名一下。

image-20250121212022308

image-20250121224830153

记得检查一下结果是否对应正确。

逆向分析

不认识的函数可以查抽象对象层

_Pyx_AddTraceback()用于记录调用栈信息,会写入当前函数在模块中的名字,一般为”模块名.函数名”,这使得我们可以在字符串表中找到我们需要逆向的函数。

首先我们分析一下source.sayhello

image-20250122013829773

①处的处理逻辑(gpt辅助)

在 Cython 编译成 C 代码后,Python 对象在底层是通过 PyObject 结构体来表示的,而这个结构体包含了对象的多种信息,其中就包括引用计数。

typedef struct {
    PyObject_VAR_HEAD
    PyObject *ob_type;  // 类型信息
    Py_ssize_t ob_refcnt;  // 引用计数器
} PyObject;
  1. 异常捕捉:
    第一部分的 if (v3) 检查了一个条件,可能是一个对象指针。接下来的代码是尝试进行某种资源的释放或对象的内存管理:

    • 检查 *v3 >= 0,这可能是在检查指针指向的对象的状态。
    • v5 = (*(_QWORD *)v3)-- == 1LL; v3 强制类型转换为指向 64 位整型的指针,直接访问 v3 指向的 PyObject 结构体中的 ob_refcnt 字段,检查对象引用计数是否为 1,并且在引用计数为 1 时调用 Py_Dealloc 来释放对象。
    • v4 = Py_NoneStruct;if (Py_NoneStruct != -1) ++Py_NoneStruct; 这部分可能是与 None 对象的引用计数有关,Py_NoneStruct 是一个指向 Python None 的结构体指针。
  2. 异常回溯:
    如果 v3 为 NULL 或者出现其他异常情况,代码会调用 _Pyx_AddTraceback 记录调用栈信息:

    _Pyx_AddTraceback("source.sayhello", 2562LL, 3LL, "source.pyx"); //函数名,生成的C代码行号,.pyx源码行号,源文件名称
    

    这表示在 source.pyx 文件的某个地方发生了问题。

source.aadd

// write access to const memory has been detected, the output may be wrong!
__int64 __fastcall _pyx_pw_6source_3aadd(__int64 a1, __int64 *a2, __int64 a3, __int64 a4)
{
......
  *(_QWORD *)&input = _Pyx_GetKwValue_FASTCALL(a4, v5, _pyx->str_n);// 以字典获取关键字参数,a4是指向字典(kwargs)的指针,v5是存储返回结果的变量,n是参数名,结果返回到input[0]
  input_b = input;
  if ( !(_QWORD)input )
  {
    if ( PyErr_Occurred() )
    {
      v11 = 2684LL;
      goto LABEL_26;
    }
    goto LABEL_25;
  }
  v7 = _pyx;
  --v10;
LABEL_13:
  *((_QWORD *)&input + 1) = _Pyx_GetKwValue_FASTCALL(a4, v5, v7->str_m);// 同上,返回参数m的值到input[1]中
  if ( !*((_QWORD *)&input + 1) )               // 一些异常处理
  {
    if ( PyErr_Occurred() )
    {
      v11 = 2692LL;
    }
    else
    {
      _Pyx_RaiseArgtupleInvalid(v13, v12, v14, v15, 1LL);// 检查函数参数个数
      v11 = 2694LL;
    }
    goto LABEL_26;
  }
  --v10;
LABEL_15:
  if ( v10 > 0 )
  {
    if ( (int)_Pyx_ParseOptionalKeywords(a4, (_DWORD)v5, (unsigned int)v26, a4, (__int64)&input, a3) < 0 )// 处理函数调用中的可选关键字参数把值解析到对应的函数参数中
    {
      v11 = 2699LL;
LABEL_26:
      _Pyx_AddTraceback("source.aadd", v11, 8LL, "source.pyx");
      return 0LL;
    }
    input_b = input;                            // 赋值
  }
LABEL_28:
  input0 = _Pyx_PyInt_As_int(input_b);          // 从python对象里提出一个值转为c的int类型
  if ( input0 == -1 && PyErr_Occurred() )
  {
    v18 = 2759LL;
  }
  else
  {
    input1 = _Pyx_PyInt_As_int(*((_QWORD *)&input + 1));
    if ( input1 == -1 && PyErr_Occurred() )
    {
      v18 = 2760LL;
    }
    else
    {
      input_add = input0 + input1;              // m + n 
      if ( input_add == -1 && PyErr_Occurred() )
      {
        v18 = 2761LL;
      }
      else
      {
        input_add_pyint = PyLong_FromLong(input_add);// 结果转为python的数字对象
        v22 = (int *)input_add_pyint;
        if ( input_add_pyint )
        {
          v23 = (int *)_Pyx_PyObject_CallOneArg(return, input_add_pyint);// 调用函数return返回相加结果  
      ......

source.rc4

// write access to const memory has been detected, the output may be wrong!
__int64 _pyx_pf_6source_4rc4()
{
 ......
  s2 = 0LL;
  sj_y256 = 0LL;
  v2 = 0LL;
  count3 = 0LL;
  v4 = 25;
  key_index = 0LL;
  v80 = 0LL;
  key = 0LL;
  m1 = 0LL;
  sbox2 = 0LL;
  i2 = 0LL;
  m5 = 0LL;
  m = PyList_New(25LL);                         // 创建了一个大小为25的列表
  if ( !m )
  {
    v4 = 12;
    v7 = 2845;
    goto LABEL_290;
  }
  v8 = _pyx;                                    // 赋值
  int_28 = _pyx->int_28;   //把密文加载到列表m中
  if ( *(_DWORD *)int_28 != -1 )
    ++*(_DWORD *)int_28;
  **(_QWORD **)(m + 24) = v8->int_28;
......
  if ( *(_DWORD *)int_56 != -1 )
    ++*(_DWORD *)int_56;
  v80 = (int *)m;
  *(_QWORD *)(*(_QWORD *)(m + 24) + 192LL) = v8->int_56;// 上面都是在给列表赋值,值就是一些数字常量,这是密文
  str_keykey = v8->str_keykey;
  if ( *(_DWORD *)str_keykey != -1 )
    ++*(_DWORD *)str_keykey;
  str_7 = v8->str_7;
  key = v8->str_keykey;                         // 加载密钥
  if ( *(_DWORD *)str_7 != -1 )
    ++*(_DWORD *)str_7;
  int_0 = v8->int_0;
  flag = v8->str_7;
  if ( *(_DWORD *)int_0 != -1 )
    ++*(_DWORD *)int_0;
  j = (int *)v8->int_0;
  v83 = j;
  if ( *(_DWORD *)m != -1 )
    ++*(_DWORD *)m;
  m1 = (int *)m;
  s = _Pyx_PyObject_Call(qword_18000BF48, v8[1].str_0);// 调用一个对象,参数分别是调用对象名和参数,猜测返回值是256的那个表
  if ( !s )
  {
    v4 = 17;
    v7 = 2972;
    goto LABEL_289;
  }
  if ( *(_QWORD *)(s + 8) == PyList_Type && *(_QWORD *)s == 1LL )// 判断返回值是否是列表,引用计数是否为1
  {
    *(_DWORD *)s = 2;
    s2 = s;                                     // 把列表赋值给v0
  }
  else
  {
    s2 = PySequence_List(s);                    // 把python对象转成列表,赋值给v0
    if ( !s2 )
    {
      v4 = 17;
      v7 = 2974;
LABEL_268:
      if ( *(int *)s >= 0 )                     // 判断是否释放对象
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      if ( !s2 )
      {
LABEL_277:
        if ( key_index )
          goto LABEL_278;
        goto LABEL_281;
      }
LABEL_274:
      if ( *(int *)s2 >= 0 )
      {
        v39 = (*(_QWORD *)s2)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s2);
      }
      goto LABEL_277;
    }
  }
  if ( *(int *)s >= 0 )
  {
    v39 = (*(_QWORD *)s)-- == 1LL;
    if ( v39 )
      Py_Dealloc(s);
  }
  sbox1 = s2;                                   // 表
  sbox2 = (int *)s2;                            // 表,暂时分不清类型是什么,感觉一个是地址,一个是元素
  do
  {
    count1 = PyLong_FromLong((unsigned int)count3);// v3的值是0,赋值给count1
    i = (int *)count1;                          // 赋值count1给i,i由下面推出
    if ( !count1 )
    {
      v4 = 18;
      v7 = 2988;
      goto LABEL_287;
    }
    v43 = i2;
    i2 = (int *)count1;                         // 赋值count1到count2
    if ( v43 )
    {
      if ( *v43 >= 0 )
      {
        v39 = (*(_QWORD *)v43)-- == 1LL;
        if ( v39 )
          Py_Dealloc(v43);
      }
    }
    Item = _Pyx_PyObject_GetItem(sbox1, i);     // 从盒中获取元素s[i]
    s2 = Item;
    if ( !Item )
    {
      v4 = 19;
      v7 = 3000;
      goto LABEL_287;
    }
    s = PyNumber_Add(j, Item);                  // j的初始值是0,这里实现了j+s[i]
    if ( !s )
    {
      v4 = 19;
      v7 = 3002;
      goto LABEL_274;
    }
    if ( *(int *)s2 >= 0 )
    {
      v39 = (*(_QWORD *)s2)-- == 1LL;
      if ( v39 )
        Py_Dealloc(s2);
    }
    s2 = 0LL;
    if ( key[2] == -1 )                         // 这里使用了key,下面的操作可能有关于key
    {
      v7 = 3005;
      goto LABEL_267;
    }
    keylen = PyLong_FromSsize_t();              // 由下面可以更确定i,同时得知这里在获取key的大小
    s2 = keylen;
    if ( !keylen )
    {
      v7 = 3006;
      goto LABEL_267;
    }
    key_index = (int *)PyNumber_Remainder(i, keylen);// 取余,实现了i%keylen
    if ( !key_index )
    {
      v7 = 3008;
      goto LABEL_267;
    }
    if ( *(int *)s2 >= 0 )
    {
      v39 = (*(_QWORD *)s2)-- == 1LL;
      if ( v39 )
        Py_Dealloc(s2);
    }
    s2 = _Pyx_PyObject_GetItem(key, key_index); // 按下标从key中取出元素,这里保存到了s2中
    if ( !s2 )
    {
      v7 = 3011;
      goto LABEL_267;
    }
    if ( *key_index >= 0 )
    {
      v39 = (*(_QWORD *)key_index)-- == 1LL;
      if ( v39 )
        Py_Dealloc(key_index);
    }
    v46 = *(_QWORD *)(s2 + 8);
    key_index = 0LL;
    v47 = *(_DWORD *)(v46 + 168);
    if ( (v47 & 0x10000000) != 0 )
    {
      intkey = _Pyx_PyUnicode_AsPy_UCS4(s2);    // 这里把s2按ascii码转为数字
      goto LABEL_104;
    }
    if ( (v47 & 0x8000000) != 0 )
    {
      v49 = *(_QWORD *)(s2 + 16);
      if ( v49 == 1 )
      {
        intkey = *(unsigned __int8 *)(s2 + 32);
        goto LABEL_104;
      }
LABEL_102:
      v50 = "ord() expected a character, but string of length %zd found";// 熟悉的ord()函数,证实了上面的操作
      goto LABEL_103;
    }
    if ( v46 == PyByteArray_Type || (unsigned int)PyType_IsSubtype() )
    {
      v49 = *(_QWORD *)(s2 + 16);
      if ( v49 == 1 )
      {
        intkey = **(unsigned __int8 **)(s2 + 40);
        goto LABEL_104;
      }
      goto LABEL_102;
    }
    v50 = "ord() expected string of length 1, but %.200s found";
    v49 = *(_QWORD *)(*(_QWORD *)(s2 + 8) + 24LL);
LABEL_103:
    PyErr_Format(PyExc_TypeError, v50, v49);
    intkey = -1;
LABEL_104:
    if ( intkey == -1 )
    {
      v7 = 3014;
      goto LABEL_267;
    }
    if ( *(int *)s2 >= 0 )
    {
      v39 = (*(_QWORD *)s2)-- == 1LL;
      if ( v39 )
        Py_Dealloc(s2);
    }
    intkey1 = PyLong_FromLong(intkey);          // 把获取的ascii数转为转为python对象
    s2 = intkey1;                               // 赋值给s2
    if ( !intkey1 )
    {
      v7 = 3016;
      goto LABEL_267;
    }
    key_index = (int *)PyNumber_Add(s, intkey1);// 加法运算,s是上面的j+s[i],这里实现了操作(j + s[i]) + ord(key[i % len(key)])
    if ( !key_index )
    {
      v7 = 3018;
LABEL_267:
      v4 = 19;
      goto LABEL_268;
    }
    if ( *(int *)s >= 0 )
    {
      v39 = (*(_QWORD *)s)-- == 1LL;
      if ( v39 )
        Py_Dealloc(s);
    }
    if ( *(int *)s2 >= 0 )
    {
      v39 = (*(_QWORD *)s2)-- == 1LL;
      if ( v39 )
        Py_Dealloc(s2);
    }
    result = _Pyx_PyInt_RemainderObjC(key_index, _pyx->int_256);// 把加来的结果对256取余
    if ( !result )
    {
      v4 = 19;
      v7 = 3022;
      goto LABEL_278;
    }
    if ( *key_index >= 0 )
    {
      v39 = (*(_QWORD *)key_index)-- == 1LL;
      if ( v39 )
        Py_Dealloc(key_index);
    }
    v53 = v83;
    j = (int *)result;                          // 赋值给j,这里一起实现了j = ((j + s[i]) + ord(key[i % len(key)])) % 256
    v54 = (int *)v83;
    v83 = (_QWORD *)result;
    if ( *v54 >= 0 )
    {
      v39 = (*v53)-- == 1LL;
      if ( v39 )
        Py_Dealloc(v54);
    }
    sbox1 = (__int64)sbox2;
    s2 = _Pyx_PyObject_GetItem(sbox2, result);  // 获取s[result]
    if ( !s2 )
    {
      v4 = 20;
      v7 = 3035;
      count3 = sbox2;
      goto LABEL_288;
    }
    key_index = (int *)_Pyx_PyObject_GetItem(sbox2, i);// 获取s[i]
    if ( !key_index )
    {
      v4 = 20;
      v7 = 3037;
      goto LABEL_274;
    }
    if ( (int)PyObject_SetItem(sbox2, i, s2) < 0 )// s[i] = s2
    {
      v4 = 20;
      v7 = 3039;
      goto LABEL_274;
    }
    if ( *(int *)s2 >= 0 )
    {
      v39 = (*(_QWORD *)s2)-- == 1LL;
      if ( v39 )
        Py_Dealloc(s2);
    }
    if ( (int)PyObject_SetItem(sbox2, j, key_index) < 0 )// s[j] = key_index,这里实现了s[i]和s[j]的互换
    {
      v4 = 20;
      v7 = 3041;
      goto LABEL_278;
    }
    if ( *key_index >= 0 )
    {
      v39 = (*(_QWORD *)key_index)-- == 1LL;
      if ( v39 )
        Py_Dealloc(key_index);
    }
    LODWORD(count3) = (_DWORD)count3 + 1;       // 循环计数加一
    key_index = 0LL;
  }
  while ( (int)count3 < 256 );                  // 循环256次(count3从0到255)
  v55 = _pyx;
  v56 = _pyx->int_0;
  if ( *(_DWORD *)v56 != -1 )
    ++*(_DWORD *)v56;
  v2 = v55->int_0;
  v83 = v2;
  if ( *j >= 0 )
  {
    v39 = (*(_QWORD *)j)-- == 1LL;
    if ( v39 )
      Py_Dealloc(j);
  }
  v57 = _pyx;
  v58 = _pyx->int_0;
  if ( *(_DWORD *)v58 != -1 )
    ++*(_DWORD *)v58;
  iint0 = v57->int_0;
  i2 = (int *)iint0;                            // 这里是i,初始化i=0(取的是int_0),往下看可以推出
  if ( *i >= 0 )
  {
    v39 = (*(_QWORD *)i)-- == 1LL;
    if ( v39 )
      Py_Dealloc(i);
  }
  v60 = m1;
  key_index = m1;
  if ( *m1 != -1 )
    ++*m1;
  count3 = sbox2;
  s2 = 0LL;
  count4 = 0LL;
  if ( *((__int64 *)m1 + 2) <= 0 )
  {
LABEL_226:
    if ( *v60 >= 0 )
    {
      v39 = (*(_QWORD *)v60)-- == 1LL;
      if ( v39 )
        Py_Dealloc(v60);
    }
    sj_y256 = (int *)flag;
    v77 = (int *)_Pyx_PyObject_CallOneArg(callable, flag);
    if ( !v77 )
    {
      v4 = 29;
      v7 = 3202;
      goto LABEL_290;
    }
    if ( *v77 >= 0 )
    {
      v39 = (*(_QWORD *)v77)-- == 1LL;
      if ( v39 )
        Py_Dealloc(v77);
    }
    s2 = Py_NoneStruct;
    if ( Py_NoneStruct != -1 )
      ++Py_NoneStruct;
  }
  else
  {
    j2 = (int *)v2;                             // 由下面的判断这里是第二个循环的j,初始化j=0
    m2 = m1;                                    // 密文
    while ( 1 )
    {
      m4 = *(int **)(*((_QWORD *)m2 + 3) + 8 * count4);// 截取了密文count4为密文索引,这里按顺序获取八位二进制密文
      if ( *m4 != -1 )
        ++*m4;
      v65 = m5;
      ++count4;                                 // 下一个密文的索引
      m5 = m4;                                  // 赋值密文
      if ( v65 )
      {
        if ( *v65 >= 0 )
        {
          v39 = (*(_QWORD *)v65)-- == 1LL;
          if ( v39 )
            Py_Dealloc(v65);
        }
      }
      i_add1 = _Pyx_PyInt_AddObjC(iint0, _pyx->int_1);// 这里的iint0在上面赋值给了i2,也就是i,在下面又会把i的值赋值给iint0,所以这个变量也是i,这里实现了i+1
      s2 = i_add1;
      if ( !i_add1 )
        break;
      iY256 = _Pyx_PyInt_RemainderObjC(i_add1, _pyx->int_256);// (i+1)%256
      if ( !iY256 )
      {
        sj_y256 = 0LL;
        v4 = 24;
        v7 = 3100;
        goto LABEL_274;
      }
      if ( *(int *)s2 >= 0 )
      {
        v39 = (*(_QWORD *)s2)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s2);
      }
      v68 = (int *)iint0;
      i2 = (int *)iY256;                        // 可以确定s2是i,因为(i+1)%256被赋值回了i
      iY256_ = iY256;
      if ( *v68 >= 0 )
      {
        v39 = (*(_QWORD *)v68)-- == 1LL;
        if ( v39 )
          Py_Dealloc(v68);
      }
      s_i = _Pyx_PyObject_GetItem(sbox2, iY256);// s[i]
      s = s_i;
      if ( !s_i )
      {
        sj_y256 = 0LL;
        v7 = 3113;
        goto LABEL_278;
      }
      s2 = PyNumber_Add(j2, s_i);               // j+s[i]
      if ( !s2 )
      {
        sj_y256 = 0LL;
        v7 = 3115;
        goto LABEL_268;
      }
      if ( *(int *)s >= 0 )
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      j_add_si = _Pyx_PyInt_RemainderObjC(s2, _pyx->int_256);// (j+s[i])%256
      if ( !j_add_si )
      {
        sj_y256 = 0LL;
        v7 = 3118;
        goto LABEL_274;
      }
      if ( *(int *)s2 >= 0 )
      {
        v39 = (*(_QWORD *)s2)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s2);
      }
      v72 = j2;
      v83 = (_QWORD *)j_add_si;
      j2 = (int *)j_add_si;                     // 赋值回j
      if ( *v72 >= 0 )
      {
        v39 = (*(_QWORD *)v72)-- == 1LL;
        if ( v39 )
          Py_Dealloc(v72);
      }
      s = _Pyx_PyObject_GetItem(sbox2, j_add_si);// 在盒中查找值s[j]
      if ( !s )
      {
        sj_y256 = 0LL;
        v4 = 26;
        v7 = 3131;
        goto LABEL_278;
      }
      s2 = _Pyx_PyObject_GetItem(sbox2, iY256_);// s[i]
      if ( !s2 )
      {
        sj_y256 = 0LL;
        v4 = 26;
        v7 = 3133;
        goto LABEL_268;
      }
      if ( (int)PyObject_SetItem(sbox2, iY256_, s) < 0 )
      {
        sj_y256 = 0LL;
        v4 = 26;
        v7 = 3135;
        goto LABEL_268;
      }
      if ( *(int *)s >= 0 )
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      if ( (int)PyObject_SetItem(sbox2, j2, s2) < 0 )// 交换s[i],s[j]
      {
        sj_y256 = 0LL;
        v4 = 26;
        v7 = 3137;
        goto LABEL_274;
      }
      if ( *(int *)s2 >= 0 )
      {
        v39 = (*(_QWORD *)s2)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s2);
      }
      s2 = _Pyx_PyObject_GetItem(sbox2, iY256_);// s2 = s[i]
      if ( !s2 )
      {
        sj_y256 = 0LL;
        v4 = 27;
        v7 = 3147;
        goto LABEL_278;
      }
      s_j2 = _Pyx_PyObject_GetItem(sbox2, j2);  // s[j]
      s = s_j2;
      if ( !s_j2 )
      {
        sj_y256 = 0LL;
        v4 = 27;
        v7 = 3149;
        goto LABEL_274;
      }
      sj_y256 = (int *)_Pyx_PyInt_RemainderObjC(s_j2, _pyx->int_256);// s[j]%256
      if ( !sj_y256 )
      {
        v4 = 27;
        v7 = 3151;
        goto LABEL_268;
      }
      if ( *(int *)s >= 0 )
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      s = PyNumber_Add(s2, sj_y256);            // s = s[i] + s[j] %256
      if ( !s )
      {
        v4 = 27;
        v7 = 3154;
        goto LABEL_274;
      }
      if ( *(int *)s2 >= 0 )
      {
        v39 = (*(_QWORD *)s2)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s2);
      }
      s2 = 0LL;
      if ( *sj_y256 >= 0 )
      {
        v39 = (*(_QWORD *)sj_y256)-- == 1LL;
        if ( v39 )
          Py_Dealloc(sj_y256);
      }
      sj_y256 = (int *)_Pyx_PyInt_RemainderObjC(s, _pyx->int_256);// s %256
      if ( !sj_y256 )
      {
        v4 = 27;
        v7 = 3158;
        goto LABEL_268;
      }
      if ( *(int *)s >= 0 )
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      v74 = v80;
      v80 = sj_y256;
      if ( *v74 >= 0 )
      {
        v39 = (*(_QWORD *)v74)-- == 1LL;
        if ( v39 )
          Py_Dealloc(v74);
      }
      s_j3 = _Pyx_PyObject_GetItem(sbox2, sj_y256);// s[sj_y256],这里实现了s[(s[i] + s[j] % 256) % 256]
      sj_y256 = (int *)s_j3;
      if ( !s_j3 )
      {
        v4 = 28;
        v7 = 3170;
        goto LABEL_278;
      }
      s = _Pyx_PyInt_RemainderObjC(s_j3, _pyx->int_256);// s[j]%256
      if ( !s )
      {
        v4 = 28;
        v7 = 3172;
        goto LABEL_278;
      }
      if ( *sj_y256 >= 0 )
      {
        v39 = (*(_QWORD *)sj_y256)-- == 1LL;
        if ( v39 )
          Py_Dealloc(sj_y256);
      }
      sj_y256 = (int *)PyNumber_Xor(m5, s);     // 密文与得到的结果异或,m ^ s[j] % 256
      if ( !sj_y256 )
      {
        v4 = 28;
        v7 = 3175;
        goto LABEL_268;
      }
      if ( *(int *)s >= 0 )
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      s = _Pyx_PyObject_CallOneArg(qword_18000BF40, sj_y256);// 猜这里是把异或的密文转成字符串
      if ( !s )
      {
        v4 = 28;
        v7 = 3178;
        goto LABEL_278;
      }
      if ( *sj_y256 >= 0 )
      {
        v39 = (*(_QWORD *)sj_y256)-- == 1LL;
        if ( v39 )
          Py_Dealloc(sj_y256);
      }
      sj_y256 = (int *)PyNumber_InPlaceAdd(flag, s);// flag += s,这里实现了字符串加法
      if ( !sj_y256 )
      {
        v4 = 28;
        v7 = 3181;
        goto LABEL_268;
      }
      if ( *(int *)s >= 0 )
      {
        v39 = (*(_QWORD *)s)-- == 1LL;
        if ( v39 )
          Py_Dealloc(s);
      }
      v76 = (int *)flag;
      flag = (__int64 *)sj_y256;                // 结果保存到flag中
      if ( *v76 >= 0 )
      {
        v39 = (*(_QWORD *)v76)-- == 1LL;
        if ( v39 )
          Py_Dealloc(v76);
      }
      m2 = m1;
      iint0 = (__int64 *)i2;                    // 赋值i到iint0中进行准备下一轮循环
      if ( count4 >= *((_QWORD *)m1 + 2) )
      {
        v2 = (__int64 *)j2;
        v60 = m1;                               // 跳转到上面实现循环,进行下一轮加密
        goto LABEL_226;
      }
    }
 ......
    _Pyx_AddTraceback("source.rc4", v7, v4, "source.pyx");
 ......
  return s2;
}

dir导出密文

python中的dir()函数可以查看模块包含的对象,有时候可能会有密文之类的,可以直接print打印。

总结

我们发现一个cython函数里面其实有很多我们不需要分析的东西,像一些有许多跳转的if其实我们根本就不需要管,这些代码把一个操作分成了许多步,有一些操作其实我们心里大概有数就行了。对一个cython函数的分析,我们需要大胆地猜,对变量进行追踪,弄清关键数据操作的作用就行了,数据操作比较复杂时可以自己写伪代码记录一下。