8.2 File Fuzzer

文件格式漏洞（file format vulnerabilities）已经渐渐成为客户端攻击的流行方式，而我们最感兴趣的就是找出程序在解析文件格式时出现的漏洞。无论面对的目标是杀毒软件还是文档阅读器，我们都希望测试库尽可能的全，最好是包含所有的文件格式。同时还要确保，我们的 fuzzer 能准确的捕捉到崩溃信息，然后自动化的决策出是否是可利用的漏洞。最后还要加入 emailing 的功能，在我们有成千上万的测试案例的时候，你不会想傻傻的坐在机器前看数据流吧！

现在开始写代码，第一步，构造创建一个类框架，用于简单的文件选择。

#file_fuzzer.py
from pydbg import *
from pydbg.defines import *
import utils
import random
import sys
import struct
import threading
import os
import shutil
import smtplib
import time
import getopt
class file_fuzzer:
    """State holder and sample picker for a debugger-driven file fuzzer.

    The instance keeps the target configuration, the per-iteration
    bookkeeping shared between the fuzzing, debugger and monitor threads,
    and the optional e-mail settings used for crash notification.
    """

    def __init__(self, exe_path, ext, notify):
        """Store the target configuration and reset all run-time state.

        Args:
            exe_path: Path of the executable that is launched on each
                mutated file.
            ext: Extension appended to ``test.`` to name the working copy.
            notify: Stored as ``notify_crash``; whether crash e-mails are
                wanted.
        """
        # Target configuration
        self.exe_path = exe_path
        self.ext = ext
        self.notify_crash = notify

        # Per-iteration bookkeeping
        self.orig_file = None
        self.mutated_file = None
        self.iteration = 0
        self.crash = None
        self.send_notify = False

        # Debugger state shared between the worker threads
        self.pid = None
        self.in_accessv_handler = False
        self.dbg = None
        self.running = False
        self.ready = False

        # Optional e-mail settings.
        # NOTE(review): the address literals below look like scrubbed
        # placeholders; replace them with real addresses before relying
        # on notification.
        self.smtpserver = 'mail.nostarch.com'
        self.recipients = ['[email protected]',]
        self.sender = '[email protected]'

        # Payloads that get repeated and spliced into the sample file
        self.test_cases = [ "%s%n%s%n%s%n", "\xff", "\x00", "A" ]

    def file_picker( self ):
        """Copy a randomly chosen sample from ``examples`` to ``test.<ext>``.

        Returns:
            The bare file name of the chosen sample.

        Raises:
            ValueError: If the ``examples`` directory has no entries.
        """
        samples = os.listdir("examples")
        if not samples:
            # Same exception type the empty randint() range used to raise,
            # but with a message that names the actual problem.
            raise ValueError("no sample files found in the examples directory")

        chosen = random.choice(samples)
        # os.path.join keeps the source path valid regardless of separator.
        shutil.copy(os.path.join("examples", chosen), "test.%s" % self.ext)
        return chosen

类框架定义了一些成员变量，用于跟踪记录文件的基础信息，这些文件将会在变形后加入测试用例。file_picker 函数使用内建的 Python 函数列出目录内的所有文件，然后随机选取一个进行变形。

接下来我们要做一些线程方面的工作：加载目标程序，跟踪崩溃信息，在文档分析完成之后终止目标程序。第一步，将目标程序加载进一个调试线程，并且安装自定义的访问违例处理代码。第二步，创建第二个线程，用于监视调试的线程，并且负责在一段长度的时间之后杀死调试线程。最后还得附加一段 email 提醒的代码。

#file_fuzzer.py
...
def fuzz( self ):
    """Run the fuzzing loop forever, launching one target per iteration.

    Whenever no debugger session is active (``self.running`` is false) a
    sample is picked and mutated, the target is started on a debugger
    thread, and a monitor thread is started once the target's PID has been
    published. ``self.iteration`` is incremented once per launch. While a
    session is active the loop just polls once per second.

    This method never returns.
    """
    while True:
        if self.running:
            # A debugger session or the crash handler is still busy.
            time.sleep(1)
            continue

        # We first snag a file for mutation
        self.test_file = self.file_picker()
        self.mutate_file()

        # Start up the debugger thread (explicitly non-daemon)
        pydbg_thread = threading.Thread(target=self.start_debugger)
        pydbg_thread.daemon = False
        pydbg_thread.start()

        # Wait until the debugger thread has published the target's PID
        while self.pid is None:
            time.sleep(1)

        # Start up the monitoring thread (explicitly non-daemon)
        monitor_thread = threading.Thread(target=self.monitor_debugger)
        monitor_thread.daemon = False
        monitor_thread.start()

        # Count launches, not seconds spent waiting on a running session.
        self.iteration += 1
# Our primary debugger thread that the application
# runs under
def start_debugger(self):
    """Debugger thread body: load the target on the test file and run it.

    Marks the fuzzer as running, creates a fresh ``pydbg`` instance with
    ``check_accessv`` registered for access violations, loads the target
    with ``test.<ext>`` as its command line, publishes the debuggee's PID
    in ``self.pid`` and then enters the debugger's run loop.
    """
    print("[*] Starting debugger for iteration: %d" % self.iteration)
    self.running = True

    debugger = self.dbg = pydbg()
    debugger.set_callback(EXCEPTION_ACCESS_VIOLATION, self.check_accessv)

    target_args = "test.%s" % self.ext
    debugger.load(self.exe_path, target_args)

    # fuzz() polls self.pid to learn that the target has been created
    self.pid = debugger.pid
    debugger.run()
# Our access violation handler that traps the crash
# information and stores it 
def check_accessv(self,dbg):
    """Access-violation callback: record the crash and back up the files.

    Notifications whose ``dwFirstChance`` field is set are skipped by
    returning ``DBG_CONTINUE``. Otherwise the crash synopsis is stored in
    ``self.crash`` and written to ``crashes\\crash-<iteration>``, the
    mutated file and its original sample are copied into ``crashes``, and
    the target process is terminated.

    Args:
        dbg: The debugger instance passed to the callback.

    Returns:
        ``DBG_CONTINUE`` for skipped notifications, otherwise
        ``DBG_EXCEPTION_NOT_HANDLED``.
    """
    if dbg.dbg.u.Exception.dwFirstChance:
        return DBG_CONTINUE

    print("[*] Woot! Handling an access violation!")
    self.in_accessv_handler = True
    try:
        crash_bin = utils.crash_binning.crash_binning()
        crash_bin.record_crash(dbg)
        self.crash = crash_bin.crash_synopsis()

        # Write out the crash information; always release the handle
        crash_fd = open("crashes\\crash-%d" % self.iteration,"w")
        try:
            crash_fd.write(self.crash)
        finally:
            crash_fd.close()

        # Now back up the files
        shutil.copy("test.%s" % self.ext,"crashes\\%d.%s" % (self.iteration,self.ext))
        shutil.copy("examples\\%s" % self.test_file,"crashes\\%d_orig.%s" % (self.iteration,self.ext))
    finally:
        # Reset the shared flags even if saving the artifacts failed;
        # the monitor thread waits on self.running and would otherwise
        # never finish.
        self.dbg.terminate_process()
        self.in_accessv_handler = False
        self.running = False

    return DBG_EXCEPTION_NOT_HANDLED
# This is our monitoring function that allows the application
# to run for a few seconds and then it terminates it 
def monitor_debugger(self):
    """Monitor thread body: give the target a few seconds, then end the run.

    Sleeps three times for one second each while printing the counter. If
    the access-violation handler is not active afterwards, the target is
    terminated and ``self.pid`` / ``self.running`` are cleared so that a
    new iteration can start. Otherwise this thread polls ``self.running``
    once per second until it becomes false.
    """
    counter = 0
    # Python 2 print statement with a trailing comma: no newline is
    # emitted, so the counter values below continue on the same line.
    print "[*] Monitor thread for pid: %d waiting." % self.pid, 
    while counter < 3:
        time.sleep(1) 
        print counter, 
        counter += 1
    if self.in_accessv_handler != True: 
        # No crash is being handled: sleep one more second, then kill the
        # target and mark the run as finished.
        time.sleep(1) 
        self.dbg.terminate_process() 
        self.pid = None
        self.running = False
    else:
        print "[*] The access violation handler is doing its business. Waiting."
        # NOTE(review): this branch never resets self.pid, so the PID wait
        # in fuzz() presumably sees a stale value on the next iteration --
        # confirm whether that is intended.
        while self.running: 
            time.sleep(1)
# Our emailing routine to ship out crash information 
def notify(self):
    """E-mail the latest crash synopsis to the configured recipients.

    Builds a plain-text message from ``self.sender``, ``self.recipients``,
    ``self.iteration`` and ``self.crash`` and sends it through the SMTP
    server named by ``self.smtpserver``.

    Returns:
        None.
    """
    # Header lines must be contiguous; the first blank line starts the body.
    crash_message = "From: %s\r\nTo: %s\r\n\r\nIteration: %d\n\nOutput:\n\n %s" % (
        self.sender, ", ".join(self.recipients), self.iteration, self.crash)

    # The settings live on the instance, not in module globals.
    session = smtplib.SMTP(self.smtpserver)
    try:
        session.sendmail(self.sender, self.recipients, crash_message)
    finally:
        # Close the connection even when sending fails
        session.quit()
    return

我们已经有了个比较完整的流程，能够顺利的完成 fuzz 了，让我们简单的看看各个函数的作用。第一步，通过 self.running 确保当前只有一个调试线程在执行，并且访问违例的处理程序没有在搜集崩溃数据。第二步，我们把随机选择到的文件传入变形函数，这个函数会在稍后实现。

一旦文件变形完成，第三步，我们就创建一个调试线程，启动目标程序，并将上面随机选中的文件的路径名字，作为命令行参数传入。接着是一个条件循环，等待目标进程的创建。当程序创建成功的时候，得到新的 PID，第四步，创建一个监视线程，确保在一段时间以后杀死被调试的程序。监视线程创建成功以后，我们就增加统计标志，然后回到主循环，等待一次 fuzz 的完成，继续下一次 fuzz。现在让我们增加一个简单的变形函数。

#file_fuzzer.py
...
def mutate_file( self ):
    """Splice a randomly repeated test case into ``test.<ext>`` in place.

    Reads the working copy, picks one entry of ``self.test_cases``, repeats
    it between 1 and 1000 times and inserts the result at a random offset
    of the original content. The mutated content overwrites the file.

    An empty working copy is handled by inserting at offset 0.

    Returns:
        None.
    """
    path = "test.%s" % self.ext

    # Pull the contents of the file into a buffer
    fd = open(path, "rb")
    try:
        stream = fd.read()
    finally:
        fd.close()

    # The fuzzing meat and potatoes, really simple:
    # take a random test case and apply it to a random position in the file
    test_case = random.choice(self.test_cases)
    # max() guards the empty-file case, where randint(0, -1) would raise
    rand_offset = random.randint(0, max(len(stream) - 1, 0))
    rand_len = random.randint(1, 1000)

    # Now take the test case and repeat it
    test_case = test_case * rand_len
    # Coerce the payload to the type the binary read returned; latin-1
    # maps each character code 0-255 to the byte of the same value.
    if not isinstance(test_case, type(stream)):
        test_case = str(test_case).encode("latin-1")

    # Apply it to the buffer, we are just splicing in our fuzz data
    fuzz_file = stream[:rand_offset] + test_case + stream[rand_offset:]

    # Write out the file
    fd = open(path, "wb")
    try:
        fd.write(fuzz_file)
    finally:
        fd.close()
    return

这是一个基础的变形函数。我们从全部测试用例中随机的选取一个；然后同样随机的获取一个文件偏移和需要附加的 fuzz 数据的重复次数。用这个测试用例和重复次数生成附加的 fuzz 数据，最后将原始数据在偏移处切开，在其中加入 fuzz 数据。一切完成后，把新生成的文件覆盖原来的文件。紧接着就是调试线程开始新一轮的测试了。现在让我们实现命令行处理部分。

#file_fuzzer.py
...
def print_usage():
    """Print the command-line synopsis and exit with status 0.

    The synopsis lists every option the option parser accepts, including
    the optional ``-n`` notification flag.
    """
    print("[*]")
    print("[*] file_fuzzer.py -e <Executable Path> -x <File Extension> [-n]")
    print("[*]")
    sys.exit(0)
def main():
    """Parse the command line and start the fuzzer.

    Recognized options: ``-e <path>`` (target executable), ``-x <ext>``
    (file extension of the samples) and ``-n`` (request crash
    notification). If parsing fails, or either ``-e`` or ``-x`` is
    missing, the usage text is printed and the script exits.
    """
    print("[*] Generic File Fuzzer.")

    # This is the path to the document parser
    # and the filename extension to use
    try:
        opts, _unused_args = getopt.getopt(sys.argv[1:],"e:x:n")
    except getopt.GetoptError:
        print_usage()

    exe_path = None
    ext = None
    notify = False
    for o,a in opts:
        if o == "-e":
            exe_path = a
        elif o == "-x":
            ext = a
        elif o == "-n":
            notify = True

    if exe_path is not None and ext is not None:
        fuzzer = file_fuzzer( exe_path, ext, notify )
        fuzzer.fuzz()
    else:
        print_usage()


if __name__ == "__main__":
    main()

现在我们的 file_fuzzer.py 脚本已经能够接收到命令行参数了。-e 标志指示需要 fuzz 的目标程序的路径。-x 选项是我们需要用于测试的文件的扩展名；举个例子.txt 就说明我们要用文本文件作为测试数据。-n 选项告诉 fuzzer 是否要接收通知。

最好的测试 fuzzer 的方法，就是在测试目标程序的时候观察数据的变形结果。在 fuzz 文本文件的时候，用 Windows 记事本是再好不过的了。因为你能够直接的看到每一次的数据的变化，比用十六进制编辑器和二进制对比工具方便很多。在启动 file_fuzzer.py 脚本之前，需要在脚本当前目录下新建两个目录 examples 和 crashes 。然后在 examples 目录下存放几个以.txt 结尾的文件，接着使用如下命令启动脚本。

python file_fuzzer.py -e C:\\WINDOWS\\system32\\notepad.exe -x .txt

随着记事本的启动，你能看到被变形过的文件。在对变形之后的数据满意以后，你就可以使用这个 file fuzzer 测试别的程序了。