언어/python
text 섹션 ngram 추출 코드 작업 중 ...
파아랑새
2018. 7. 4. 22:38
import pefile
import re
import numpy as np
import pickle
def byte_ngrams(data, n=4):
    """Return every contiguous n-byte window of *data* as lists of hex strings.

    Args:
        data: A bytes-like object (iterating it must yield ints).
        n: Window length; must be a positive integer.

    Returns:
        A list of ``len(data) - n + 1`` windows (empty when *data* is shorter
        than *n*). Each window is a list of ``hex()`` strings such as
        ``'0x4d'``.

    Raises:
        ValueError: If *n* is less than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    hex_bytes = [hex(b) for b in data]
    # Start at index 0 and include the final full window; a negative range
    # bound simply yields no windows for inputs shorter than n.
    return [hex_bytes[i:i + n] for i in range(len(hex_bytes) - n + 1)]


def _read_section(target_path, section_name=b".text"):
    """Return the raw on-disk bytes of the named PE section, or b"" if absent."""
    pe_data = pefile.PE(target_path)
    try:
        for sec in pe_data.sections:
            # Only the requested section (.text by default) is extracted.
            # Compare the NUL-stripped raw name so the match is exact and no
            # decoding of the name is needed.
            if sec.Name.rstrip(b"\x00") != section_name:
                continue
            print("ptrRaw => {:#x}, sizeRaw => {:#x}".format(
                sec.PointerToRawData, sec.SizeOfRawData))
            with open(target_path, 'rb') as binary_file:
                # Jump to where the section's raw data starts instead of
                # reading from the beginning of the file (the headers).
                binary_file.seek(sec.PointerToRawData)
                return binary_file.read(sec.SizeOfRawData)
    finally:
        # Release the handle held by pefile even if parsing/reading fails.
        pe_data.close()
    return b""


def main(target_path="C:/Users/sleep/Desktop/driver/chromedriver.exe", n=4):
    """Extract byte n-grams from the .text section of a PE file and print them.

    Args:
        target_path: Path of the PE file to analyse. Defaults to the path the
            script was originally hard-coded with.
        n: Length of each n-gram in bytes (default 4).

    Returns:
        The list of n-grams that was printed; each n-gram is a list of *n*
        hex strings. The list is empty when the file has no .text section or
        the section holds fewer than *n* bytes.

    Note:
        One n-gram is produced per byte offset, so the printed list grows
        with the size of the section.
    """
    section_bytes = _read_section(target_path)
    n_gram = byte_ngrams(section_bytes, n)
    print(n_gram)
    return n_gram
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()