Poc_Scanner/new_poc_tools.py

import os
import time
import logging
import tldextract
import base64
import urllib.parse
import sys
import docx
import re
import warnings
import requests
import argparse
from docx.shared import Cm  # unit-conversion helper (centimetres)
from bs4 import BeautifulSoup
from docx import Document
from docx.oxml.ns import qn
from base_tool import validate_main, check_url_status
from screenshot import screenshot
from colorama import init, Fore

# Initialize colorama
init()
os.system("")  # nudge the Windows console into processing ANSI colour codes
warnings.filterwarnings("ignore")

# Logging configuration
# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%H:%M')
DEFAULT_HEADERS = {
    'Accept': '*/*',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
    'Referer': 'https://www.baidu.com',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
}
def get_company_name(url):
    # Send an HTTP request to fetch the page content
    response = requests.get(url, headers=DEFAULT_HEADERS, verify=False)
    # Check whether the request succeeded
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None
    # Parse the HTML
    soup = BeautifulSoup(response.content, 'html.parser')
    # Locate the element carrying the company name
    company_name_tag = soup.find('div', {'tag': 'company_name'})
    # Extract the company name
    if company_name_tag:
        text = company_name_tag.text
        cleaned_text = re.sub(r'[^\w\s]', '', text)  # strip punctuation
        cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()  # collapse extra whitespace and newlines
        return cleaned_text
        # return company_name_tag.text
    else:
        print("Company name not found")
        return None
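# Usage sketch (hypothetical domain): the tag="company_name" selector above is
# specific to whois.west.cn's ICP lookup page and may break if that page changes.
# >>> get_company_name("https://whois.west.cn/icp/" + extract_root_domain("http://example.com"))
# Returns the cleaned company name string, or None when the lookup fails.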
def get_website_title(url):
    try:
        # Send an HTTP request to fetch the page content
        response = requests.get(url, headers=DEFAULT_HEADERS, verify=False)
        response.raise_for_status()  # check whether the request succeeded
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve the page: {e}")
        return None
    # Parse the HTML
    soup = BeautifulSoup(response.content, 'html.parser')
    # Locate the <title> tag
    title_tag = soup.find('title')
    # Extract the title text
    if title_tag:
        return title_tag.text
    else:
        print("Website title not found")
        return None
def extract_domains_from_file(file_path):
    domains = []
    try:
        with open(file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if line:  # skip blank lines so they are not scanned as empty targets
                    domains.append(line)
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
    except Exception as e:
        logging.error(f"Error while reading the file: {e}")
    return domains
def create_document():
    document = Document()
    document.styles['Normal'].font.name = 'Times New Roman'
    # Set the Normal style's East Asian font to SimSun (宋体)
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
    return document
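# Note: python-docx exposes no public API for the East Asian font, so the
# style's XML (w:rPr/w:rFonts @w:eastAsia) is set directly via the oxml layer
# above. A minimal sketch of the same technique on a single run (hypothetical):
# run = document.add_paragraph().add_run("中文示例")
# run.font.name = 'Times New Roman'      # creates rPr/rFonts on the run
# run._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')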
def extract_root_domain(url):
    extracted = tldextract.extract(url)
    root_domain = f"{extracted.domain}.{extracted.suffix}"
    # encoded_bytes = base64.b64encode(root_domain.encode('utf-8'))
    # encoded_str = encoded_bytes.decode('utf-8')
    return urllib.parse.quote(root_domain)
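# Example: tldextract keeps only the registered domain plus public suffix, so
# the scheme, subdomain, and port are all dropped:
# >>> extract_root_domain("http://vr.sh-fit.com:9090")
# 'sh-fit.com'  (urllib.parse.quote only alters domains with non-ASCII characters)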
def add_scan_results_to_document(document, domain, results, include_all, description, choice_3):
    for name, result, status_code, url, res_time in results:
        # "存在漏洞" ("vulnerability present") is the marker string returned by validate_main
        if include_all or result == "存在漏洞":
            company_name = get_company_name("https://whois.west.cn/icp/" + extract_root_domain(domain))
            document.add_heading(f"Target: {domain}", level=3)
            document.add_paragraph(f"Vulnerability name: {name}")
            document.add_paragraph(f"Company name: {company_name}")
            document.add_paragraph(f"Vulnerable URL: {url}")
            document.add_paragraph(f"Response status: {status_code}")
            document.add_paragraph(f"Response time: {res_time}")
            document.add_paragraph(f"Result: {result}")
            document.add_paragraph("\n")
            if result == "存在漏洞" and choice_3 == "y":
                screenshot_path_1 = screenshot(url)
                # print(screenshot_path_1)
                # chinaz.com blocks the crawler, so this screenshot source is deprecated
                # screenshot_path_2 = screenshot("https://icp.chinaz.com/home/info?host=" + extract_root_domain(domain))
                # print(screenshot_path_2)
                # Word handling: load the per-vulnerability report template ("模板.docx" = template)
                doc = docx.Document("./file/模板.docx")
                # Fixed boilerplate; the indices below map to the template fields:
                # site title, site domain, vulnerability name, test case
                title = get_website_title(domain)
                doc.paragraphs[2].text = title
                doc.paragraphs[4].text = domain
                doc.paragraphs[6].text = name
                doc.paragraphs[8].text = url
                doc.paragraphs[10].text = description
                # Paragraph that receives the proof-of-concept screenshot
                paragraph = doc.paragraphs[10]
                # Append the image; adjust width/height (in cm) as needed
                run = paragraph.add_run()
                if screenshot_path_1:
                    run.add_picture(screenshot_path_1, width=Cm(16.52), height=Cm(9.13))
                # run.add_picture(screenshot_path_2, width=Cm(16.52), height=Cm(9.13))  # ICP-registration screenshot, deprecated pending a new source
                doc_save_path = './file/result/'
                if not os.path.exists(doc_save_path):
                    os.makedirs(doc_save_path)
                # Save the per-vulnerability report; rename as needed
                doc_name = str(company_name) + "_" + name + ".docx"
                doc.save(doc_save_path + doc_name)
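# Assumed layout of ./file/模板.docx, inferred from the paragraph indices used
# above (the real template must match; the odd indices presumably hold labels):
#   paragraphs[2]  -> site title
#   paragraphs[4]  -> site domain
#   paragraphs[6]  -> vulnerability name
#   paragraphs[8]  -> test-case URL
#   paragraphs[10] -> description, followed by the PoC screenshot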
def mass_poc_scan(domains, include_all, choice_3, docx_name, status):
    document = create_document()
    current_domain = None  # tracks the domain currently being scanned
    try:
        for domain in domains:
            logging.info(f"Scanning domain: {domain}")
            current_domain = domain
            if status == 'y' and not check_url_status(domain):
                logging.warning(f"Unreachable, skipping this domain: {domain}")
                print("--------------------------------------------------")
                if choice_3.lower() == 'y':
                    # record the unreachable target as a level-3 heading
                    document.add_heading(f"Target: {domain} is unreachable!", level=3)
                continue
            try:
                results, description = validate_main(domain)
                add_scan_results_to_document(document, domain, results, include_all, description, choice_3)
            except Exception as e:
                logging.error(f"Error while scanning domain {domain}: {e}")
                print("--------------------------------------------------")
    except KeyboardInterrupt:
        print(Fore.RED + '\nCtrl+C interrupt detected')
        if current_domain is not None:
            print(f"Domain being scanned when interrupted: {current_domain}")
        else:
            print("Scanning had not started yet.")
        print(Fore.RESET)
    # Save the summary report on normal completion and after an interrupt alike
    save_document(document, docx_name)
def save_document(document, docx_name):
    timestamp = str(int(time.time()))
    # If docx_name is empty, exit without saving a report
    if not docx_name:
        logging.info("Done!")
        sys.exit()
    results_dir = "./file/report/"
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    document.save(os.path.join(results_dir, f"{docx_name}_{timestamp}.docx"))
    logging.info("Scan report generated!")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Most options are entered after the program starts; see the flags below for the default (batch) mode.")
    parser.add_argument('--batch', action='store_true', help='Batch mode: scan the default urls file, drop non-vulnerable entries, and generate reports')
    parser.add_argument('-n', '--name', type=str, help='Summary report file name')
    args = parser.parse_args()
    if args.batch:
        # Batch mode
        print("Default mode (default target file / keep only vulnerable entries / generate reports)!")
        file_path = "./urls.txt"
        include_all = False
        choice_3 = 'y'
        status = 'y'
    else:
        # Interactive mode
        choice = input(Fore.BLUE + "Use a different target file? (y/n): " + Fore.RESET).lower()
        if choice == 'n':
            print("Default target file: urls.txt")
            file_path = "./urls.txt"
        else:
            file_path = input(Fore.BLUE + "Enter the target file to scan: " + Fore.RESET)
        print("--------------------------------------------------")
        status = input(Fore.BLUE + "Check whether the targets are alive first? (y/n): " + Fore.RESET).lower()
        print("--------------------------------------------------")
        choice_2 = input(Fore.BLUE + "Drop records of non-vulnerable sites? (y/n): " + Fore.RESET).lower()
        include_all = choice_2 != 'y'
        print("--------------------------------------------------")
        choice_3 = input(Fore.BLUE + "Generate per-vulnerability reports? (y/n): " + Fore.RESET).lower()
        print("--------------------------------------------------")
    # Run the scan (the target file is read once here for both modes)
    domains = extract_domains_from_file(file_path)
    if args.batch:
        mass_poc_scan(domains, include_all, choice_3, args.name, status)
    else:
        docx_name = input(Fore.BLUE + "Enter the summary report file name (press Enter to skip report generation): " + Fore.RESET)
        print("--------------------------------------------------")
        mass_poc_scan(domains, include_all, choice_3, docx_name, status)
# if __name__ == "__main__":
# domain = 'http://vr.sh-fit.com:9090'
# company_name = get_company_name("https://whois.west.cn/icp/" + extract_root_domain(domain))
# print(company_name)