# Poc_Scanner/new_poc_tools.py
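#
# Batch PoC scanning helper: reads target URLs from a text file, runs the
# checks from base_tool.validate_main against each target, looks up company
# names via whois.west.cn's ICP page, and writes the findings into Word
# documents (one summary report plus optional per-vulnerability reports).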

import os
import time
import logging
import tldextract
import base64
import urllib.parse
import sys
import docx
import re
import warnings
import requests
import argparse
from docx.shared import Cm  # unit conversion helper (centimeters)
from bs4 import BeautifulSoup
from docx import Document
from docx.oxml.ns import qn
from base_tool import validate_main, check_url_status
from screenshot import screenshot
from colorama import init, Fore
# Initialize colorama
init()
os.system("")  # enables ANSI escape sequence handling in the Windows console
warnings.filterwarnings("ignore")
# Configure logging
# logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%H:%M')
DEFAULT_HEADERS = {
    'Accept': '*/*',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
    'Referer': 'https://www.baidu.com',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
}
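# The headers above imitate a desktop browser (with a Baidu Referer), which
# makes it less likely that the whois/ICP lookup pages reject the requests.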
def get_company_name(url):
    # Fetch the ICP lookup page
    response = requests.get(url, headers=DEFAULT_HEADERS, verify=False)
    # Check whether the request succeeded
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None
    # Parse the HTML
    soup = BeautifulSoup(response.content, 'html.parser')
    # Locate the element that carries the company name
    company_name_tag = soup.find('div', {'tag': 'company_name'})
    # Extract the company name
    if company_name_tag:
        text = company_name_tag.text
        cleaned_text = re.sub(r'[^\w\s]', '', text)  # strip punctuation (keeps word characters and spaces)
        cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()  # collapse extra whitespace and newlines
        return cleaned_text
    else:
        print("Company name not found")
        return None
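# Example (hypothetical domain), mirroring how the function is called below:
#   get_company_name("https://whois.west.cn/icp/example.com")
# The <div tag="company_name"> selector reflects whois.west.cn's current
# markup and will stop matching if the page layout changes.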
def get_website_title(url):
    try:
        # Fetch the target page
        response = requests.get(url, headers=DEFAULT_HEADERS, verify=False)
        response.raise_for_status()  # check whether the request succeeded
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve the page: {e}")
        return None
    # Parse the HTML
    soup = BeautifulSoup(response.content, 'html.parser')
    # Find the <title> tag
    title_tag = soup.find('title')
    # Extract the title text
    if title_tag:
        return title_tag.text
    else:
        print("Website title not found")
        return None
def extract_domains_from_file(file_path):
    domains = []
    try:
        with open(file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if line:  # skip blank lines so they are not scanned as empty targets
                    domains.append(line)
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
    except Exception as e:
        logging.error(f"Error while reading the file: {e}")
    return domains
def create_document():
    document = Document()
    document.styles['Normal'].font.name = 'Times New Roman'
    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')  # 宋体 = SimSun
    return document
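# Note: python-docx's style.font.name only applies to Latin text; the
# w:eastAsia rFonts attribute above is set on the XML element directly so
# that Chinese text renders in 宋体 (SimSun).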
def extract_root_domain(url):
    extracted = tldextract.extract(url)
    root_domain = f"{extracted.domain}.{extracted.suffix}"
    # encoded_bytes = base64.b64encode(root_domain.encode('utf-8'))
    # encoded_str = encoded_bytes.decode('utf-8')
    return urllib.parse.quote(root_domain)
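# Example: extract_root_domain("http://vr.sh-fit.com:9090") returns
# "sh-fit.com" (tldextract drops the "vr" subdomain and the port).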
def add_scan_results_to_document(document, domain, results, include_all, description, choice_3):
    for name, result, status_code, url, res_time in results:
        # "存在漏洞" ("vulnerable") is the sentinel value produced by
        # base_tool.validate_main, so the literal is compared as-is.
        if include_all or result == "存在漏洞":
            company_name = get_company_name("https://whois.west.cn/icp/" + extract_root_domain(domain))
            document.add_heading(f"Target: {domain}", level=3)
            document.add_paragraph(f"Vulnerability name: {name}")
            document.add_paragraph(f"Company name: {company_name}")
            document.add_paragraph(f"Vulnerability URL: {url}")
            document.add_paragraph(f"Response status: {status_code}")
            document.add_paragraph(f"Response time: {res_time}")
            document.add_paragraph(f"Result: {result}")
            document.add_paragraph("\n")

        if result == "存在漏洞" and choice_3 == "y":
            screenshot_path_1 = screenshot(url)
            # chinaz.com now blocks scraping, so this screenshot source is deprecated:
            # screenshot_path_2 = screenshot("https://icp.chinaz.com/home/info?host=" + extract_root_domain(domain))

            # Word handling: load the per-vulnerability report template
            doc = docx.Document("./file/模板.docx")  # 模板.docx = "template.docx"
            # Fixed template slots; the paragraph indices below assume the
            # layout of the bundled template: site title, domain,
            # vulnerability name, test case URL, description
            title = get_website_title(domain)
            doc.paragraphs[2].text = title
            doc.paragraphs[4].text = domain
            doc.paragraphs[6].text = name
            doc.paragraphs[8].text = url
            doc.paragraphs[10].text = description
            # The evidence screenshot is appended to the description paragraph
            paragraph = doc.paragraphs[10]
            # Adjust the picture width/height (in cm) as needed
            run = paragraph.add_run()
            if screenshot_path_1:
                run.add_picture(screenshot_path_1, width=Cm(16.52), height=Cm(9.13))
            # run.add_picture(screenshot_path_2, width=Cm(16.52), height=Cm(9.13))  # ICP filing screenshot, deprecated pending a new source

            doc_save_path = './file/result/'
            if not os.path.exists(doc_save_path):
                os.mkdir(doc_save_path)
            # Save the per-vulnerability report; adjust the naming as needed
            doc_name = str(company_name) + "_" + name + ".docx"
            doc.save(doc_save_path + doc_name)
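# Suggestion (not in the original flow): vulnerability names containing
# characters that are illegal in filenames would make doc.save fail; a
# sanitizing pass such as re.sub(r'[\\/:*?"<>|]', '_', doc_name) before
# saving would guard against that.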
def mass_poc_scan(domains, include_all, choice_3, docx_name, status):
    """Scan each domain and collect the results into one summary document.

    choice_3 -- 'y' to generate per-vulnerability report documents
    status   -- 'y' to check each target's reachability before scanning
    """
    document = create_document()
    current_domain = None  # tracks the domain currently being scanned
    try:
        for domain in domains:
            logging.info(f"Scanning domain: {domain}")
            current_domain = domain
            if status == 'y' and not check_url_status(domain):
                logging.warning(f"Unreachable, skipping this domain: {domain}")
                print("--------------------------------------------------")
                if choice_3.lower() == 'y':
                    document.add_heading(f"Target: {domain} is unreachable!", level=3)
                continue
            try:
                results, description = validate_main(domain)
                add_scan_results_to_document(document, domain, results, include_all, description, choice_3)
            except Exception as e:
                logging.error(f"Error while scanning domain {domain}: {e}")
                print("--------------------------------------------------")
    except KeyboardInterrupt:
        print(Fore.RED + '\nCtrl+C detected, stopping the program')
        if current_domain is not None:
            print(f"Domain being scanned at interruption: {current_domain}")
        else:
            print("Scanning had not started yet.")
        print(Fore.RESET)
    save_document(document, docx_name)
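# Note: mass_poc_scan calls save_document even after a KeyboardInterrupt, so
# a partial summary report is still written when a scan is interrupted.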
def save_document(document, docx_name):
    timestamp = str(int(time.time()))
    # If docx_name is empty, exit without writing a report
    if not docx_name:
        logging.info("Program finished!")
        sys.exit()
    results_dir = "./file/report/"
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    document.save(os.path.join(results_dir, f"{docx_name}_{timestamp}.docx"))
    logging.info("Scan report generated!")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Most options are entered interactively after the program starts; use the flags below to enable the default batch mode.")
    parser.add_argument('--batch', action='store_true', help='Enable batch mode: default urls.txt targets / drop non-vulnerable links / generate reports')
    parser.add_argument('-n', '--name', type=str, help='Summary report file name (batch mode)')
    args = parser.parse_args()
    if args.batch:
        # Batch mode
        print("Default mode (default target file / keep only vulnerable links / generate reports)!")
        file_path = "./urls.txt"
        include_all = False
        choice_3 = 'y'
        status = 'y'
    else:
        # Interactive mode
        choice = input(Fore.BLUE + "Use a different target file? (y/n): " + Fore.RESET).lower()
        if choice == 'n':
            print("Using the default target file urls.txt")
            file_path = "./urls.txt"
        else:
            file_path = input(Fore.BLUE + "Enter the target file to scan: " + Fore.RESET)
        print("--------------------------------------------------")
        status = input(Fore.BLUE + "Check target reachability before scanning? (y/n): " + Fore.RESET).lower()
        print("--------------------------------------------------")
        choice_2 = input(Fore.BLUE + "Drop records of sites without vulnerabilities? (y/n): " + Fore.RESET).lower()
        include_all = choice_2 != 'y'
        print("--------------------------------------------------")
        choice_3 = input(Fore.BLUE + "Generate per-vulnerability reports? (y/n): " + Fore.RESET).lower()
        print("--------------------------------------------------")
    # Run the scan
    domains = extract_domains_from_file(file_path)
    if args.batch:
        mass_poc_scan(domains, include_all, choice_3, args.name, status)
    else:
        docx_name = input(Fore.BLUE + "Enter the summary report file name (press Enter to skip report generation): " + Fore.RESET)
        print("--------------------------------------------------")
        mass_poc_scan(domains, include_all, choice_3, docx_name, status)
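# Usage (assuming the script is run from the Poc_Scanner directory):
#   python new_poc_tools.py --batch -n report   # batch mode; summary saved as ./file/report/report_<timestamp>.docx
#   python new_poc_tools.py                     # interactive mode with prompts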
# if __name__ == "__main__":
#     domain = 'http://vr.sh-fit.com:9090'
#     company_name = get_company_name("https://whois.west.cn/icp/" + extract_root_domain(domain))
#     print(company_name)