import os import time import logging import tldextract import base64 import urllib.parse import sys import docx import os import re import warnings import requests import argparse from docx.shared import Cm #单位换算函数 from bs4 import BeautifulSoup from docx import Document from docx.oxml.ns import qn from base_tool import validate_main, check_url_status from screenshot import screenshot from colorama import init, Fore # 初始化 colorama init() os.system("") warnings.filterwarnings("ignore") # 配置日志记录 # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%H:%M') DEFAULT_HEADERS = { 'Accept': '*/*', 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8', 'Referer': 'https://www.baidu.com', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'keep-alive', } def get_company_name(url): # 发送HTTP请求获取网页内容 response = requests.get(url, headers=DEFAULT_HEADERS, verify=False) # 检查请求是否成功 if response.status_code != 200: print(f"Failed to retrieve the page. Status code: {response.status_code}") return None # 解析HTML soup = BeautifulSoup(response.content, 'html.parser') # 查找公司名称的标签 company_name_tag = soup.find('div', {'tag': 'company_name'}) # 提取公司名称 if company_name_tag: text = company_name_tag.text cleaned_text = re.sub(r'[^\w\s]', '', text) # 去除英文符号 cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip() # 去除多余空格和换行符 return cleaned_text # return company_name_tag.text else: print("公司名称未找到") return None def get_website_title(url): try: # 发送HTTP请求获取网页内容 response = requests.get(url, headers=DEFAULT_HEADERS, verify=False) response.raise_for_status() # 检查请求是否成功 except requests.exceptions.RequestException as e: print(f"Failed to retrieve the page: {e}") return None # 解析HTML soup = BeautifulSoup(response.content, 'html.parser') # 查找标签 title_tag = soup.find('title') # 提取标题内容 if title_tag: return title_tag.text else: print("网站标题未找到") return None def extract_domains_from_file(file_path): domains = [] try: with open(file_path, 'r') as file: for line in file: domains.append(line.strip()) except FileNotFoundError: logging.error(f"文件未找到: {file_path}") except Exception as e: logging.error(f"读取文件时出错: {e}") return domains def create_document(): document = Document() document.styles['Normal'].font.name = 'Times New Roman' document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体') return document def extract_root_domain(url): extracted = tldextract.extract(url) root_domain = f"{extracted.domain}.{extracted.suffix}" # encoded_bytes = base64.b64encode(root_domain.encode('utf-8')) # encoded_str = encoded_bytes.decode('utf-8') return urllib.parse.quote(root_domain) def add_scan_results_to_document(document, domain, results, include_all, description, choice_3): for name, result, status_code, url, res_time in results: if include_all or result == "存在漏洞": company_name = get_company_name("https://whois.west.cn/icp/" + extract_root_domain(domain)) document.add_heading(f"目标:{domain}", level=3) document.add_paragraph(f"漏洞名称:{name}") document.add_paragraph(f"公司名称:{company_name}") document.add_paragraph(f"漏洞链接:{url}") document.add_paragraph(f"响应状态:{status_code}") document.add_paragraph(f"响应时间:{res_time}") document.add_paragraph(f"漏洞情况:{result}") document.add_paragraph("\n") if result == "存在漏洞" and choice_3 == "y": screenshot_path_1 = screenshot(url) # print(screenshot_path_1) # 站长工具反爬,该截图路径已废弃 # screenshot_path_2 = screenshot("https://icp.chinaz.com/home/info?host=" + extract_root_domain(domain)) # print(screenshot_path_2) #word处理部分 #导入模板 doc = docx.Document("./file/模板.docx") #固定重复部分,以下对应模板中:网站域名,漏洞名称,测试用例 #word中对应的位置,分别为:网站名称,网站域名,漏洞名称,测试用例 title = get_website_title(domain) doc.paragraphs[2].text = title doc.paragraphs[4].text = domain doc.paragraphs[6].text = name doc.paragraphs[8].text = url doc.paragraphs[10].text = description #验证截图,添加图片对应word的位置 paragraph = doc.paragraphs[10] #图片位置 #添加图片部分,宽高自行调整,单位cm run = paragraph.add_run() if screenshot_path_1: run.add_picture(screenshot_path_1, width=Cm(16.52), height=Cm(9.13)) #添加图片 # run.add_picture(screenshot_path_2, width=Cm(16.52), height=Cm(9.13)) #添加ICP备案图片,已废弃寻找新方法 doc_save_path = './file/result/' if not os.path.exists(doc_save_path): os.mkdir(doc_save_path) #保存word,根据需要自行更改 doc_name = str(company_name) + "_" + name + ".docx" doc.save(doc_save_path + doc_name) def mass_poc_scan(domains, include_all, choice_2, docx_name, status): document = create_document() current_domain = None # 用于记录当前正在扫描的域名 try: for domain in domains: logging.info(f"正在扫描域名:{domain}") current_domain = domain # 记录当前正在扫描的域名 if status == 'y': if not check_url_status(domain): logging.warning(f"访问失败,跳过当前域名的扫描:{domain}") print("--------------------------------------------------") if choice_2.lower() == 'y': document.add_heading(f"目标:{domain} 无法访问!", level=3) # 将标题升级为level=3 continue try: results, description = validate_main(domain) add_scan_results_to_document(document, domain, results, include_all, description, choice_3) except Exception as e: logging.error(f"扫描域名 {domain} 时出错:{e}") print("--------------------------------------------------") else: try: results, description = validate_main(domain) add_scan_results_to_document(document, domain, results, include_all, description, choice_3) except Exception as e: logging.error(f"扫描域名 {domain} 时出错:{e}") print("--------------------------------------------------") except KeyboardInterrupt: print(Fore.RED + '\n检测到Ctrl+C,中断程序:') if current_domain is not None: print(f"当前正在扫描的域名为:{current_domain} 。") else: print("暂未开始扫描计划。") print(Fore.RESET) save_document(document, docx_name) def save_document(document, docx_name): timestamp = str(int(time.time())) # 如果 docx_name 为空,则结束程序 if not docx_name: logging.info("程序结束!") sys.exit() results_dir = "./file/report/" if not os.path.exists(results_dir): os.makedirs(results_dir) document.save(f"{results_dir}/{docx_name}_{timestamp}.docx") logging.info("扫描报告已生成!") if __name__ == "__main__": parser = argparse.ArgumentParser(description="大部分参数在启动程序后输入,若需要启用默认模式,请查看以下说明。") parser.add_argument('--batch', action='store_true', help='启用批处理模式,默认使用urls目标/删除无洞链接/生成报告') parser.add_argument('-n', '--name', type=str, help='输入文件名称') args = parser.parse_args() if args.batch: # 批处理模式 print("默认模式(默认目标文档/筛选漏洞链接/生成报告)!") file_path = "./urls.txt" include_all = False choice_3 = 'y' status = 'y' else: # 交互模式 choice = input(Fore.BLUE + "请问是否需要输入其他目标文件?(y/n): " + Fore.RESET).lower() if choice == 'n': print("默认目标文档:urls.txt") file_path = "./urls.txt" else: file_path = input(Fore.BLUE + "请输入需要扫描的目标文件:" + Fore.RESET) print("--------------------------------------------------") domains = extract_domains_from_file(file_path) status = input(Fore.BLUE + "请问是否需要检查目标网站存活状态?(y/n): " + Fore.RESET).lower() print("--------------------------------------------------") choice_2 = input(Fore.BLUE + "请问是否删除无漏洞网站记录?(y/n): " + Fore.RESET).lower() include_all = choice_2 != 'y' print("--------------------------------------------------") choice_3 = input(Fore.BLUE + "请问是否生成漏洞报告?(y/n): " + Fore.RESET).lower() print("--------------------------------------------------") # 执行扫描 domains = extract_domains_from_file(file_path) if args.batch: mass_poc_scan(domains, include_all, choice_3, args.name, status) else: docx_name = input(Fore.BLUE + "请输入总报告文件名(回车可跳过生成报告步骤):" + Fore.RESET) print("--------------------------------------------------") mass_poc_scan(domains, include_all, choice_3, docx_name, status) # if __name__ == "__main__": # domain = 'http://vr.sh-fit.com:9090' # company_name = get_company_name("https://whois.west.cn/icp/" + extract_root_domain(domain)) # print(company_name)