diff --git a/Core.py b/Core.py index 2975592..df8e041 100644 --- a/Core.py +++ b/Core.py @@ -14,9 +14,11 @@ import requests from datetime import datetime, timedelta from SendCore.FeishuSendBot import SendToFeishu, gen_sign from SendCore.QiweiSendBot import SendToWX -from media.common import run, seebug_main, M_4hou_main, anquanke_main, sec_wiki_main, huawei_main, doonsec_main, qianxin_main -from media.freebuf import freebuf_main -from media.xianzhi import xianzhi_main +from spider.common import run, seebug_main, M_4hou_main, anquanke_main, sec_wiki_main, huawei_main, doonsec_main, qianxin_main +from spider.freebuf import freebuf_main +from spider.xianzhi import xianzhi_main +from spider.sougou_wx import sougou_wx_main +from spider.github import github_main from GotoSend.M_4hou import Src_4hou from GotoSend.anquanke import Src_anquanke from GotoSend.doonsec import Src_doonsec @@ -24,7 +26,9 @@ from GotoSend.xianzhi import Src_xianzhi from GotoSend.freebuf import Src_freebuf from GotoSend.qianxin import Src_qianxin from GotoSend.seebug import Src_seebug -from config.check_config import get_core_config, get_debug_config +from GotoSend.sougou_wx import Src_sougou_wx +from GotoSend.github import Src_github +from config.check_config import get_core_config, get_debug_config, get_kewords_config from loguru import logger # 清除所有已有的日志记录器配置 @@ -49,7 +53,10 @@ def signal_handler(sig, frame): signal.signal(signal.SIGINT, signal_handler) # Ctrl+C signal.signal(signal.SIGTERM, signal_handler) # kill命令 webhook_url_once, timestamp_once, sign_once = gen_sign() -e_hour, choice, fs_activate, wx_activate, ding_activate, lx_activate, url_web = get_core_config() +e_hour, time_choice, choice, fs_activate, wx_activate, ding_activate, lx_activate, url_web = get_core_config() +Sogou_WX, Doonsec_switch, Doonsec = get_kewords_config() + + def check_avaliable(info_long, info_short, title, webhook_url, timestamp, sign): if info_long: # 发送完整文章相关内容 @@ -80,10 +87,8 @@ def check_avaliable(info_long, info_short, title, webhook_url, timestamp, sign): if not info_long and not info_short: logger.info(f"{title}数据为空,跳过执行。") -def send_job(time_1): +def send_job_RSS(time_1): # 爬取数据 - logger.info(f"发送程序启动,当前时间为:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") - logger.info("正在启动各爬虫并获取资源中...") seebug_main() anquanke_main() huawei_main() @@ -96,7 +101,7 @@ def send_job(time_1): # 分析各个数据源的结果(输出长结果) result_4hou_long = Src_4hou(time_1, False) result_anquanke_long = Src_anquanke(time_1, False) - result_doonsec_long = Src_doonsec(time_1, False) + result_doonsec_long = Src_doonsec(time_1, False, Doonsec_switch, Doonsec) result_xianzhi_long = Src_xianzhi(time_1, False) result_freebuf_long = Src_freebuf(time_1, False) result_qianxin_long = Src_qianxin(time_1, False) @@ -104,7 +109,7 @@ def send_job(time_1): # 分析各个数据源的结果(输出短结果) result_4hou_short = Src_4hou(time_1, True) result_anquanke_short = Src_anquanke(time_1, True) - result_doonsec_short = Src_doonsec(time_1, True) + result_doonsec_short = Src_doonsec(time_1, True, Doonsec_switch, Doonsec) result_xianzhi_short = Src_xianzhi(time_1, True) result_freebuf_short = Src_freebuf(time_1, True) result_qianxin_short = Src_qianxin(time_1, True) @@ -120,17 +125,21 @@ def send_job(time_1): check_avaliable(result_qianxin_long, result_qianxin_short, "奇安信攻防社区资讯", webhook_url, timestamp, sign) check_avaliable(result_seebug_long, result_seebug_short, "Seebug社区资讯", webhook_url, timestamp, sign) - if fs_activate == "True": - send_result = SendToFeishu(f"[点此访问]({url_web})网站以查看全部文章。", "单次运行结束", webhook_url, timestamp, sign) - logger.info(send_result) - else: - pass - if wx_activate == "True": - send_result = SendToWX(f"[点此访问]({url_web})网站以查看全部文章。", "单次运行结束") - logger.info(send_result) - else: - pass - logger.info("执行完毕,等待下一次执行...") +def send_job_SX(): + sougou_wx_main(Sogou_WX) + result_sx_long = Src_sougou_wx(False) + result_sx_short = Src_sougou_wx(True) + webhook_url, timestamp, sign = gen_sign() + check_avaliable(result_sx_long, result_sx_short, "微信公众号关键词相关内容", webhook_url, timestamp, sign) + +def send_job_github(time_1): + github_main() + result_github_1_long, result_github_2_long, result_github_3_long = Src_github(time_1, False) + result_github_1_short, result_github_2_short, result_github_3_short = Src_github(time_1, True) + webhook_url, timestamp, sign = gen_sign() + check_avaliable(result_github_1_long, result_github_1_short, "Github项目监控-关键词监控", webhook_url, timestamp, sign) + check_avaliable(result_github_2_long, result_github_2_short, "Github项目监控-项目更新情况", webhook_url, timestamp, sign) + check_avaliable(result_github_3_long, result_github_3_short, "Github项目监控-大佬工具", webhook_url, timestamp, sign) # 探测rss源状态 def check_rss_status(url): @@ -167,12 +176,22 @@ def test_rss_source(): return rss_info -def main_loop(choice): - if choice == 1: +def main_job(e_hour): + logger.info(f"发送程序启动,当前时间为:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + logger.info("正在启动各爬虫并获取资源中...") + if 0 in choice: + send_job_RSS(e_hour) + if 1 in choice: + send_job_SX() + if 2 in choice: + send_job_github(e_hour) + +def main_loop(time_choice): + if time_choice == 1: while True: try: # 执行任务 - send_job(e_hour) + main_job(e_hour) time.sleep(e_hour * 60 * 60 - 3 * 60) except Exception as e: @@ -181,13 +200,13 @@ def main_loop(choice): # logger.info(result) exit() - elif choice == 0: + elif time_choice == 0: # 设置每天的特定时间点执行job函数 - schedule.every().day.at("09:00").do(send_job, 12) - schedule.every().day.at("12:00").do(send_job, 3) - schedule.every().day.at("15:00").do(send_job, 3) - schedule.every().day.at("18:00").do(send_job, 3) - schedule.every().day.at("21:00").do(send_job, 3) + schedule.every().day.at("09:00").do(main_job, 12) + schedule.every().day.at("12:00").do(main_job, 3) + schedule.every().day.at("15:00").do(main_job, 3) + schedule.every().day.at("18:00").do(main_job, 3) + schedule.every().day.at("21:00").do(main_job, 3) while True: schedule.run_pending() @@ -198,26 +217,31 @@ def send_first_message(): start_info = "" start_info += "程序已启动,当前时间为:" + datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n" start_info += "程序作者:MasonLiu \t 开源地址:[GM-gitea](https://git.masonliu.com/MasonLiu/PyBot)" + "\n" - if choice == 1: + if time_choice == 1: start_info += "时间配置:每隔" + str(e_hour) + "小时执行一次推送\n" - else: + elif time_choice == 0: start_info += "时间配置:每天固定时间点执行推送\n" - if fs_activate == "True": - result = SendToFeishu(start_info, "程序信息", webhook_url_once, timestamp_once, sign_once) - logger.info(result) - result = SendToFeishu(rss_info, "RSS源状态", webhook_url_once, timestamp_once, sign_once) - # logger.info(rss_info) - logger.info(result) - else: - pass - if wx_activate == "True": - result = SendToWX(start_info, "程序信息") - logger.info(result) - result = SendToWX(rss_info, "RSS源状态") - # logger.info(rss_info) - logger.info(result) - else: - pass + if 0 in choice: + if fs_activate == "True": + result = SendToFeishu(start_info, "程序信息", webhook_url_once, timestamp_once, sign_once) + logger.info(result) + result = SendToFeishu(rss_info, "RSS源状态", webhook_url_once, timestamp_once, sign_once) + # logger.info(rss_info) + logger.info(result) + send_result = SendToFeishu(f"[点此访问]({url_web})网站以查看全部文章。", "首次运行提醒", webhook_url_once, timestamp_once, sign_once) + logger.info(send_result) + else: + pass + if wx_activate == "True": + result = SendToWX(start_info, "程序信息") + logger.info(result) + result = SendToWX(rss_info, "RSS源状态") + # logger.info(rss_info) + logger.info(result) + send_result = SendToWX(f"[点此访问]({url_web})网站以查看全部文章。", "首次运行提醒") + logger.info(send_result) + else: + pass if __name__ == "__main__": logger.info("程序正在运行当中。") @@ -228,4 +252,4 @@ if __name__ == "__main__": # 主程序 send_first_message() - main_loop(choice) \ No newline at end of file + main_loop(time_choice) \ No newline at end of file diff --git a/Dev_test.py b/Dev_test.py index cdb73d4..a68e53d 100644 --- a/Dev_test.py +++ b/Dev_test.py @@ -1,10 +1,54 @@ +# -*- coding: utf-8 -*- +""" +@Author: MasonLiu +@Description: 本程序可以爬取各安全资讯源,并发送到飞书群组。 +""" + +import schedule +import os +import signal +import sys +import time +import yaml +import requests +from datetime import datetime, timedelta from SendCore.FeishuSendBot import SendToFeishu, gen_sign +from SendCore.QiweiSendBot import SendToWX +from media.common import run, seebug_main, M_4hou_main, anquanke_main, sec_wiki_main, huawei_main, doonsec_main, qianxin_main +from media.freebuf import freebuf_main +from media.xianzhi import xianzhi_main +from media.sougou_wx import sougou_wx_main +from media.github import github_main +from GotoSend.M_4hou import Src_4hou +from GotoSend.anquanke import Src_anquanke +from GotoSend.doonsec import Src_doonsec +from GotoSend.xianzhi import Src_xianzhi +from GotoSend.freebuf import Src_freebuf +from GotoSend.qianxin import Src_qianxin +from GotoSend.seebug import Src_seebug +from GotoSend.sougou_wx import Src_sougou_wx +# from GotoSend.github import Src_github +from config.check_config import get_core_config, get_debug_config, get_kewords_config +from loguru import logger -webhook_url, timestamp, sign = gen_sign() +# # 全局变量 +# webhook_url_once, timestamp_once, sign_once = gen_sign() +# e_hour, time_choice, choice, fs_activate, wx_activate, ding_activate, lx_activate, url_web = get_core_config() +# Sogou_WX = get_kewords_config() +# webhook_url, timestamp, sign = gen_sign() -# 测试用消息体 -test_msg = { - "请单件文档查看昨天讨论的方案相关飞书文档,注意作者为 **张三** 版本为 \n*002* ,版本 ~~001~~ 已经删除。文件地址是 [https://www.feishu.cn](https://www.feishu.cn),打开次数:1次" - } +# sougou_wx_main(Sogou_WX) +# result_sx_long = Src_sougou_wx(False) +# result_sx_short = Src_sougou_wx(True) +# webhook_url, timestamp, sign = gen_sign() +# SendToFeishu(result_sx_long, "微信公众号关键词相关内容", webhook_url, timestamp, sign) -SendToFeishu(test_msg, "先知社区资讯递送", webhook_url, timestamp, sign) \ No newline at end of file +github_main() + + +# # 测试用消息体 +# test_msg = { +# "请单件文档查看昨天讨论的方案相关飞书文档,注意作者为 **张三** 版本为 \n*002* ,版本 ~~001~~ 已经删除。文件地址是 [https://www.feishu.cn](https://www.feishu.cn),打开次数:1次" +# } + +# SendToFeishu(test_msg, "先知社区资讯递送", webhook_url, timestamp, sign) diff --git a/GotoSend/__pycache__/doonsec.cpython-312.pyc b/GotoSend/__pycache__/doonsec.cpython-312.pyc index 67cfc5a..95d06a5 100644 Binary files a/GotoSend/__pycache__/doonsec.cpython-312.pyc and b/GotoSend/__pycache__/doonsec.cpython-312.pyc differ diff --git a/GotoSend/__pycache__/github.cpython-312.pyc b/GotoSend/__pycache__/github.cpython-312.pyc new file mode 100644 index 0000000..9b0510d Binary files /dev/null and b/GotoSend/__pycache__/github.cpython-312.pyc differ diff --git a/GotoSend/__pycache__/qianxin.cpython-312.pyc b/GotoSend/__pycache__/qianxin.cpython-312.pyc index 3d3d4d9..f18811a 100644 Binary files a/GotoSend/__pycache__/qianxin.cpython-312.pyc and b/GotoSend/__pycache__/qianxin.cpython-312.pyc differ diff --git a/GotoSend/__pycache__/sougou_wx.cpython-312.pyc b/GotoSend/__pycache__/sougou_wx.cpython-312.pyc new file mode 100644 index 0000000..cf2c2da Binary files /dev/null and b/GotoSend/__pycache__/sougou_wx.cpython-312.pyc differ diff --git a/GotoSend/doonsec.py b/GotoSend/doonsec.py index 74b98df..6e30760 100644 --- a/GotoSend/doonsec.py +++ b/GotoSend/doonsec.py @@ -68,20 +68,37 @@ def get_doonsec_json(): return total_data -def select_articles(e_hour): +def select_articles(e_hour, Doonsec_switch, Doonsec): conn = sqlite3.connect('./db/doonsec.db') cursor = conn.cursor() # 获取当前日期和时间 now = datetime.now() - start_time = now - timedelta(hours=e_hour) + start_time = now - timedelta(hours=e_hour, minutes=3) end_time = now - # 查询指定时间段内的数据 - cursor.execute(''' - SELECT * FROM articles - WHERE pubDate BETWEEN ? AND ? - ''', (start_time.strftime('%Y-%m-%d %H:%M:%S'), end_time.strftime('%Y-%m-%d %H:%M:%S'))) + if Doonsec_switch == False: + # 查询指定时间段内的数据 + cursor.execute(''' + SELECT * FROM articles + WHERE pubDate BETWEEN ? AND ? + ''', (start_time.strftime('%Y-%m-%d %H:%M:%S'), end_time.strftime('%Y-%m-%d %H:%M:%S'))) + elif Doonsec_switch == True: + # 查询指定时间段内且title包含特定关键词的数据 + placeholders = ', '.join(['?'] * len(Doonsec)) + query = f''' + SELECT * FROM articles + WHERE pubDate BETWEEN ? AND ? + AND (title LIKE ? OR title LIKE ? OR title LIKE ?) + ''' + + # 构建参数列表 + params = [start_time.strftime('%Y-%m-%d %H:%M:%S'), end_time.strftime('%Y-%m-%d %H:%M:%S')] + for keyword in Doonsec: + params.append(f'%{keyword}%') + + # 执行查询 + cursor.execute(query, params) results = cursor.fetchall() conn.close() @@ -133,7 +150,7 @@ def get_filtered_articles(entries, Is_short): return result -def Src_doonsec(e_hour, Is_short): +def Src_doonsec(e_hour, Is_short, Doonsec_switch, Doonsec): if not os.path.exists('./db/doonsec.db'): # 创建数据库和表 create_database() @@ -148,7 +165,7 @@ def Src_doonsec(e_hour, Is_short): insert_data(M_doonsec_data) # 查询指定时间段内的数据 - filtered_articles = select_articles(e_hour) + filtered_articles = select_articles(e_hour, Doonsec_switch, Doonsec) # print(filtered_articles) if filtered_articles: @@ -158,7 +175,7 @@ def Src_doonsec(e_hour, Is_short): return False if __name__ == "__main__": - reslts = Src_doonsec(4, False) + reslts = Src_doonsec(24, False, True, ["webshell", "2000", "POC"] ) if reslts != False: print(reslts) else: diff --git a/GotoSend/freebuf.py b/GotoSend/freebuf.py index 24a0ec8..35c4c0d 100644 --- a/GotoSend/freebuf.py +++ b/GotoSend/freebuf.py @@ -50,7 +50,7 @@ def get_freebuf_json(): # 假设data是一个包含多个JSON对象的列表 if not isinstance(data, list): - raise ValueError("JSON文件格式错误,请检查common.py是否异常!") + raise ValueError("JSON文件格式错误,请检查爬取程序是否异常!") # 提取所需字段并编号 total_data = [] diff --git a/GotoSend/github.py b/GotoSend/github.py index e69de29..83779fa 100644 --- a/GotoSend/github.py +++ b/GotoSend/github.py @@ -0,0 +1,369 @@ +# -*- coding: utf-8 -*- + +import json +import sqlite3 +import os +from datetime import datetime, timedelta + +def create_database(): + conn = sqlite3.connect('./db/github.db') + cursor = conn.cursor() + + cursor.executescript(''' + CREATE TABLE IF NOT EXISTS keywords ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT, + link TEXT, + description TEXT, + pubDate DATETIME, + author TEXT, + keyword TEXT, + language TEXT, + is_sended BOOLEAN + ); + CREATE TABLE IF NOT EXISTS repos ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT, + link TEXT, + description TEXT, + pubDate DATETIME, + author TEXT, + keyword TEXT, + link2 TEXT + ); + CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT, + link TEXT, + description TEXT, + pubDate DATETIME, + author TEXT, + keyword TEXT, + language TEXT, + is_sended BOOLEAN + ); + ''') + + conn.commit() + conn.close() + + +def insert_data(): + # 检查文件是否存在 + # 打开并读取JSON文件 + # 假设data是一个包含多个JSON对象的列表,然后校验JSON格式是否异常 + if not os.path.exists('./JSON/github_keyword.json'): + raise FileNotFoundError(f"github_keyword文件不存在,请检查程序是否运行正常!") + with open('./JSON/github_keyword.json', 'r', encoding='utf-8') as file: + data_keyword = json.load(file) + if not isinstance(data_keyword, list): + raise ValueError("JSON文件格式错误,请检查爬取程序是否异常!") + + if not os.path.exists('./JSON/github_repo.json'): + raise FileNotFoundError(f"github_repo文件不存在,请检查程序是否运行正常!") + with open('./JSON/github_repo.json', 'r', encoding='utf-8') as file: + data_repo = json.load(file) + if not isinstance(data_repo, list): + raise ValueError("JSON文件格式错误,请检查爬取程序是否异常!") + + if not os.path.exists('./JSON/github_user.json'): + raise FileNotFoundError(f"github_user文件不存在,请检查程序是否运行正常!") + with open('./JSON/github_user.json', 'r', encoding='utf-8') as file: + data_user = json.load(file) + if not isinstance(data_user, list): + raise ValueError("JSON文件格式错误,请检查爬取程序是否异常!") + + conn = sqlite3.connect('./db/github.db') + cursor = conn.cursor() + + # 提取所需字段并编号 + for index, item in enumerate(data_keyword, start=1): + entry = { + "id": index, + "title": item.get("name", ""), + "link": item.get("link", ""), + "description": item.get("description", ""), + "pubDate": item.get("created_at", ""), + "author": item.get("author", ""), + "keyword": item.get("keyword", ""), + "language": item.get("language", "") + } + try: + # 解析 pubDate 字符串为 datetime 对象 + pub_date = datetime.strptime(entry['pubDate'], '%Y-%m-%dT%H:%M:%SZ') + # 格式化 pubDate 为所需的格式 + formatted_pub_date = pub_date.strftime('%Y-%m-%d %H:%M:%S') + except ValueError: + # 如果解析失败,使用原始 pubDate 字符串 + formatted_pub_date = entry['pubDate'] + + + # 检查是否存在相同 title 和 author 的记录 + cursor.execute(''' + SELECT 1 FROM keywords WHERE title = ? AND author = ? + ''', (entry['title'], entry['author'])) + + if cursor.fetchone() is None: + # 如果没有找到相同记录,则插入新记录 + cursor.execute(''' + INSERT INTO keywords (title, link, description, pubDate, author, language, keyword) + VALUES (?, ?, ?, ?, ?, ?, ?) + ''', (entry['title'], entry['link'], entry['description'], formatted_pub_date, entry['author'], entry['language'], entry['keyword'])) + + for index, item in enumerate(data_repo, start=1): + entry = { + "id": index, + "title": item.get("name", ""), + "link": item.get("link", ""), + "description": item.get("description", ""), + "pubDate": item.get("updated_at", ""), + "author": item.get("author", ""), + "keyword": item.get("keyword", ""), + "link2": item.get("link_2", "") + } + try: + # 解析 pubDate 字符串为 datetime 对象 + pub_date = datetime.strptime(entry['pubDate'], '%Y-%m-%dT%H:%M:%SZ') + # 格式化 pubDate 为所需的格式 + formatted_pub_date = pub_date.strftime('%Y-%m-%d %H:%M:%S') + except ValueError: + # 如果解析失败,使用原始 pubDate 字符串 + formatted_pub_date = entry['pubDate'] + + cursor.execute(''' + INSERT INTO repos (title, link, description, pubDate, author, link2, keyword) + VALUES (?, ?, ?, ?, ?, ?, ?) + ''', (entry['title'], entry['link'], entry['description'], formatted_pub_date, entry['author'], entry['link2'], entry['keyword'])) + + + # 插入 users 数据 + for index, item in enumerate(data_user, start=1): + entry = { + "id": index, + "title": item.get("name", ""), + "link": item.get("link", ""), + "description": item.get("description", ""), + "pubDate": item.get("created_at", ""), + "author": item.get("author", ""), + "keyword": item.get("keyword", ""), + "language": item.get("language", "") + } + try: + # 解析 pubDate 字符串为 datetime 对象 + pub_date = datetime.strptime(entry['pubDate'], '%Y-%m-%dT%H:%M:%SZ') + # 格式化 pubDate 为所需的格式 + formatted_pub_date = pub_date.strftime('%Y-%m-%d %H:%M:%S') + except ValueError: + # 如果解析失败,使用原始 pubDate 字符串 + formatted_pub_date = entry['pubDate'] + + # 检查是否存在相同 title 和 author 的记录 + cursor.execute(''' + SELECT 1 FROM users WHERE title = ? AND author = ? + ''', (entry['title'], entry['author'])) + + if cursor.fetchone() is None: + # 如果没有找到相同记录,则插入新记录 + cursor.execute(''' + INSERT INTO users (title, link, description, pubDate, author, keyword, language) + VALUES (?, ?, ?, ?, ?, ?, ?) + ''', (entry['title'], entry['link'], entry['description'], formatted_pub_date, entry['author'], entry['keyword'], entry['language'])) + + conn.commit() + conn.close() + +def select_articles(e_hour): + conn = sqlite3.connect('./db/github.db') + cursor = conn.cursor() + + # 获取当前日期和时间 + now = datetime.now() + two_months_ago = now - timedelta(days=60) # 假设两个月大约60天 + start_time = now - timedelta(hours=e_hour, minutes=3) + + # 查询指定时间段内的数据 + cursor.execute(''' + SELECT * FROM keywords + WHERE is_sended IS NULL AND pubDate BETWEEN ? AND ? + ORDER BY pubDate DESC + ''', (start_time.strftime('%Y-%m-%d %H:%M:%S'), now.strftime('%Y-%m-%d %H:%M:%S'))) + + result_1 = cursor.fetchall() + + if result_1: + for row in result_1: + keyword_id = row[0] + cursor.execute(''' + UPDATE keywords + SET is_sended = True + WHERE id = ? + ''', (keyword_id,)) + + conn.commit() + + cursor.execute(''' + SELECT * FROM repos + WHERE pubDate BETWEEN ? AND ? + ''', (start_time.strftime('%Y-%m-%d %H:%M:%S'), now.strftime('%Y-%m-%d %H:%M:%S'))) + + result_2 = cursor.fetchall() + + # 查询最近的5条未被标记为True的消息且发布时间不超过两个月 + cursor.execute(''' + SELECT * FROM users + WHERE is_sended IS NULL AND pubDate BETWEEN ? AND ? + ORDER BY pubDate DESC + LIMIT 5 + ''', (two_months_ago.strftime('%Y-%m-%d %H:%M:%S'), now.strftime('%Y-%m-%d %H:%M:%S'))) + + # 查询最近的3条未被标记为True的消息 + # cursor.execute(''' + # SELECT * FROM users + # WHERE is_sended IS NULL + # ORDER BY pubDate DESC + # LIMIT 5 + # ''') + + result_3 = cursor.fetchall() + # print(results) + + if result_3: + for row in result_3: + user_id = row[0] + cursor.execute(''' + UPDATE users + SET is_sended = True + WHERE id = ? + ''', (user_id,)) + + conn.commit() # 提交事务 + + cursor.close() + conn.close() + + return result_1, result_2, result_3 + +def clear_table(): + conn = sqlite3.connect('./db/github.db') + cursor = conn.cursor() + cursor.execute('DELETE FROM repos') + conn.commit() + conn.close() + +def record_md(result, filename="./history/github.md"): + # 读取现有内容 + if os.path.exists(filename): + with open(filename, 'r', encoding='utf-8') as file: + existing_content = file.read() + else: + existing_content = "" + + # 将新内容插入到现有内容的开头 + new_content = result + existing_content + + # 写回文件 + with open(filename, 'w', encoding='utf-8') as file: + file.write(new_content) + +def get_filtered_articles(entries, Is_short, choice): + result = "" + record = "" + for entry in entries: + if Is_short == False: + if choice == 1: + result += f"关键词【{entry[6]}】发现新项目:[{entry[1]}]({entry[2]})\n" + result += f"项目描述:{entry[3]}\n" + result += f"上传时间:{entry[4]}\n" + result += f"开发语言:{entry[7]}\t\t作者:{entry[5]}\n" + result += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 2: + result += f"项目:[{entry[1]}]({entry[2]})存在更新!!!\n" + result += f"更新描述:{entry[3]}\n" + result += f"更新时间:{entry[4]}\n" + result += f"提交者:{entry[5]},[点此查看提交详情]({entry[2]})\n" + result += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 3: + result += f"大佬 {entry[5]} 上传了一个新工具:[{entry[1]}]({entry[2]})\n" + result += f"项目描述:{entry[3]}\n" + result += f"上传时间:{entry[4]}\n" + result += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif Is_short == True: + if choice == 1: + result += f"关键词【{entry[7]}】发现新项目:[{entry[1]}]({entry[2]})\n" + result += f"上传时间:{entry[4]}\n" + result += f"开发语言:{entry[6]}\t\t作者:{entry[5]}\n" + result += "\n" + "-" * 3 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 2: + result += f"项目:[{entry[1]}]({entry[2]})存在更新!!!\n" + result += f"更新描述:{entry[3]}\n" + result += f"更新时间:{entry[4]}\n" + result += f"提交者:{entry[5]},[点此查看提交详情]({entry[2]})\n" + result += "\n" + "-" * 3 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 3: + result += f"大佬 {entry[5]} 上传了一个新工具:[{entry[1]}]({entry[2]})\n" + result += f"项目描述:{entry[3]}\n" + result += f"上传时间:{entry[4]}\n" + result += "\n" + "-" * 3 + "\n" # 添加分隔线以便区分不同文章 + + if choice == 1: + record += f"#### 关键词【{entry[7]}】发现新项目:[{entry[1]}]({entry[2]})\n" + record += f"**项目描述**:{entry[3]}\n" + record += f"**上传时间**:{entry[4]}\n" + record += f"**开发语言**:{entry[6]}\t\t**作者**:{entry[5]}\n" + record += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 2: + record += f"#### 项目:[{entry[1]}]({entry[2]})存在更新!!!\n" + record += f"**更新描述**:{entry[3]}\n" + record += f"**更新时间**:{entry[4]}\n" + record += f"**提交者**:{entry[5]},[点此查看提交详情]({entry[2]})\n" + record += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 3: + record += f"#### 大佬 {entry[5]} 上传了一个新工具:[{entry[1]}]({entry[2]})\n" + record += f"**项目描述**:{entry[3]}\n" + record += f"**上传时间**:{entry[4]}\n" + record += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + + record_md(record) + return result + + +def Src_github(e_hour, Is_short): + if not os.path.exists('./db/github.db'): + # 创建数据库和表 + create_database() + + # 清空表 + clear_table() + + # 插入数据到数据库 + insert_data() + + # 查询指定时间段内的数据 + keyword_data, repo_data, user_data = select_articles(e_hour) + + if keyword_data: + result_1 = get_filtered_articles(keyword_data, Is_short, 1) + else: + result_1 = "" + if repo_data: + result_2 = get_filtered_articles(repo_data, Is_short, 2) + else: + result_2 = "" + if user_data: + result_3 = get_filtered_articles(user_data, Is_short, 3) + else: + result_3 = "" + return result_1, result_2, result_3 + +if __name__ == "__main__": + result_1, result_2, result_3 = Src_github(24, False) + if result_1 != "": + print(result_1) + elif result_2 != "": + print(result_2) + if result_3 != "": + print(result_3) + else: + # 如果为空,则跳过执行 + print("-" * 10) + print("github数据为空,跳过执行。") \ No newline at end of file diff --git a/GotoSend/qianxin.py b/GotoSend/qianxin.py index 62bc2b9..c1b1941 100644 --- a/GotoSend/qianxin.py +++ b/GotoSend/qianxin.py @@ -108,7 +108,7 @@ def get_filtered_articles(entries, Is_short): result += f"上传时间:{entry[5]}\n" result += f"描述:{entry[4]}\n" result += "\n" + "-" * 40 + "\n" # 添加分隔线以便区分不同文章 - if Is_short == False: + if Is_short == True: result += f"文章:[{entry[1]}]({entry[2]})\n" result += f"上传时间:{entry[5]}\n" result += "\n" + "-" * 3 + "\n" # 添加分隔线以便区分不同文章 diff --git a/GotoSend/sougou-wx.py b/GotoSend/sougou_wx.py similarity index 98% rename from GotoSend/sougou-wx.py rename to GotoSend/sougou_wx.py index 24d5966..11ddf2f 100644 --- a/GotoSend/sougou-wx.py +++ b/GotoSend/sougou_wx.py @@ -59,7 +59,7 @@ def get_json(): for keyword, keyword_data in data.items(): # 检查关键词对应的数据是否为列表 if not isinstance(keyword_data, list): - raise ValueError(f"关键词 {keyword} 对应的数据格式错误,请检查common.py是否异常!") + raise ValueError(f"关键词 {keyword} 对应的数据格式错误,请检查爬取程序是否异常!") # 提取所需字段并编号 for index, item in enumerate(keyword_data, start=1): diff --git a/JSON/xianzhi.json b/JSON/xianzhi.json index 21e03eb..867f6d1 100644 --- a/JSON/xianzhi.json +++ b/JSON/xianzhi.json @@ -1,4 +1,154 @@ [ + { + "title": "cjson&json 二进制漏洞利用总结", + "link": "https://xz.aliyun.com/t/16928", + "published": "2024-12-26T22:55:12+08:00", + "id": "https://xz.aliyun.com/t/16928", + "summary": { + "@type": "html", + "#text": "cjson&json 二进制漏洞利用总结" + } + }, + { + "title": "CVE-2024-9047 (WordPress File Upload 插件漏洞分析)", + "link": "https://xz.aliyun.com/t/16927", + "published": "2024-12-26T20:23:12+08:00", + "id": "https://xz.aliyun.com/t/16927", + "summary": { + "@type": "html", + "#text": "CVE-2024-9047 (WordPress File Upload 插件漏洞分析)" + } + }, + { + "title": "Windows应急响应之USBDevices日志", + "link": "https://xz.aliyun.com/t/16926", + "published": "2024-12-26T17:39:57+08:00", + "id": "https://xz.aliyun.com/t/16926", + "summary": { + "@type": "html", + "#text": "Windows应急响应之USBDevices日志" + } + }, + { + "title": "0CTF 2024 部分WP", + "link": "https://xz.aliyun.com/t/16925", + "published": "2024-12-26T17:39:49+08:00", + "id": "https://xz.aliyun.com/t/16925", + "summary": { + "@type": "html", + "#text": "0CTF 2024 部分WP" + } + }, + { + "title": "通过漏洞靶场学习Android应用安全", + "link": "https://xz.aliyun.com/t/16924", + "published": "2024-12-26T17:38:34+08:00", + "id": "https://xz.aliyun.com/t/16924", + "summary": { + "@type": "html", + "#text": "通过漏洞靶场学习Android应用安全" + } + }, + { + "title": "Apache mina CVE-2024-52046漏洞分析复现", + "link": "https://xz.aliyun.com/t/16923", + "published": "2024-12-26T17:30:45+08:00", + "id": "https://xz.aliyun.com/t/16923", + "summary": { + "@type": "html", + "#text": "Apache mina CVE-2024-52046漏洞分析复现" + } + }, + { + "title": "ciscn2024 androidso_re分析", + "link": "https://xz.aliyun.com/t/16920", + "published": "2024-12-26T10:37:32+08:00", + "id": "https://xz.aliyun.com/t/16920", + "summary": { + "@type": "html", + "#text": "ciscn2024 androidso_re分析" + } + }, + { + "title": "前端加解密对抗encrypt-labs", + "link": "https://xz.aliyun.com/t/16919", + "published": "2024-12-26T10:29:25+08:00", + "id": "https://xz.aliyun.com/t/16919", + "summary": { + "@type": "html", + "#text": "前端加解密对抗encrypt-labs" + } + }, + { + "title": "AFL工具使用实践:以libtiff为例的使用指南", + "link": "https://xz.aliyun.com/t/16916", + "published": "2024-12-26T04:16:13+08:00", + "id": "https://xz.aliyun.com/t/16916", + "summary": { + "@type": "html", + "#text": "AFL工具使用实践:以libtiff为例的使用指南" + } + }, + { + "title": "应急响应系列靶场解析", + "link": "https://xz.aliyun.com/t/16914", + "published": "2024-12-25T23:48:42+08:00", + "id": "https://xz.aliyun.com/t/16914", + "summary": { + "@type": "html", + "#text": "应急响应系列靶场解析" + } + }, + { + "title": "记一次某红蓝演练经历", + "link": "https://xz.aliyun.com/t/16913", + "published": "2024-12-25T22:35:21+08:00", + "id": "https://xz.aliyun.com/t/16913", + "summary": { + "@type": "html", + "#text": "记一次某红蓝演练经历" + } + }, + { + "title": "Java安全-WebShell免杀的多种方式", + "link": "https://xz.aliyun.com/t/16912", + "published": "2024-12-25T21:33:55+08:00", + "id": "https://xz.aliyun.com/t/16912", + "summary": { + "@type": "html", + "#text": "Java安全-WebShell免杀的多种方式" + } + }, + { + "title": "Linux渗透实战之Nullbyte靶场提权", + "link": "https://xz.aliyun.com/t/16911", + "published": "2024-12-25T20:46:30+08:00", + "id": "https://xz.aliyun.com/t/16911", + "summary": { + "@type": "html", + "#text": "Linux渗透实战之Nullbyte靶场提权" + } + }, + { + "title": "六安市第二届网络安全大赛WP", + "link": "https://xz.aliyun.com/t/16909", + "published": "2024-12-25T18:21:31+08:00", + "id": "https://xz.aliyun.com/t/16909", + "summary": { + "@type": "html", + "#text": "六安市第二届网络安全大赛WP" + } + }, + { + "title": "PyYAML反序列化学习", + "link": "https://xz.aliyun.com/t/16908", + "published": "2024-12-25T18:12:03+08:00", + "id": "https://xz.aliyun.com/t/16908", + "summary": { + "@type": "html", + "#text": "PyYAML反序列化学习" + } + }, { "title": "JAVA安全之SpEL表达式执行", "link": "https://xz.aliyun.com/t/16905", @@ -848,155 +998,5 @@ "@type": "html", "#text": "第十八届信息安全大赛 && 第二届长城杯 0解PWN题--server解法" } - }, - { - "title": "免杀基础-ppid伪造", - "link": "https://xz.aliyun.com/t/16777", - "published": "2024-12-16T10:00:14+08:00", - "id": "https://xz.aliyun.com/t/16777", - "summary": { - "@type": "html", - "#text": "免杀基础-ppid伪造" - } - }, - { - "title": "高效IO攻击利用学习之House of apple2超详解", - "link": "https://xz.aliyun.com/t/16775", - "published": "2024-12-16T00:37:35+08:00", - "id": "https://xz.aliyun.com/t/16775", - "summary": { - "@type": "html", - "#text": "高效IO攻击利用学习之House of apple2超详解" - } - }, - { - "title": "第十八届信息安全大赛 && 第二届长城杯 - Crypto详解", - "link": "https://xz.aliyun.com/t/16773", - "published": "2024-12-15T22:42:00+08:00", - "id": "https://xz.aliyun.com/t/16773", - "summary": { - "@type": "html", - "#text": "第十八届信息安全大赛 && 第二届长城杯 - Crypto详解" - } - }, - { - "title": "m0leCon Beginner CTF2024部分wp", - "link": "https://xz.aliyun.com/t/16772", - "published": "2024-12-15T21:31:48+08:00", - "id": "https://xz.aliyun.com/t/16772", - "summary": { - "@type": "html", - "#text": "m0leCon Beginner CTF2024部分wp" - } - }, - { - "title": "Ucum-java XXE漏洞分析(CVE-2024-55887)", - "link": "https://xz.aliyun.com/t/16771", - "published": "2024-12-15T21:04:00+08:00", - "id": "https://xz.aliyun.com/t/16771", - "summary": { - "@type": "html", - "#text": "Ucum-java XXE漏洞分析(CVE-2024-55887)" - } - }, - { - "title": "第十八届信息安全大赛 && 第二届长城杯 - re", - "link": "https://xz.aliyun.com/t/16766", - "published": "2024-12-15T18:21:26+08:00", - "id": "https://xz.aliyun.com/t/16766", - "summary": { - "@type": "html", - "#text": "第十八届信息安全大赛 && 第二届长城杯 - re" - } - }, - { - "title": "第十八届信息安全大赛 && 第二届长城杯 - zeroshell1~5,WinFT_1~2,sc05_1", - "link": "https://xz.aliyun.com/t/16759", - "published": "2024-12-15T17:26:01+08:00", - "id": "https://xz.aliyun.com/t/16759", - "summary": { - "@type": "html", - "#text": "第十八届信息安全大赛 && 第二届长城杯 - zeroshell1~5,WinFT_1~2,sc05_1" - } - }, - { - "title": "第二届“长城杯”铁人三项赛 (防护赛)初赛WriteUP", - "link": "https://xz.aliyun.com/t/16789", - "published": "2024-12-15T17:26:01+08:00", - "id": "https://xz.aliyun.com/t/16789", - "summary": { - "@type": "html", - "#text": "第二届“长城杯”铁人三项赛 (防护赛)初赛WriteUP" - } - }, - { - "title": "第十八届信息安全大赛 && 第二届长城杯 - web&re wp", - "link": "https://xz.aliyun.com/t/16755", - "published": "2024-12-15T17:14:30+08:00", - "id": "https://xz.aliyun.com/t/16755", - "summary": { - "@type": "html", - "#text": "第十八届信息安全大赛 && 第二届长城杯 - web&re wp" - } - }, - { - "title": "第十八届信息安全大赛 && 第二届长城杯 - pwn", - "link": "https://xz.aliyun.com/t/16751", - "published": "2024-12-15T17:05:55+08:00", - "id": "https://xz.aliyun.com/t/16751", - "summary": { - "@type": "html", - "#text": "第十八届信息安全大赛 && 第二届长城杯 - pwn" - } - }, - { - "title": "第十八届信息安全大赛 && 第二届长城杯 - WEB WP", - "link": "https://xz.aliyun.com/t/16750", - "published": "2024-12-15T17:02:06+08:00", - "id": "https://xz.aliyun.com/t/16750", - "summary": { - "@type": "html", - "#text": "第十八届信息安全大赛 && 第二届长城杯 - WEB WP" - } - }, - { - "title": "HITCTF pipe_snake详解", - "link": "https://xz.aliyun.com/t/16748", - "published": "2024-12-15T09:25:49+08:00", - "id": "https://xz.aliyun.com/t/16748", - "summary": { - "@type": "html", - "#text": "HITCTF pipe_snake详解" - } - }, - { - "title": "记一次客户系统的测试--多种经典类型漏洞", - "link": "https://xz.aliyun.com/t/16747", - "published": "2024-12-14T23:33:38+08:00", - "id": "https://xz.aliyun.com/t/16747", - "summary": { - "@type": "html", - "#text": "记一次客户系统的测试--多种经典类型漏洞" - } - }, - { - "title": "记一次渗透到审计", - "link": "https://xz.aliyun.com/t/16746", - "published": "2024-12-14T21:23:00+08:00", - "id": "https://xz.aliyun.com/t/16746", - "summary": { - "@type": "html", - "#text": "记一次渗透到审计" - } - }, - { - "title": "某次大学的渗透测试经历", - "link": "https://xz.aliyun.com/t/16745", - "published": "2024-12-14T16:51:12+08:00", - "id": "https://xz.aliyun.com/t/16745", - "summary": { - "@type": "html", - "#text": "某次大学的渗透测试经历" - } } ] \ No newline at end of file diff --git a/UpdateLOG.md b/UpdateLOG.md index 032aa21..10be73c 100644 --- a/UpdateLOG.md +++ b/UpdateLOG.md @@ -24,4 +24,6 @@ - 2024年12月18日早:添加了短文本推送机制,一定程度上解决了长字节推送问题(解决办法正在思考中)
- 2024年12月24日晚:上传了测试0.1版本,修复了报错问题
- 2024年12月25日早:优化了代码逻辑和表现
-- 2024年12月25日晚:优化了推送报文格式
\ No newline at end of file +- 2024年12月25日晚:优化了推送报文格式
+- 2024年12月30日:添加并完善了基于搜狗搜索的微信公众号文章监测
+- 2024年12月30日晚:为洞见微信资讯推送添加了关键词筛选
\ No newline at end of file diff --git a/config/__pycache__/check_config.cpython-312.pyc b/config/__pycache__/check_config.cpython-312.pyc index dbefe30..c525bb6 100644 Binary files a/config/__pycache__/check_config.cpython-312.pyc and b/config/__pycache__/check_config.cpython-312.pyc differ diff --git a/config/check_config.py b/config/check_config.py index d3e25a5..da40c00 100644 --- a/config/check_config.py +++ b/config/check_config.py @@ -19,7 +19,8 @@ def get_core_config(): config = yaml.safe_load(file) logger.debug(f"Loaded config: {config}") # 输出加载的配置 - choice = int(f"{config['circle']}") + time_choice = int(f"{config['time_mode']}") + choice = config['mode'] # 假设 mode 是一个列表 e_hour = int(config.get('e_hour', '4')) # 默认循环时间为4小时 fs_activate = f"{config['fs_activate']}" @@ -53,11 +54,20 @@ def get_core_config(): url_web = f"{config['url']}" - return e_hour, choice, fs_activate, wx_activate, ding_activate, lx_activate, url_web + return e_hour, time_choice, choice, fs_activate, wx_activate, ding_activate, lx_activate, url_web def get_debug_config(): with open('./config/config.yaml', 'r', encoding="utf-8") as file: config = yaml.safe_load(file) debug = f"{config['debug']}" - return debug \ No newline at end of file + return debug + +def get_kewords_config(): + with open('./config/keywords.yaml', 'r', encoding="utf-8") as file: + config = yaml.safe_load(file) + Sogou_WX = config['Sogou-WX'] + Doonsec_switch = f"{config['Doonsec-switch']}" + Doonsec = config['Doonsec'] + + return Sogou_WX, Doonsec_switch, Doonsec \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 07530ce..1154f11 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,7 +1,10 @@ # 飞书相关配置信息 +# fs_activate: True +# fs_key: aa04a02f-d7bf-4279-bd48-44c4f28c8f74 # 此处填写token,记得冒号后空一格,如aa04a02f-d7bf-4279-bd48-44c4f28c8f74 +# fs_secret: 4tq65T4jm1MO2IlxvHxBWe # 此处填写签名密钥,记得冒号后空一格,如4tq65T4jm1MO2IlxvHxBWe fs_activate: True -fs_key: aa04a02f-d7bf-4279-bd48-44c4f28c8f74 # 此处填写token,记得冒号后空一格,如aa04a02f-d7bf-4279-bd48-44c4f28c8f74 -fs_secret: 4tq65T4jm1MO2IlxvHxBWe # 此处填写签名密钥,记得冒号后空一格,如4tq65T4jm1MO2IlxvHxBWe +fs_key: 202d7e51-9a46-422e-a035-863bc42bc459 # 此处填写token,记得冒号后空一格,如aa04a02f-d7bf-4279-bd48-44c4f28c8f74 +fs_secret: eZaSCl5DSqtJyZ8QpJBDFh # 此处填写签名密钥,记得冒号后空一格,如4tq65T4jm1MO2IlxvHxBWe # 企业微信相关配置信息 wx_activate: False @@ -23,7 +26,13 @@ sender: test@masonliu.com receivers: ['2857911564@qq.com'] # 结算时间范围 e_hour: 4 # 程序运行时间间隔 -circle: 1 # 是否启用循环,设置为0后将设置为特定时间点运行 +time_mode: 1 +# 0:定时运行模式,仅在指定时间运行(参照Core.py中设置) +# 1:启用循环,一定间隔时间后运行 +mode: [1, 2] # 运行模式,可多选 +# 0:启用RSS抓取模式 +# 1:启用搜狗-微信公众号文章监测 +# 2:启用github项目监测 # 网址配置 url: https://info.masonliu.com/ # 请设置为您自己反代的域名,或者改为 http://127.0.0.1:5000 或者对应IP域名 diff --git a/config/github_config.yaml b/config/github_config.yaml index 9f2f569..c58ca29 100644 --- a/config/github_config.yaml +++ b/config/github_config.yaml @@ -1,24 +1,24 @@ # github相关配置信息 -github_token: xxxxxx # 此处填写github-token,在高速率获取github资源时有效防止403封禁 +github_token: # 此处填写github-token,在高速率获取github资源时有效防止403封禁 translate: False # 是否开启翻译 # 监控列表 tool_list: # 监控已创建的仓库是否更新 - - https://api.github.com/repos/BeichenDream/Godzilla - - https://api.github.com/repos/rebeyond/Behinder - - https://api.github.com/repos/AntSwordProject/antSword - - https://api.github.com/repos/j1anFen/shiro_attack - - https://api.github.com/repos/yhy0/github-cve-monitor - - https://api.github.com/repos/gentilkiwi/mimikatz - - https://api.github.com/repos/ehang-io/nps - - https://api.github.com/repos/chaitin/xray - - https://api.github.com/repos/FunnyWolf/pystinger - - https://api.github.com/repos/L-codes/Neo-reGeorg - - https://api.github.com/repos/shadow1ng/fscan - - https://api.github.com/repos/SafeGroceryStore/MDUT - - https://api.github.com/repos/EdgeSecurityTeam/Vulnerability - - https://api.github.com/repos/wy876/POC - - https://api.github.com/Vme18000yuan/FreePOC + - BeichenDream/Godzilla + - rebeyond/Behinder + - AntSwordProject/antSword + - j1anFen/shiro_attack + - yhy0/github-cve-monitor + - gentilkiwi/mimikatz + - ehang-io/nps + - chaitin/xray + - FunnyWolf/pystinger + - L-codes/Neo-reGeorg + - shadow1ng/fscan + - SafeGroceryStore/MDUT + - EdgeSecurityTeam/Vulnerability + - wy876/POC + - Vme18000yuan/FreePOC keyword_list: # 监控关键词 - sql注入 @@ -43,4 +43,15 @@ user_list: # 监控用户 - zhzyker - lijiejie - projectdiscovery - - HavocFramework \ No newline at end of file + - HavocFramework + +black_words: # 监控违禁词 + - 反共 + - 反中共 + - 反华 + - 香港独立 + - 港独 + - 共产党 + - 毛泽东 + - 习近平 + - 台独 \ No newline at end of file diff --git a/config/keywords.yaml b/config/keywords.yaml index 38e7510..f17b1c8 100644 --- a/config/keywords.yaml +++ b/config/keywords.yaml @@ -1 +1,3 @@ -Sogou-WX: ["中国银行", "APP逆向", "渗透测试"] # 基于搜狗引擎搜索特定关键词的微信公众号文章 \ No newline at end of file +Sogou-WX: ["银行测试", "APP逆向", "渗透测试", "手机银行漏洞", "银行漏洞", "支付漏洞"] # 基于搜狗引擎搜索特定关键词的微信公众号文章 +Doonsec-switch: False +Doonsec: ["逆向", "解包", "POC"] # 洞见微信安全资讯关键词 \ No newline at end of file diff --git a/history/github.md b/history/github.md new file mode 100644 index 0000000..e69de29 diff --git a/history/sec_news.md b/history/sec_news.md index 57d640e..e69de29 100644 --- a/history/sec_news.md +++ b/history/sec_news.md @@ -1,10 +0,0 @@ -#### 文章:[钓鱼下载网站传播“游蛇”威胁,恶意安装程序暗藏远控木马](https://www.4hou.com/posts/6MVz) -**作者**:安天 -**上传时间**:2024-12-25 17:02:19 - ----------------------------------------- -#### 文章:[钓鱼下载网站传播“游蛇”威胁,恶意安装程序暗藏远控木马](https://www.4hou.com/posts/6MVz) -**作者**:安天 -**上传时间**:2024-12-25 17:02:19 - ----------------------------------------- diff --git a/history/tech_passage.md b/history/tech_passage.md index 7748b94..e69de29 100644 --- a/history/tech_passage.md +++ b/history/tech_passage.md @@ -1,228 +0,0 @@ -#### 文章:[【2024补天白帽黑客年度盛典】Windows服务进程漏洞挖掘](https://forum.butian.net/share/4089) -**来源**:subject -**上传时间**:2024-12-25 17:39:57 -**描述**:演讲议题:Windows服务进程漏洞挖掘 - ----------------------------------------- -#### 文章:[【2024补天白帽黑客年度盛典】大模型越狱攻击与评测](https://forum.butian.net/share/4088) -**来源**:subject -**上传时间**:2024-12-25 17:33:43 -**描述**:演讲议题:大模型越狱攻击与评测 - ----------------------------------------- -#### 文章:[【2024补天白帽黑客年度盛典】当今勒索病毒的攻与防](https://forum.butian.net/share/4087) -**来源**:subject -**上传时间**:2024-12-25 17:26:49 -**描述**:演讲议题:当今勒索病毒的攻与防 - ----------------------------------------- -#### 文章:[网安瞭望台第18期:警惕新型攻击利用Windows Defender绕过终端检测、CVE-2024-50379 漏洞利用工具分享](https://mp.weixin.qq.com/s?__biz=Mzg2NTkwODU3Ng==&mid=2247514556&idx=1&sn=a10e80238c91658489ebe6cc8657315c) -**作者**:东方隐侠安全团队 -**上传时间**:2024-12-25 20:31:30 -**简介**:网安资讯分享\\x0d\\x0aDAILY NEWS AND KNOWLEDGE - ----------------------------------------- -#### 文章:[MDUT-Extend(MDUT-增强版) V1.2.0 Released](https://mp.weixin.qq.com/s?__biz=MzI5NDg0ODkwMQ==&mid=2247486138&idx=1&sn=4f881e7e1cc99466d57aa3d95d980b3b) -**作者**:格格巫和蓝精灵 -**上传时间**:2024-12-25 20:16:51 -**简介**:None - ----------------------------------------- -#### 文章:[CobaltStrike Bof开发(1)](https://mp.weixin.qq.com/s?__biz=Mzg5MDg3OTc0OA==&mid=2247489138&idx=1&sn=3095870df2c9d365db698936abde43b2) -**作者**:Relay学安全 -**上传时间**:2024-12-25 20:05:52 -**简介**:None - ----------------------------------------- -#### 文章:[Sa7mon-S3scanner:一款针对S3 Bucket的错误配置扫描工具](https://mp.weixin.qq.com/s?__biz=MjM5NjA0NjgyMA==&mid=2651310595&idx=4&sn=78fdcc1150147cc6155e1a2e73c31521) -**作者**:FreeBuf -**上传时间**:2024-12-25 19:56:23 -**简介**:该工具兼容S3 API,可扫描开放S3 Bucket中潜在的错误配置。 - ----------------------------------------- -#### 文章:[免杀对抗从0开始(七)](https://mp.weixin.qq.com/s?__biz=Mzk0MzU5NTg1Ng==&mid=2247484849&idx=1&sn=f075965e73b511cfba0e53536232cf34) -**作者**:泾弦安全 -**上传时间**:2024-12-25 19:50:33 -**简介**:None - ----------------------------------------- -#### 文章:[针对护网行动中红队溯源工具 - HuntBack](https://mp.weixin.qq.com/s?__biz=MzIzNTE0Mzc0OA==&mid=2247486015&idx=1&sn=bc5b7dea1d9621678e4cc49a85d736ae) -**作者**:GSDK安全团队 -**上传时间**:2024-12-25 19:41:48 -**简介**:HuntBack(反击狩猎),用于攻防演练中,防守方对恶意ip进行web指纹扫描与识别。在蓝队职守中,安全设备爆出恶意攻击ip地址,如果对方使用的是自己的服务器,并且搭建了一些安全业务,可使用本工具对目前已知工具进行探测 - ----------------------------------------- -#### 文章:[MDUT-Extend(MDUT-增强版) V1.2.0 Released](https://mp.weixin.qq.com/s?__biz=MzU0MzkzOTYzOQ==&mid=2247489554&idx=1&sn=d3d5aa81f68c323b815bcabe78f0b46a) -**作者**:黑伞安全 -**上传时间**:2024-12-25 19:38:38 -**简介**:None - ----------------------------------------- -#### 文章:[一款wifi数据抓包破解工具Wireshark](https://mp.weixin.qq.com/s?__biz=MzI1MzQwNjEzNA==&mid=2247484154&idx=1&sn=252b411b617f65ba4513c1dda0fe70aa) -**作者**:渗透测试知识学习 -**上传时间**:2024-12-25 19:35:56 -**简介**:wife数据包破解 - ----------------------------------------- -#### 文章:[银狐黑产组织最新免杀样本详细分析](https://mp.weixin.qq.com/s?__biz=MzA4ODEyODA3MQ==&mid=2247489745&idx=1&sn=92cfd13140b08317c1901f6f89c89239) -**作者**:安全分析与研究 -**上传时间**:2024-12-25 18:30:13 -**简介**:银狐黑产组织最新免杀样本详细分析 - ----------------------------------------- -#### 文章:[警惕!2024年全球零日漏洞利用呈现七大趋势](https://mp.weixin.qq.com/s?__biz=MzI4NDY2MDMwMw==&mid=2247513353&idx=1&sn=cc572d3391797a15aa66590d70d0ac96) -**作者**:安全内参 -**上传时间**:2024-12-25 18:14:14 -**简介**:零日漏洞的攻击目标迁移 - ----------------------------------------- -#### 文章:[Apache Tomcat 漏洞导致服务器易受RCE攻击](https://mp.weixin.qq.com/s?__biz=MzI2NTg4OTc5Nw==&mid=2247521893&idx=1&sn=867f98595849107577a98fcaf043a177) -**作者**:代码卫士 -**上传时间**:2024-12-25 18:11:51 -**简介**:速修复 - ----------------------------------------- -#### 文章:[绕过Elastic EDR进行横向移动](https://mp.weixin.qq.com/s?__biz=MzAxMjYyMzkwOA==&mid=2247526433&idx=1&sn=6ee718605b5d67e3f68417bf664c46f8) -**作者**:Ots安全 -**上传时间**:2024-12-25 18:01:05 -**简介**:None - ----------------------------------------- -#### 文章:[探索 NASA CryptoLib 的 SDLS 实现中的漏洞](https://mp.weixin.qq.com/s?__biz=MzAxMjYyMzkwOA==&mid=2247526433&idx=2&sn=9734352beed4645fcdc599733e607e22) -**作者**:Ots安全 -**上传时间**:2024-12-25 18:01:05 -**简介**:None - ----------------------------------------- -#### 文章:[利用 WDAC 武器化:粉碎 EDR 的梦想](https://mp.weixin.qq.com/s?__biz=MzAxMjYyMzkwOA==&mid=2247526433&idx=3&sn=7fcef7477d3365c9f2905137b1be267e) -**作者**:Ots安全 -**上传时间**:2024-12-25 18:01:05 -**简介**:None - ----------------------------------------- -#### 文章:[《英雄无敌》4:修改pe导入表注入DLL扩展回城术功能](https://mp.weixin.qq.com/s?__biz=MjM5NTc2MDYxMw==&mid=2458587767&idx=1&sn=dd1c04637890c14cb9d72fb95bbb0010) -**作者**:看雪学苑 -**上传时间**:2024-12-25 17:59:58 -**简介**:看雪论坛作者ID:fdark - ----------------------------------------- -#### 文章:[【漏洞文字】博斯外贸管理软件 SQL注入](https://mp.weixin.qq.com/s?__biz=MzkyMTY1NDc2OA==&mid=2247487244&idx=1&sn=5011bd862eae6337a04f9e1673c7a184) -**作者**:小羊安全屋 -**上传时间**:2024-12-25 17:01:56 -**简介**:None - ----------------------------------------- -#### 文章:[【2024补天白帽黑客年度盛典】Windows服务进程漏洞挖掘](https://forum.butian.net/share/4089) -**来源**:subject -**上传时间**:2024-12-25 17:39:57 -**描述**:演讲议题:Windows服务进程漏洞挖掘 - ----------------------------------------- -#### 文章:[【2024补天白帽黑客年度盛典】大模型越狱攻击与评测](https://forum.butian.net/share/4088) -**来源**:subject -**上传时间**:2024-12-25 17:33:43 -**描述**:演讲议题:大模型越狱攻击与评测 - ----------------------------------------- -#### 文章:[【2024补天白帽黑客年度盛典】当今勒索病毒的攻与防](https://forum.butian.net/share/4087) -**来源**:subject -**上传时间**:2024-12-25 17:26:49 -**描述**:演讲议题:当今勒索病毒的攻与防 - ----------------------------------------- -#### 文章:[网安瞭望台第18期:警惕新型攻击利用Windows Defender绕过终端检测、CVE-2024-50379 漏洞利用工具分享](https://mp.weixin.qq.com/s?__biz=Mzg2NTkwODU3Ng==&mid=2247514556&idx=1&sn=a10e80238c91658489ebe6cc8657315c) -**作者**:东方隐侠安全团队 -**上传时间**:2024-12-25 20:31:30 -**简介**:网安资讯分享\\x0d\\x0aDAILY NEWS AND KNOWLEDGE - ----------------------------------------- -#### 文章:[MDUT-Extend(MDUT-增强版) V1.2.0 Released](https://mp.weixin.qq.com/s?__biz=MzI5NDg0ODkwMQ==&mid=2247486138&idx=1&sn=4f881e7e1cc99466d57aa3d95d980b3b) -**作者**:格格巫和蓝精灵 -**上传时间**:2024-12-25 20:16:51 -**简介**:None - ----------------------------------------- -#### 文章:[CobaltStrike Bof开发(1)](https://mp.weixin.qq.com/s?__biz=Mzg5MDg3OTc0OA==&mid=2247489138&idx=1&sn=3095870df2c9d365db698936abde43b2) -**作者**:Relay学安全 -**上传时间**:2024-12-25 20:05:52 -**简介**:None - ----------------------------------------- -#### 文章:[Sa7mon-S3scanner:一款针对S3 Bucket的错误配置扫描工具](https://mp.weixin.qq.com/s?__biz=MjM5NjA0NjgyMA==&mid=2651310595&idx=4&sn=78fdcc1150147cc6155e1a2e73c31521) -**作者**:FreeBuf -**上传时间**:2024-12-25 19:56:23 -**简介**:该工具兼容S3 API,可扫描开放S3 Bucket中潜在的错误配置。 - ----------------------------------------- -#### 文章:[免杀对抗从0开始(七)](https://mp.weixin.qq.com/s?__biz=Mzk0MzU5NTg1Ng==&mid=2247484849&idx=1&sn=f075965e73b511cfba0e53536232cf34) -**作者**:泾弦安全 -**上传时间**:2024-12-25 19:50:33 -**简介**:None - ----------------------------------------- -#### 文章:[针对护网行动中红队溯源工具 - HuntBack](https://mp.weixin.qq.com/s?__biz=MzIzNTE0Mzc0OA==&mid=2247486015&idx=1&sn=bc5b7dea1d9621678e4cc49a85d736ae) -**作者**:GSDK安全团队 -**上传时间**:2024-12-25 19:41:48 -**简介**:HuntBack(反击狩猎),用于攻防演练中,防守方对恶意ip进行web指纹扫描与识别。在蓝队职守中,安全设备爆出恶意攻击ip地址,如果对方使用的是自己的服务器,并且搭建了一些安全业务,可使用本工具对目前已知工具进行探测 - ----------------------------------------- -#### 文章:[MDUT-Extend(MDUT-增强版) V1.2.0 Released](https://mp.weixin.qq.com/s?__biz=MzU0MzkzOTYzOQ==&mid=2247489554&idx=1&sn=d3d5aa81f68c323b815bcabe78f0b46a) -**作者**:黑伞安全 -**上传时间**:2024-12-25 19:38:38 -**简介**:None - ----------------------------------------- -#### 文章:[一款wifi数据抓包破解工具Wireshark](https://mp.weixin.qq.com/s?__biz=MzI1MzQwNjEzNA==&mid=2247484154&idx=1&sn=252b411b617f65ba4513c1dda0fe70aa) -**作者**:渗透测试知识学习 -**上传时间**:2024-12-25 19:35:56 -**简介**:wife数据包破解 - ----------------------------------------- -#### 文章:[银狐黑产组织最新免杀样本详细分析](https://mp.weixin.qq.com/s?__biz=MzA4ODEyODA3MQ==&mid=2247489745&idx=1&sn=92cfd13140b08317c1901f6f89c89239) -**作者**:安全分析与研究 -**上传时间**:2024-12-25 18:30:13 -**简介**:银狐黑产组织最新免杀样本详细分析 - ----------------------------------------- -#### 文章:[警惕!2024年全球零日漏洞利用呈现七大趋势](https://mp.weixin.qq.com/s?__biz=MzI4NDY2MDMwMw==&mid=2247513353&idx=1&sn=cc572d3391797a15aa66590d70d0ac96) -**作者**:安全内参 -**上传时间**:2024-12-25 18:14:14 -**简介**:零日漏洞的攻击目标迁移 - ----------------------------------------- -#### 文章:[Apache Tomcat 漏洞导致服务器易受RCE攻击](https://mp.weixin.qq.com/s?__biz=MzI2NTg4OTc5Nw==&mid=2247521893&idx=1&sn=867f98595849107577a98fcaf043a177) -**作者**:代码卫士 -**上传时间**:2024-12-25 18:11:51 -**简介**:速修复 - ----------------------------------------- -#### 文章:[绕过Elastic EDR进行横向移动](https://mp.weixin.qq.com/s?__biz=MzAxMjYyMzkwOA==&mid=2247526433&idx=1&sn=6ee718605b5d67e3f68417bf664c46f8) -**作者**:Ots安全 -**上传时间**:2024-12-25 18:01:05 -**简介**:None - ----------------------------------------- -#### 文章:[探索 NASA CryptoLib 的 SDLS 实现中的漏洞](https://mp.weixin.qq.com/s?__biz=MzAxMjYyMzkwOA==&mid=2247526433&idx=2&sn=9734352beed4645fcdc599733e607e22) -**作者**:Ots安全 -**上传时间**:2024-12-25 18:01:05 -**简介**:None - ----------------------------------------- -#### 文章:[利用 WDAC 武器化:粉碎 EDR 的梦想](https://mp.weixin.qq.com/s?__biz=MzAxMjYyMzkwOA==&mid=2247526433&idx=3&sn=7fcef7477d3365c9f2905137b1be267e) -**作者**:Ots安全 -**上传时间**:2024-12-25 18:01:05 -**简介**:None - ----------------------------------------- -#### 文章:[《英雄无敌》4:修改pe导入表注入DLL扩展回城术功能](https://mp.weixin.qq.com/s?__biz=MjM5NTc2MDYxMw==&mid=2458587767&idx=1&sn=dd1c04637890c14cb9d72fb95bbb0010) -**作者**:看雪学苑 -**上传时间**:2024-12-25 17:59:58 -**简介**:看雪论坛作者ID:fdark - ----------------------------------------- -#### 文章:[【漏洞文字】博斯外贸管理软件 SQL注入](https://mp.weixin.qq.com/s?__biz=MzkyMTY1NDc2OA==&mid=2247487244&idx=1&sn=5011bd862eae6337a04f9e1673c7a184) -**作者**:小羊安全屋 -**上传时间**:2024-12-25 17:01:56 -**简介**:None - ----------------------------------------- diff --git a/media/github.py b/media/github.py deleted file mode 100644 index 633d4a7..0000000 --- a/media/github.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -import os -import requests -import xml.etree.ElementTree as ET -import json -from requests.exceptions import RequestException -from loguru import logger - - - -github_headers = { - 'Authorization': f"token {github_token}" -} - -# 抓取本年的 -year = datetime.datetime.now().year -api = "https://api.github.com/search/repositories?q=CVE-{}&sort=updated".format(year) -json_str = requests.get(api, headers=github_headers, timeout=10).json() \ No newline at end of file diff --git a/media/__pycache__/common.cpython-312.pyc b/spider/__pycache__/common.cpython-312.pyc similarity index 100% rename from media/__pycache__/common.cpython-312.pyc rename to spider/__pycache__/common.cpython-312.pyc diff --git a/media/__pycache__/freebuf.cpython-312.pyc b/spider/__pycache__/freebuf.cpython-312.pyc similarity index 100% rename from media/__pycache__/freebuf.cpython-312.pyc rename to spider/__pycache__/freebuf.cpython-312.pyc diff --git a/spider/__pycache__/github.cpython-312.pyc b/spider/__pycache__/github.cpython-312.pyc new file mode 100644 index 0000000..513e0c6 Binary files /dev/null and b/spider/__pycache__/github.cpython-312.pyc differ diff --git a/spider/__pycache__/sougou_wx.cpython-312.pyc b/spider/__pycache__/sougou_wx.cpython-312.pyc new file mode 100644 index 0000000..360b351 Binary files /dev/null and b/spider/__pycache__/sougou_wx.cpython-312.pyc differ diff --git a/media/__pycache__/xianzhi.cpython-312.pyc b/spider/__pycache__/xianzhi.cpython-312.pyc similarity index 87% rename from media/__pycache__/xianzhi.cpython-312.pyc rename to spider/__pycache__/xianzhi.cpython-312.pyc index 8d31c86..848798e 100644 Binary files a/media/__pycache__/xianzhi.cpython-312.pyc and b/spider/__pycache__/xianzhi.cpython-312.pyc differ diff --git a/media/common.py b/spider/common.py similarity index 100% rename from media/common.py rename to spider/common.py diff --git a/media/freebuf.py b/spider/freebuf.py similarity index 100% rename from media/freebuf.py rename to spider/freebuf.py diff --git a/spider/github.py b/spider/github.py new file mode 100644 index 0000000..56b160b --- /dev/null +++ b/spider/github.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +import time +import yaml +import requests +import json +import datetime +from requests.exceptions import RequestException +import xml.etree.ElementTree as ET +from loguru import logger + +MAX_DESCRIPTION_LENGTH = 300 + +with open('./config/github_config.yaml', 'r', encoding="utf-8") as file: + config = yaml.safe_load(file) + # list = yaml.load(f,Loader=yaml.FullLoader) +token = config['github_token'] +tool_list, keyword_list, user_list, black_words = config['tool_list'], config['keyword_list'], config['user_list'], config['black_words'] + +def fetch_rss(url, timeout=10): + if token is None: + headers = { + "Content-Type": "application/json" + } + else: + headers = { + 'Authorization': f"token {token}", + "Content-Type": "application/json" + } + + try: + response = requests.get(url, headers=headers, timeout=timeout) + response.raise_for_status() + return response.json() + except requests.Timeout: + logger.warning(f"请求 {url} 超时,跳过保存操作。") + except requests.exceptions.RequestException as e: + logger.error(f"请求 {url} 时发生错误: {e}") + +def save_to_json(data, filename): + with open(filename, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=4) + +def github_main_keyword(key): + all_results = [] # 用于存储所有结果 + for keyword in key: + logger.info(f"github_keyword:关键词【{keyword}】获取开始。") + api_node = "https://api.github.com/search/repositories?q={}&sort=updated&per_page=20".format(keyword) + result = fetch_rss(api_node) + for i in range(0, 20): + description = result['items'][i]['description'] + + if description is None: + pass + # 检查描述中是否包含黑名单词汇 + elif any(word in description for word in black_words): + continue # 跳过本次执行 + # 截断描述并在300字节处添加... + elif len(description.encode('utf-8')) > MAX_DESCRIPTION_LENGTH: + # 找到300字节处的索引 + byte_index = 0 + char_index = 0 + while byte_index < MAX_DESCRIPTION_LENGTH and char_index < len(description): + byte_index += len(description[char_index].encode('utf-8')) + char_index += 1 + description = description[:char_index - 1] + '...' + + link = result['items'][i]['html_url'] + name = result['items'][i]['name'] + created_at = result['items'][i]['created_at'] + author = result['items'][i]['owner']['login'] + language = result['items'][i]['language'] + + # 将每个项目的详细信息存储在一个字典中 + project_info = { + 'link': link, + 'name': name, + 'created_at': created_at, + 'description': description, + 'author': author, + 'language': language, + 'keyword': keyword + } + # print(project_info) + all_results.append(project_info) + time.sleep(10) + + # 将所有结果写入JSON文件 + save_to_json(all_results, './JSON/github_keyword.json') + + +def github_main_repo(key): + all_results = [] # 用于存储所有结果 + for keyword in key: + logger.info(f"github_repo:项目【{keyword}】更新情况获取开始。") + api_node = "https://api.github.com/repos/{}/commits?per_page=1".format(keyword) + result = fetch_rss(api_node) + + commit = result[0] # 获取最新的提交记录 + description = commit['commit']['message'] + + if description is None: + pass + # 截断描述并在300字节处添加... + elif len(description.encode('utf-8')) > MAX_DESCRIPTION_LENGTH: + byte_index = 0 + char_index = 0 + while byte_index < MAX_DESCRIPTION_LENGTH and char_index < len(description): + byte_index += len(description[char_index].encode('utf-8')) + char_index += 1 + description = description[:char_index - 1] + '...' + + author = commit['commit']['author']['name'] + updated_at = commit['commit']['author']['date'] + link_2 = commit['html_url'] + + # 将每个项目的详细信息存储在一个字典中 + project_info = { + 'link': f"https://api.github.com/{keyword}", + 'name': keyword, + 'updated_at': updated_at, + 'description': description, + 'author': author, + 'link_2': link_2, + 'keyword': keyword + } + # print(project_info) + all_results.append(project_info) + time.sleep(10) + + # 将所有结果写入JSON文件 + save_to_json(all_results, './JSON/github_repo.json') + +def github_main_user(key): + all_results = [] # 用于存储所有结果 + for keyword in key: + logger.info(f"github_user:作者【{keyword}】更新情况获取开始。") + api_node = "https://api.github.com/users/{}/repos?sort=created&per_page=10".format(keyword) + result = fetch_rss(api_node) + for i in range(0, len(result)): + description = result[i]['description'] + + if description is None: + pass + # 检查描述中是否包含黑名单词汇 + elif any(word in description for word in black_words): + continue # 跳过本次执行 + # 截断描述并在300字节处添加... + elif len(description.encode('utf-8')) > MAX_DESCRIPTION_LENGTH: + # 找到300字节处的索引 + byte_index = 0 + char_index = 0 + while byte_index < MAX_DESCRIPTION_LENGTH and char_index < len(description): + byte_index += len(description[char_index].encode('utf-8')) + char_index += 1 + description = description[:char_index - 1] + '...' + + link = result[i]['html_url'] + name = result[i]['name'] + created_at = result[i]['created_at'] + author = result[i]['owner']['login'] + language = result[i]['language'] + + # 将每个项目的详细信息存储在一个字典中 + project_info = { + 'link': link, + 'name': name, + 'created_at': created_at, + 'description': description, + 'author': author, + 'language': language, + 'keyword': keyword + } + # print(project_info) + all_results.append(project_info) + time.sleep(10) + + # 将所有结果写入JSON文件 + save_to_json(all_results, './JSON/github_user.json') + +def github_main(): + if keyword_list: + github_main_keyword(keyword_list) + if tool_list: + github_main_repo(tool_list) + if user_list: + github_main_user(user_list) + +if __name__ == "__main__": + github_main() \ No newline at end of file diff --git a/media/sougou-wx.py b/spider/sougou_wx.py similarity index 95% rename from media/sougou-wx.py rename to spider/sougou_wx.py index b12d7d7..8131ae8 100644 --- a/media/sougou-wx.py +++ b/spider/sougou_wx.py @@ -27,9 +27,10 @@ def fetch_html(url, headers=headers, timeout=10): response = requests.get(url, headers=headers, timeout=timeout) response.raise_for_status() return response.text + except requests.Timeout: + logger.warning(f"请求 {url} 超时,跳过保存操作。") except requests.exceptions.RequestException as e: - print(f"请求出错: {e}") - return None + logger.error(f"请求 {url} 时发生错误: {e}") def parse_html(html_content): soup = BeautifulSoup(html_content, 'html.parser') diff --git a/media/xianzhi.py b/spider/xianzhi.py similarity index 100% rename from media/xianzhi.py rename to spider/xianzhi.py