From e594ca1711cf6a20900a16282d1bda1b32f1eb09 Mon Sep 17 00:00:00 2001 From: MasonLiu <2857911564@qq.com> Date: Thu, 2 Jan 2025 16:03:25 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86=E5=AF=B9github-rele?= =?UTF-8?q?ase=E6=83=85=E5=86=B5=E7=9A=84=E7=9B=91=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Core.py | 6 ++- GotoSend/github.py | 82 ++++++++++++++++++++++++++++++++------- config/github_config.yaml | 2 +- spider/github.py | 39 +++++++++++++++++-- 4 files changed, 109 insertions(+), 20 deletions(-) diff --git a/Core.py b/Core.py index df8e041..6e5e7f3 100644 --- a/Core.py +++ b/Core.py @@ -134,12 +134,14 @@ def send_job_SX(): def send_job_github(time_1): github_main() - result_github_1_long, result_github_2_long, result_github_3_long = Src_github(time_1, False) - result_github_1_short, result_github_2_short, result_github_3_short = Src_github(time_1, True) + result_github_1_long, result_github_2_long, result_github_3_long, result_github_4_long = Src_github(time_1, False) + result_github_1_short, result_github_2_short, result_github_3_short, result_github_4_short = Src_github(time_1, True) webhook_url, timestamp, sign = gen_sign() check_avaliable(result_github_1_long, result_github_1_short, "Github项目监控-关键词监控", webhook_url, timestamp, sign) check_avaliable(result_github_2_long, result_github_2_short, "Github项目监控-项目更新情况", webhook_url, timestamp, sign) + webhook_url, timestamp, sign = gen_sign() check_avaliable(result_github_3_long, result_github_3_short, "Github项目监控-大佬工具", webhook_url, timestamp, sign) + check_avaliable(result_github_4_long, result_github_4_short, "Github项目监控-项目版本发布监测", webhook_url, timestamp, sign) # 探测rss源状态 def check_rss_status(url): diff --git a/GotoSend/github.py b/GotoSend/github.py index 83779fa..1edf230 100644 --- a/GotoSend/github.py +++ b/GotoSend/github.py @@ -31,6 +31,13 @@ def create_database(): keyword TEXT, link2 TEXT ); + CREATE TABLE IF NOT EXISTS releases ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + link TEXT, + pubDate DATETIME, + author TEXT, + keyword TEXT + ); CREATE TABLE IF NOT EXISTS users ( id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT, @@ -66,6 +73,13 @@ def insert_data(): if not isinstance(data_repo, list): raise ValueError("JSON文件格式错误,请检查爬取程序是否异常!") + if not os.path.exists('./JSON/github_release.json'): + raise FileNotFoundError(f"github_release文件不存在,请检查程序是否运行正常!") + with open('./JSON/github_release.json', 'r', encoding='utf-8') as file: + data_release = json.load(file) + if not isinstance(data_release, list): + raise ValueError("JSON文件格式错误,请检查爬取程序是否异常!") + if not os.path.exists('./JSON/github_user.json'): raise FileNotFoundError(f"github_user文件不存在,请检查程序是否运行正常!") with open('./JSON/github_user.json', 'r', encoding='utf-8') as file: @@ -134,6 +148,28 @@ def insert_data(): INSERT INTO repos (title, link, description, pubDate, author, link2, keyword) VALUES (?, ?, ?, ?, ?, ?, ?) ''', (entry['title'], entry['link'], entry['description'], formatted_pub_date, entry['author'], entry['link2'], entry['keyword'])) + + for index, item in enumerate(data_release, start=1): + entry = { + "id": index, + "link": item.get("link", ""), + "pubDate": item.get("published_at", ""), + "author": item.get("author", ""), + "keyword": item.get("keyword", "") + } + try: + # 解析 pubDate 字符串为 datetime 对象 + pub_date = datetime.strptime(entry['pubDate'], '%Y-%m-%dT%H:%M:%SZ') + # 格式化 pubDate 为所需的格式 + formatted_pub_date = pub_date.strftime('%Y-%m-%d %H:%M:%S') + except ValueError: + # 如果解析失败,使用原始 pubDate 字符串 + formatted_pub_date = entry['pubDate'] + + cursor.execute(''' + INSERT INTO releases (link, pubDate, author, keyword) + VALUES (?, ?, ?, ?) + ''', (entry['link'], formatted_pub_date, entry['author'], entry['keyword'])) # 插入 users 数据 @@ -215,14 +251,6 @@ def select_articles(e_hour): ORDER BY pubDate DESC LIMIT 5 ''', (two_months_ago.strftime('%Y-%m-%d %H:%M:%S'), now.strftime('%Y-%m-%d %H:%M:%S'))) - - # 查询最近的3条未被标记为True的消息 - # cursor.execute(''' - # SELECT * FROM users - # WHERE is_sended IS NULL - # ORDER BY pubDate DESC - # LIMIT 5 - # ''') result_3 = cursor.fetchall() # print(results) @@ -238,15 +266,23 @@ def select_articles(e_hour): conn.commit() # 提交事务 + cursor.execute(''' + SELECT * FROM releases + WHERE pubDate BETWEEN ? AND ? + ''', (start_time.strftime('%Y-%m-%d %H:%M:%S'), now.strftime('%Y-%m-%d %H:%M:%S'))) + + result_4 = cursor.fetchall() + cursor.close() conn.close() - return result_1, result_2, result_3 + return result_1, result_2, result_3, result_4 def clear_table(): conn = sqlite3.connect('./db/github.db') cursor = conn.cursor() cursor.execute('DELETE FROM repos') + cursor.execute('DELETE FROM releases') conn.commit() conn.close() @@ -287,6 +323,10 @@ def get_filtered_articles(entries, Is_short, choice): result += f"项目描述:{entry[3]}\n" result += f"上传时间:{entry[4]}\n" result += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 4: + result += f"【{entry[3]}】为[{entry[4]}]({entry[1]})发布了新版本,请及时查收!\n" + result += f"发布时间:{entry[2]}\n" + result += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 elif Is_short == True: if choice == 1: result += f"关键词【{entry[7]}】发现新项目:[{entry[1]}]({entry[2]})\n" @@ -304,6 +344,10 @@ def get_filtered_articles(entries, Is_short, choice): result += f"项目描述:{entry[3]}\n" result += f"上传时间:{entry[4]}\n" result += "\n" + "-" * 3 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 4: + result += f"【{entry[3]}】为[{entry[4]}]({entry[1]})发布了新版本,请及时查收!\n" + result += f"发布时间:{entry[2]}\n" + result += "\n" + "-" * 3 + "\n" # 添加分隔线以便区分不同文章 if choice == 1: record += f"#### 关键词【{entry[7]}】发现新项目:[{entry[1]}]({entry[2]})\n" @@ -322,6 +366,10 @@ def get_filtered_articles(entries, Is_short, choice): record += f"**项目描述**:{entry[3]}\n" record += f"**上传时间**:{entry[4]}\n" record += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 + elif choice == 4: + record += f"#### 【{entry[3]}】为[{entry[4]}]({entry[1]})发布了新版本,请及时查收!\n" + record += f"**发布时间**:{entry[2]}\n" + record += "\n" + "-" * 10 + "\n" # 添加分隔线以便区分不同文章 record_md(record) return result @@ -339,7 +387,7 @@ def Src_github(e_hour, Is_short): insert_data() # 查询指定时间段内的数据 - keyword_data, repo_data, user_data = select_articles(e_hour) + keyword_data, repo_data, user_data, release_data = select_articles(e_hour) if keyword_data: result_1 = get_filtered_articles(keyword_data, Is_short, 1) @@ -353,17 +401,23 @@ def Src_github(e_hour, Is_short): result_3 = get_filtered_articles(user_data, Is_short, 3) else: result_3 = "" - return result_1, result_2, result_3 + if release_data: + result_4 = get_filtered_articles(release_data, Is_short, 4) + else: + result_4 = "" + return result_1, result_2, result_3, result_4 if __name__ == "__main__": - result_1, result_2, result_3 = Src_github(24, False) + result_1, result_2, result_3, result_4 = Src_github(24000, False) if result_1 != "": print(result_1) - elif result_2 != "": + if result_2 != "": print(result_2) if result_3 != "": print(result_3) - else: + if result_4 != "": + print(result_4) + if result_1 == "" and result_2 == "" and result_3 == "" and result_4 == "": # 如果为空,则跳过执行 print("-" * 10) print("github数据为空,跳过执行。") \ No newline at end of file diff --git a/config/github_config.yaml b/config/github_config.yaml index c58ca29..7a3a65b 100644 --- a/config/github_config.yaml +++ b/config/github_config.yaml @@ -17,8 +17,8 @@ tool_list: # 监控已创建的仓库是否更新 - shadow1ng/fscan - SafeGroceryStore/MDUT - EdgeSecurityTeam/Vulnerability - - wy876/POC - Vme18000yuan/FreePOC + - wy876/POC keyword_list: # 监控关键词 - sql注入 diff --git a/spider/github.py b/spider/github.py index 56b160b..1dded87 100644 --- a/spider/github.py +++ b/spider/github.py @@ -82,7 +82,7 @@ def github_main_keyword(key): } # print(project_info) all_results.append(project_info) - time.sleep(10) + time.sleep(5) # 将所有结果写入JSON文件 save_to_json(all_results, './JSON/github_keyword.json') @@ -125,11 +125,43 @@ def github_main_repo(key): } # print(project_info) all_results.append(project_info) - time.sleep(10) + time.sleep(5) # 将所有结果写入JSON文件 save_to_json(all_results, './JSON/github_repo.json') +def github_main_release(key): + all_results = [] # 用于存储所有结果 + for keyword in key: + logger.info(f"github_repo:项目【{keyword}】发版情况获取开始。") + api_node = "https://api.github.com/repos/{}/releases?per_page=1".format(keyword) + result = fetch_rss(api_node) + if not result: + logger.warning(f"github_repo:项目【{keyword}】不存在版本发布情况。") + time.sleep(5) + continue + + # print(result) + # print(keyword) + commit = result[0] # 获取最新的提交记录 + author = commit['author']['login'] + published_at = commit['published_at'] + link = commit['html_url'] + + # 将每个项目的详细信息存储在一个字典中 + project_info = { + 'link': link, + 'published_at': published_at, + 'author': author, + 'keyword': keyword + } + # print(project_info) + all_results.append(project_info) + time.sleep(5) + + # 将所有结果写入JSON文件 + save_to_json(all_results, './JSON/github_release.json') + def github_main_user(key): all_results = [] # 用于存储所有结果 for keyword in key: @@ -172,7 +204,7 @@ def github_main_user(key): } # print(project_info) all_results.append(project_info) - time.sleep(10) + time.sleep(5) # 将所有结果写入JSON文件 save_to_json(all_results, './JSON/github_user.json') @@ -182,6 +214,7 @@ def github_main(): github_main_keyword(keyword_list) if tool_list: github_main_repo(tool_list) + github_main_release(tool_list) if user_list: github_main_user(user_list)