# -*- coding: utf-8 -*-
"""Collect recent GitHub activity and dump it to JSON files.

Four kinds of data are gathered through the GitHub REST API, driven by the
lists in ``./config/github_config.yaml``:

* repositories matching each search keyword   -> github_keyword.json
* the latest commit of each watched repo      -> github_repo.json
* the latest release of each watched repo     -> github_release.json
* the newest repositories of each watched user -> github_user.json
"""
import datetime
import json
import os
import time
import xml.etree.ElementTree as ET

import requests
import yaml
from loguru import logger
from requests.exceptions import RequestException

# Maximum size, in UTF-8 bytes, of a description before it is truncated.
MAX_DESCRIPTION_LENGTH = 300

# Configuration is loaded once, at import time.
with open('./config/github_config.yaml', 'r', encoding="utf-8") as file:
    config = yaml.safe_load(file)

token = config['github_token']
tool_list, keyword_list, user_list, black_words = (
    config['tool_list'],
    config['keyword_list'],
    config['user_list'],
    config['black_words'],
)


def fetch_rss(url, timeout=10):
    """Fetch *url* and return the decoded JSON body.

    Args:
        url: Full URL to request with HTTP GET.
        timeout: Request timeout in seconds.

    Returns:
        The parsed JSON document (a ``dict`` or ``list``), or ``None`` when
        the request timed out, failed, returned an HTTP error status, or the
        body was not valid JSON. Failures are logged, never raised.
    """
    headers = {"Content-Type": "application/json"}
    # An empty token is treated like a missing one so that a meaningless
    # "token " Authorization header is never sent.
    if token:
        headers['Authorization'] = f"token {token}"

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.Timeout:
        logger.warning(f"请求 {url} 超时,跳过保存操作。")
    except RequestException as e:
        logger.error(f"请求 {url} 时发生错误: {e}")
    except ValueError as e:
        # Older versions of requests raise a plain ValueError subclass when
        # the body is not JSON.
        logger.error(f"请求 {url} 返回的内容无法解析为 JSON: {e}")
    return None


def save_to_json(data, filename):
    """Serialize *data* to *filename* as indented UTF-8 JSON.

    The parent directory is created when it does not exist, so results that
    took a long time to collect are not lost to a missing folder.
    """
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


def _truncate_description(description, max_bytes=MAX_DESCRIPTION_LENGTH):
    """Return *description* cut below *max_bytes* UTF-8 bytes plus '...'.

    ``None`` and texts that already fit are returned unchanged. Truncation
    happens on character boundaries, so multi-byte characters are never split.
    """
    if description is None or len(description.encode('utf-8')) <= max_bytes:
        return description

    byte_count = 0
    char_count = 0
    # Count whole characters until the byte budget is reached or exceeded.
    while byte_count < max_bytes and char_count < len(description):
        byte_count += len(description[char_count].encode('utf-8'))
        char_count += 1
    # Drop the character that reached the limit, then mark the cut.
    return description[:char_count - 1] + '...'


def _is_blacklisted(description):
    """Return True when *description* contains any configured black word."""
    if description is None:
        return False
    # ``black_words`` may be None when the YAML key is present but empty.
    return any(word in description for word in (black_words or ()))


def _repo_info(item, keyword):
    """Build the output record for one repository object from the API.

    Returns ``None`` when the repository description is blacklisted.
    """
    description = item['description']
    if _is_blacklisted(description):
        return None
    return {
        'link': item['html_url'],
        'name': item['name'],
        'created_at': item['created_at'],
        'description': _truncate_description(description),
        'author': item['owner']['login'],
        'language': item['language'],
        'keyword': keyword,
    }


def github_main_keyword(key):
    """Search repositories for every keyword in *key* and save the matches.

    Results of all keywords are written to
    ``./resources/JSON/github_keyword.json``.
    """
    all_results = []  # accumulates the records of every keyword
    for keyword in key:
        logger.info(f"github_keyword:关键词【{keyword}】获取开始。")
        api_node = "https://api.github.com/search/repositories?q={}&sort=updated&per_page=20".format(keyword)
        result = fetch_rss(api_node)

        # The request may have failed (None) or returned fewer than 20 items,
        # so iterate over whatever is actually present.
        items = result.get('items') if isinstance(result, dict) else None
        if not items:
            logger.warning(f"github_keyword:关键词【{keyword}】未获取到结果,已跳过。")
        for item in items or []:
            project_info = _repo_info(item, keyword)
            if project_info is not None:
                all_results.append(project_info)

        # Pause between keywords to stay within the API rate limit.
        time.sleep(5)

    # Write all results to the JSON file.
    save_to_json(all_results, './resources/JSON/github_keyword.json')


def github_main_repo(key):
    """Record the latest commit of every ``owner/repo`` in *key*.

    Results are written to ``./resources/JSON/github_repo.json``.
    """
    all_results = []  # accumulates the records of every repository
    for keyword in key:
        logger.info(f"github_repo:项目【{keyword}】更新情况获取开始。")
        api_node = "https://api.github.com/repos/{}/commits?per_page=1".format(keyword)
        result = fetch_rss(api_node)

        # A failed request or an empty repository yields no commit to report.
        if not isinstance(result, list) or not result:
            logger.warning(f"github_repo:项目【{keyword}】未获取到提交记录,已跳过。")
            time.sleep(5)
            continue

        commit = result[0]  # the most recent commit
        project_info = {
            # NOTE(review): this points at the API host rather than
            # github.com; kept as-is because consumers of the JSON may rely
            # on it - confirm and fix downstream if unintended.
            'link': f"https://api.github.com/{keyword}",
            'name': keyword,
            'updated_at': commit['commit']['author']['date'],
            'description': _truncate_description(commit['commit']['message']),
            'author': commit['commit']['author']['name'],
            'link_2': commit['html_url'],
            'keyword': keyword,
        }
        all_results.append(project_info)
        time.sleep(5)

    # Write all results to the JSON file.
    save_to_json(all_results, './resources/JSON/github_repo.json')


def github_main_release(key):
    """Record the latest release of every ``owner/repo`` in *key*.

    Repositories without releases (or whose request failed) are skipped.
    Results are written to ``./resources/JSON/github_release.json``.
    """
    all_results = []  # accumulates the records of every repository
    for keyword in key:
        logger.info(f"github_repo:项目【{keyword}】发版情况获取开始。")
        api_node = "https://api.github.com/repos/{}/releases?per_page=1".format(keyword)
        result = fetch_rss(api_node)
        if not result:
            logger.warning(f"github_repo:项目【{keyword}】不存在版本发布情况。")
            time.sleep(5)
            continue

        release = result[0]  # the most recent release
        project_info = {
            'link': release['html_url'],
            'published_at': release['published_at'],
            # Tolerate a null author object instead of crashing on it.
            'author': (release.get('author') or {}).get('login'),
            'keyword': keyword,
        }
        all_results.append(project_info)
        time.sleep(5)

    # Write all results to the JSON file.
    save_to_json(all_results, './resources/JSON/github_release.json')


def github_main_user(key):
    """Record the newest repositories of every user name in *key*.

    Results are written to ``./resources/JSON/github_user.json``.
    """
    all_results = []  # accumulates the records of every user
    for keyword in key:
        logger.info(f"github_user:作者【{keyword}】更新情况获取开始。")
        api_node = "https://api.github.com/users/{}/repos?sort=created&per_page=10".format(keyword)
        result = fetch_rss(api_node)

        # A failed request returns None; only a list of repositories is usable.
        repos = result if isinstance(result, list) else []
        if not repos:
            logger.warning(f"github_user:作者【{keyword}】未获取到仓库信息,已跳过。")
        for repo in repos:
            project_info = _repo_info(repo, keyword)
            if project_info is not None:
                all_results.append(project_info)

        time.sleep(5)

    # Write all results to the JSON file.
    save_to_json(all_results, './resources/JSON/github_user.json')


def github_main():
    """Run every collector whose configuration list is non-empty."""
    if keyword_list:
        github_main_keyword(keyword_list)
    if tool_list:
        github_main_repo(tool_list)
        github_main_release(tool_list)
    if user_list:
        github_main_user(user_list)


if __name__ == "__main__":
    github_main()