# -*- coding: utf-8 -*-

import json
import time

import requests
import yaml
from loguru import logger

MAX_DESCRIPTION_LENGTH = 300

with open('./config/github_config.yaml', 'r', encoding="utf-8") as file:
    config = yaml.safe_load(file)

token = config['github_token']
tool_list, keyword_list, user_list, black_words = config['tool_list'], config['keyword_list'], config['user_list'], config['black_words']
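
# For reference, a minimal sketch of what ./config/github_config.yaml is assumed to
# contain, based on the keys read above (values are placeholders, not real data):
#
#   github_token: "ghp_xxxxxxxxxxxxxxxx"   # or null to send unauthenticated requests
#   tool_list:
#     - "owner/repo"
#   keyword_list:
#     - "cve"
#   user_list:
#     - "some-github-user"
#   black_words:
#     - "unwanted-term"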


def fetch_rss(url, timeout=10):
    """Fetch a GitHub API URL and return the parsed JSON body, or None on failure."""
    if token is None:
        headers = {
            "Content-Type": "application/json"
        }
    else:
        headers = {
            'Authorization': f"token {token}",
            "Content-Type": "application/json"
        }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.Timeout:
        logger.warning(f"Request to {url} timed out; skipping save.")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"Error while requesting {url}: {e}")
        return None
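
# Example usage of fetch_rss (hypothetical query string; the same search endpoint is
# built in github_main_keyword below):
#
#   data = fetch_rss("https://api.github.com/search/repositories?q=cve&sort=updated&per_page=20")
#   if data is not None:
#       print(data["items"][0]["html_url"])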


def save_to_json(data, filename):
    """Write the collected results to a JSON file."""
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


def github_main_keyword(key):
    """Search GitHub repositories for each keyword and save the results."""
    all_results = []  # Accumulates results for every keyword
    for keyword in key:
        logger.info(f"github_keyword: starting fetch for keyword [{keyword}].")
        api_node = "https://api.github.com/search/repositories?q={}&sort=updated&per_page=20".format(keyword)
        result = fetch_rss(api_node)
        if result is None:
            time.sleep(5)
            continue
        # Iterate over the returned items (at most 20 per request)
        for item in result.get('items', [])[:20]:
            description = item['description']

            if description is None:
                pass
            # Skip repositories whose description contains a blacklisted word
            elif any(word in description for word in black_words):
                continue
            # Truncate the description at roughly 300 bytes and append "..."
            elif len(description.encode('utf-8')) > MAX_DESCRIPTION_LENGTH:
                # Find the character index at the 300-byte boundary
                byte_index = 0
                char_index = 0
                while byte_index < MAX_DESCRIPTION_LENGTH and char_index < len(description):
                    byte_index += len(description[char_index].encode('utf-8'))
                    char_index += 1
                description = description[:char_index - 1] + '...'

            link = item['html_url']
            name = item['name']
            created_at = item['created_at']
            author = item['owner']['login']
            language = item['language']

            # Store the details of each project in a dictionary
            project_info = {
                'link': link,
                'name': name,
                'created_at': created_at,
                'description': description,
                'author': author,
                'language': language,
                'keyword': keyword
            }
            all_results.append(project_info)
        time.sleep(5)

    # Write all results to a JSON file
    save_to_json(all_results, './resources/JSON/github_keyword.json')
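
# Each record written to ./resources/JSON/github_keyword.json has the shape below
# (illustrative values only, not real data):
#
#   {
#       "link": "https://github.com/example-user/example-repo",
#       "name": "example-repo",
#       "created_at": "2024-01-01T00:00:00Z",
#       "description": "Example description...",
#       "author": "example-user",
#       "language": "Python",
#       "keyword": "cve"
#   }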


def github_main_repo(key):
    """Fetch the latest commit for each monitored repository and save the results."""
    all_results = []  # Accumulates results for every repository
    for keyword in key:
        logger.info(f"github_repo: starting fetch of commit updates for project [{keyword}].")
        api_node = "https://api.github.com/repos/{}/commits?per_page=1".format(keyword)
        result = fetch_rss(api_node)
        if result is None:
            time.sleep(5)
            continue

        commit = result[0]  # The latest commit
        description = commit['commit']['message']

        if description is None:
            pass
        # Truncate the description at roughly 300 bytes and append "..."
        elif len(description.encode('utf-8')) > MAX_DESCRIPTION_LENGTH:
            byte_index = 0
            char_index = 0
            while byte_index < MAX_DESCRIPTION_LENGTH and char_index < len(description):
                byte_index += len(description[char_index].encode('utf-8'))
                char_index += 1
            description = description[:char_index - 1] + '...'

        author = commit['commit']['author']['name']
        updated_at = commit['commit']['author']['date']
        link_2 = commit['html_url']

        # Store the details of each project in a dictionary
        project_info = {
            'link': f"https://api.github.com/{keyword}",
            'name': keyword,
            'updated_at': updated_at,
            'description': description,
            'author': author,
            'link_2': link_2,
            'keyword': keyword
        }
        all_results.append(project_info)
        time.sleep(5)

    # Write all results to a JSON file
    save_to_json(all_results, './resources/JSON/github_repo.json')


def github_main_release(key):
    """Fetch the latest release for each monitored repository and save the results."""
    all_results = []  # Accumulates results for every repository
    for keyword in key:
        logger.info(f"github_repo: starting fetch of release info for project [{keyword}].")
        api_node = "https://api.github.com/repos/{}/releases?per_page=1".format(keyword)
        result = fetch_rss(api_node)
        if result is None:
            time.sleep(5)
            continue
        if not result:
            logger.warning(f"github_repo: project [{keyword}] has no published releases.")
            time.sleep(5)
            continue

        release = result[0]  # The latest release
        author = release['author']['login']
        published_at = release['published_at']
        link = release['html_url']

        # Store the details of each release in a dictionary
        project_info = {
            'link': link,
            'published_at': published_at,
            'author': author,
            'keyword': keyword
        }
        all_results.append(project_info)
        time.sleep(5)

    # Write all results to a JSON file
    save_to_json(all_results, './resources/JSON/github_release.json')


def github_main_user(key):
    """Fetch recently created repositories for each monitored user and save the results."""
    all_results = []  # Accumulates results for every user
    for keyword in key:
        logger.info(f"github_user: starting fetch of recent repositories for author [{keyword}].")
        api_node = "https://api.github.com/users/{}/repos?sort=created&per_page=10".format(keyword)
        result = fetch_rss(api_node)
        if result is None:
            time.sleep(5)
            continue
        for item in result:
            description = item['description']

            if description is None:
                pass
            # Skip repositories whose description contains a blacklisted word
            elif any(word in description for word in black_words):
                continue
            # Truncate the description at roughly 300 bytes and append "..."
            elif len(description.encode('utf-8')) > MAX_DESCRIPTION_LENGTH:
                # Find the character index at the 300-byte boundary
                byte_index = 0
                char_index = 0
                while byte_index < MAX_DESCRIPTION_LENGTH and char_index < len(description):
                    byte_index += len(description[char_index].encode('utf-8'))
                    char_index += 1
                description = description[:char_index - 1] + '...'

            link = item['html_url']
            name = item['name']
            created_at = item['created_at']
            author = item['owner']['login']
            language = item['language']

            # Store the details of each project in a dictionary
            project_info = {
                'link': link,
                'name': name,
                'created_at': created_at,
                'description': description,
                'author': author,
                'language': language,
                'keyword': keyword
            }
            all_results.append(project_info)
        time.sleep(5)

    # Write all results to a JSON file
    save_to_json(all_results, './resources/JSON/github_user.json')


def github_main():
    if keyword_list:
        github_main_keyword(keyword_list)
    if tool_list:
        github_main_repo(tool_list)
        github_main_release(tool_list)
    if user_list:
        github_main_user(user_list)


if __name__ == "__main__":
    github_main()