# -*- coding: utf-8 -*-
|
|
import time
|
|
import yaml
|
|
import requests
|
|
import json
|
|
import datetime
|
|
from requests.exceptions import RequestException
|
|
import xml.etree.ElementTree as ET
|
|
from loguru import logger
|
|
|
|
# Upper bound, in UTF-8 bytes, for a description before it gets truncated
# and suffixed with '...'.
MAX_DESCRIPTION_LENGTH = 300

# The monitoring configuration is read once, at import time.
with open('./config/github_config.yaml', 'r', encoding="utf-8") as file:
    # An empty YAML document parses to None; fall back to an empty mapping
    # so the lookups below do not fail on it.
    config = yaml.safe_load(file) or {}

# Optional access token. fetch_rss() already copes with a missing token, so
# an absent key must not abort the import.
token = config.get('github_token')

# A key that is omitted, or present but left empty, yields None. Normalise
# those to empty lists: the monitors iterate over these values, and the
# blacklist is iterated for every description that is checked.
tool_list = config.get('tool_list') or []
keyword_list = config.get('keyword_list') or []
user_list = config.get('user_list') or []
black_words = config.get('black_words') or []
|
|
|
|
def fetch_rss(url, timeout=10):
    """Issue a GET request to ``url`` and return the decoded JSON body.

    Despite its name, this helper returns parsed JSON, not an RSS feed. The
    module-level ``token`` is sent as an ``Authorization`` header when one
    is configured.

    Args:
        url: Full URL to request.
        timeout: Timeout in seconds handed to ``requests.get`` (default 10).

    Returns:
        The parsed JSON payload, or ``None`` when the request timed out,
        failed (including a non-2xx status reported by
        ``raise_for_status``), or returned a body that is not valid JSON.
        The failure is logged in every case.
    """
    headers = {"Content-Type": "application/json"}
    # Attach credentials only when a token is really configured: an empty
    # string would otherwise be sent as a malformed "token " header.
    if token:
        headers['Authorization'] = f"token {token}"

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.Timeout:
        logger.warning(f"请求 {url} 超时,跳过保存操作。")
        return None
    # ValueError is listed as well because older requests releases raise a
    # plain ValueError subclass from response.json() on an invalid body.
    except (requests.exceptions.RequestException, ValueError) as e:
        logger.error(f"请求 {url} 时发生错误: {e}")
        return None
|
|
|
|
def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as indented, UTF-8 encoded JSON.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and the
    output is indented by four spaces. The parent directory of ``filename``
    is created when it does not exist yet, so a fresh checkout without the
    output folder no longer fails with ``FileNotFoundError``.

    Args:
        data: Any JSON-serialisable object.
        filename: Path of the file to (over)write.
    """
    # Imported locally so this function does not depend on a file-level
    # ``import os``.
    import os

    parent = os.path.dirname(filename)
    # dirname is '' for a bare file name; makedirs('') would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
|
|
|
|
def github_main_keyword(key):
    """Search GitHub repositories for each keyword and save the matches.

    For every keyword the repository search endpoint is queried with
    ``sort=updated&per_page=20``. Repositories whose description contains a
    word from ``black_words`` are skipped, descriptions larger than
    ``MAX_DESCRIPTION_LENGTH`` UTF-8 bytes are shortened and suffixed with
    ``'...'``, and all collected records are written to
    ``./resources/JSON/github_keyword.json``.

    Args:
        key: Iterable of search keywords.
    """
    all_results = []  # records gathered across all keywords
    for keyword in key:
        logger.info(f"github_keyword:关键词【{keyword}】获取开始。")
        api_node = "https://api.github.com/search/repositories?q={}&sort=updated&per_page=20".format(keyword)
        result = fetch_rss(api_node)
        if result is None:
            time.sleep(5)
            continue

        # Iterate over what was actually returned: a search can yield fewer
        # than the 20 requested items, and fixed indexing raised IndexError.
        for item in result.get('items') or []:
            description = item['description']

            if description is not None:
                # Drop repositories whose description hits the blacklist.
                if any(word in description for word in black_words):
                    continue

                encoded = description.encode('utf-8')
                if len(encoded) > MAX_DESCRIPTION_LENGTH:
                    # Keep the longest prefix that stays below the byte
                    # limit; errors='ignore' discards a multi-byte character
                    # that the slice cut in half.
                    description = encoded[:MAX_DESCRIPTION_LENGTH - 1].decode('utf-8', errors='ignore') + '...'

            all_results.append({
                'link': item['html_url'],
                'name': item['name'],
                'created_at': item['created_at'],
                'description': description,
                'author': item['owner']['login'],
                'language': item['language'],
                'keyword': keyword,
            })

        # Pause between keywords (presumably to stay under the API rate limit).
        time.sleep(5)

    # Persist everything that was collected.
    save_to_json(all_results, './resources/JSON/github_keyword.json')
|
|
|
|
|
|
def github_main_repo(key):
    """Record the latest commit of each monitored repository.

    For every ``owner/repo`` entry the commits endpoint is queried with
    ``per_page=1`` and the first returned commit is stored. Commit messages
    larger than ``MAX_DESCRIPTION_LENGTH`` UTF-8 bytes are shortened and
    suffixed with ``'...'``. The records are written to
    ``./resources/JSON/github_repo.json``.

    Args:
        key: Iterable of repository identifiers in ``owner/repo`` form.
    """
    all_results = []  # records gathered across all repositories
    for keyword in key:
        logger.info(f"github_repo:项目【{keyword}】更新情况获取开始。")
        api_node = "https://api.github.com/repos/{}/commits?per_page=1".format(keyword)
        result = fetch_rss(api_node)
        # None means the request failed; an empty list has no commit to
        # report. Either way there is nothing to index with result[0].
        if not result:
            time.sleep(5)
            continue

        commit = result[0]  # first commit returned by the API
        description = commit['commit']['message']

        if description is not None:
            encoded = description.encode('utf-8')
            if len(encoded) > MAX_DESCRIPTION_LENGTH:
                # Keep the longest prefix that stays below the byte limit;
                # errors='ignore' discards a multi-byte character that the
                # slice cut in half.
                description = encoded[:MAX_DESCRIPTION_LENGTH - 1].decode('utf-8', errors='ignore') + '...'

        author = commit['commit']['author']['name']
        updated_at = commit['commit']['author']['date']
        link_2 = commit['html_url']

        all_results.append({
            # Repository page on github.com. The API host has no
            # /{owner}/{repo} route, so the former api.github.com link
            # did not resolve to the project.
            'link': f"https://github.com/{keyword}",
            'name': keyword,
            'updated_at': updated_at,
            'description': description,
            'author': author,
            'link_2': link_2,
            'keyword': keyword,
        })

        # Pause between repositories (presumably to stay under the API rate limit).
        time.sleep(5)

    # Persist everything that was collected.
    save_to_json(all_results, './resources/JSON/github_repo.json')
|
|
|
|
def github_main_release(key):
    """Record the newest release of each monitored repository.

    Each entry of ``key`` is queried against the releases endpoint with
    ``per_page=1``. Failed requests are skipped, repositories without any
    release are logged and skipped, and the remaining records are written
    to ``./resources/JSON/github_release.json``.

    Args:
        key: Iterable of repository identifiers in ``owner/repo`` form.
    """
    collected = []
    for keyword in key:
        logger.info(f"github_repo:项目【{keyword}】发版情况获取开始。")
        releases = fetch_rss(f"https://api.github.com/repos/{keyword}/releases?per_page=1")

        if not releases:
            # A failed request (None) is skipped silently; an empty but
            # successful answer is reported before moving on.
            if releases is not None:
                logger.warning(f"github_repo:项目【{keyword}】不存在版本发布情况。")
            time.sleep(5)
            continue

        latest = releases[0]  # first release returned by the API
        publisher = latest['author']['login']
        published = latest['published_at']
        url = latest['html_url']

        collected.append({
            'link': url,
            'published_at': published,
            'author': publisher,
            'keyword': keyword,
        })
        time.sleep(5)

    # Persist everything that was collected.
    save_to_json(collected, './resources/JSON/github_release.json')
|
|
|
|
def github_main_user(key):
    """Record the repositories of each monitored GitHub user.

    For every user name the repositories endpoint is queried with
    ``sort=created&per_page=10``. Repositories whose description contains a
    word from ``black_words`` are skipped, descriptions larger than
    ``MAX_DESCRIPTION_LENGTH`` UTF-8 bytes are shortened and suffixed with
    ``'...'``, and the records are written to
    ``./resources/JSON/github_user.json``.

    Args:
        key: Iterable of GitHub user names.
    """
    collected = []
    for keyword in key:
        logger.info(f"github_user:作者【{keyword}】更新情况获取开始。")
        repos = fetch_rss(f"https://api.github.com/users/{keyword}/repos?sort=created&per_page=10")
        if repos is None:
            time.sleep(5)
            continue

        for repo in repos:
            summary = repo['description']

            if summary is not None:
                # Skip repositories whose description hits the blacklist.
                if any(word in summary for word in black_words):
                    continue

                raw = summary.encode('utf-8')
                if len(raw) > MAX_DESCRIPTION_LENGTH:
                    # Longest prefix that stays below the byte limit; a
                    # character split by the slice is discarded by
                    # errors='ignore'.
                    summary = raw[:MAX_DESCRIPTION_LENGTH - 1].decode('utf-8', errors='ignore') + '...'

            collected.append({
                'link': repo['html_url'],
                'name': repo['name'],
                'created_at': repo['created_at'],
                'description': summary,
                'author': repo['owner']['login'],
                'language': repo['language'],
                'keyword': keyword,
            })

        time.sleep(5)

    # Persist everything that was collected.
    save_to_json(collected, './resources/JSON/github_user.json')
|
|
|
|
def github_main():
    """Run every monitor whose configuration list is non-empty.

    Keywords are searched first, then the commit and release monitors run
    for the tool list, and finally the user monitor runs.
    """
    schedule = (
        (keyword_list, (github_main_keyword,)),
        (tool_list, (github_main_repo, github_main_release)),
        (user_list, (github_main_user,)),
    )
    for targets, monitors in schedule:
        if not targets:
            continue
        for monitor in monitors:
            monitor(targets)
|
|
|
|
# Run all configured monitors when this file is executed as a script.
if __name__ == "__main__":
    github_main()