PyBot/media/freebuf.py
2024-12-06 16:53:58 +08:00

65 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import os
import requests
import xml.etree.ElementTree as ET
import json
from requests.exceptions import RequestException
# 测试用爬虫请求头
headers = {
"Content-Type": "application/json",
"Cache-Control": "no-cache",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"Sec-Fetch-Site": "same-origin",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-User": "?1",
"Sec-Fetch-Dest": "document",
"Accept-Language": "zh-CN,zh;q=0.9"
}
def fetch_rss(url, headers):
try:
response = requests.get(url, headers=headers)
response.raise_for_status() # 检查请求是否成功
return response.content
except RequestException as e:
print(f"请求失败: {e}") # 可选:打印错误信息
return None # 返回None表示请求失败
def parse_rss(rss_content):
items = []
root = ET.fromstring(rss_content)
for item in root.findall('.//item'):
item_dict = {}
for child in item:
tag = child.tag
# 将一标签替换名称方便处理
if tag.startswith('{http://purl.org/rss/1.0/modules/content/}'):
tag = 'body'
item_dict[tag] = child.text
items.append(item_dict)
return items
def save_to_json(data, filename):
with open(filename, 'w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False, indent=4)
def freebuf_main():
url = "https://www.freebuf.com/feed"
rss_content = fetch_rss(url, headers)
if rss_content is not None:
items = parse_rss(rss_content)
# 确保目录存在
os.makedirs(os.path.dirname('./JSON/freebuf.json'), exist_ok=True)
# 将解析后的数据保存到 JSON 文件
save_to_json(items, './JSON/freebuf.json')
print("数据已保存到 ./JSON/freebuf.json")
else:
print("无法获取Freebuf社区RSS内容跳过保存操作。")
if __name__ == '__main__':
freebuf_main()