# CatwalkGlimpse-AISelections/FindNew.py
# 2024-02-19 16:17:20 +08:00
#
# 84 lines
# 2.7 KiB
# Python

import os
import re
from datetime import datetime, timedelta
import markdown
# Input directory holding the daily trending snapshots, and output directory
# for the "newly seen projects" reports.
trending_dir = "./data/trending/"
new_dir = "./data/new/"

# Today's date formatted as YYYY-MM-DD; the year and month parts are used as
# sub-directory names.
today = datetime.now().strftime("%Y-%m-%d")
year, month = today.split("-")[:2]

# project name -> (url, description); populated as a side effect of
# get_projects().
project_urls = {}

# Create this month's output directory. exist_ok avoids the check-then-create
# race of a separate os.path.isdir() test.
os.makedirs(new_dir + year + '/' + month, exist_ok=True)

# Walk trending_dir and all of its sub-directories, collecting every file
# except the one(s) whose name contains today's date.
old_files = []
for dirpath, _dirnames, filenames in os.walk(trending_dir):
    for filename in filenames:
        if today in filename:
            continue
        path = os.path.join(dirpath, filename)
        print(path)
        old_files.append(path)
def get_projects(filenames):
    """Parse trending markdown files into ``{category: [project names]}``.

    Each file is split on ``#### `` headings; the first line of every section
    is the category name and the remaining lines are searched for bullets of
    the form ``* [name](url):description``.

    Side effect: the module-level ``project_urls`` dict is updated with
    ``name -> (url, description)`` for every bullet parsed. A later bullet
    with the same project name overwrites the earlier entry.

    Args:
        filenames: Iterable of paths to markdown files. Paths that do not
            exist are skipped.

    Returns:
        dict mapping each category name to the list of project names found
        under it, in the order encountered across all files. Duplicates are
        kept.
    """
    # Non-greedy groups: with greedy ``.+`` a description that itself contains
    # "](" or "):" (e.g. an inline markdown link) would be swallowed into the
    # project name / url.
    entry_pattern = re.compile(r'\* \[(.+?)\]\((.+?)\):(.*)')

    result = {}
    for filename in filenames:
        # EAFP: a missing file is skipped without a separate exists() check.
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                text = file.read()
        except FileNotFoundError:
            continue
        print(filename)

        # Everything before the first "#### " heading is discarded.
        for category in re.split(r'#### ', text)[1:]:
            lines = category.split('\n')
            category_name = lines[0].strip()
            # The category is registered even when it has no bullets.
            projects = result.setdefault(category_name, [])
            for line in lines[1:]:
                match = entry_pattern.search(line)
                if match:
                    project_name, url, description = match.groups()
                    projects.append(project_name)
                    project_urls[project_name] = (url, description)
    return result
# Paths of today's trending snapshot (input) and today's report (output).
year, month = today.split("-")[:2]
today_file = trending_dir + year + '/' + month + '/' + f'{today}.md'
new_file = new_dir + year + '/' + month + '/' + f'{today}.md'

# Parse the historical files first and today's file last: get_projects()
# records (url, description) in project_urls keyed by project name, and the
# last write wins, so this order makes the report use today's data.
projects_old = get_projects(old_files)
projects_today = get_projects([today_file])

# Write the projects that appear today but never appeared before in the same
# category.
with open(new_file, 'w', encoding='utf-8') as file:
    file.write("## " + today + "\n")
    for category, projects in projects_today.items():
        # Order-preserving, de-duplicating filter. A plain set difference
        # would make the output order vary between runs.
        seen = set(projects_old.get(category, []))
        new_projects = []
        for project in projects:
            if project not in seen:
                seen.add(project)
                new_projects.append(project)
        if new_projects:
            file.write(f'#### {category}\n')
            for project in new_projects:
                url, description = project_urls[project]
                file.write(f'* [{project}]({url}):{description}\n')