# Source: CatwalkGlimpse-AISelections/FindNew.py (84 lines, 2.7 KiB, Python)
# Captured from a repository viewer's blame page ("Raw / Permalink / Normal View / History").
# First blame timestamp: 2024-01-31 13:32:37 +08:00
import os
import re
from datetime import datetime, timedelta
import markdown
# Input tree holding the daily trending reports, and output tree for the
# "new today" reports.
trending_dir = "./data/trending/"
new_dir = "./data/new/"

# Today's date as YYYY-MM-DD; its year and month parts name the
# sub-directories used below.
today = datetime.now().strftime("%Y-%m-%d")
year, month = today.split("-")[:2]

# Maps project name -> (url, description); filled in by get_projects().
project_urls = {}

# Make sure this month's output directory exists (no-op when it already does).
os.makedirs(new_dir + year + '/' + month, exist_ok=True)

# Walk trending_dir and all of its sub-directories, collecting every report
# except today's.
old_files = []
for dirpath, dirnames, filenames in os.walk(trending_dir):
    for filename in filenames:
        # Skip any file whose name contains today's date.
        if today in filename:
            continue
        print(os.path.join(dirpath, filename))
        old_files.append(os.path.join(dirpath, filename))
# Matches one project bullet of the form "* [name](url):description".
# The name and url groups are non-greedy so that brackets or "):" appearing
# inside the description are not swallowed into them.
_PROJECT_LINE_RE = re.compile(r'\* \[(.+?)\]\((.+?)\):(.*)')


def get_projects(filenames, urls=None):
    """Collect project names per category from trending markdown files.

    Each file is split on '#### ' headings; the first line of every section
    is the category name and each following line shaped like
    ``* [name](url):description`` contributes one project.

    Args:
        filenames: Iterable of file paths to read. Paths that do not exist
            are skipped silently.
        urls: Optional dict that receives ``name -> (url, description)`` for
            every project found. Defaults to the module-level
            ``project_urls``. A later occurrence of the same name overwrites
            an earlier one.

    Returns:
        Dict mapping category name to the list of project names, in the
        order encountered across all files (duplicates are kept). A category
        with no matching project lines maps to an empty list.
    """
    if urls is None:
        urls = project_urls

    result = {}
    for filename in filenames:
        if not os.path.exists(filename):
            continue
        # 'with' guarantees the file is closed once it has been read.
        with open(filename, 'r', encoding='utf-8') as file:
            text = file.read()
        print(filename)

        # Anything before the first '#### ' heading is not part of a category.
        for section in re.split(r'#### ', text)[1:]:
            lines = section.split('\n')
            category_name = lines[0].strip()
            # Reuse the list from earlier files so categories accumulate.
            projects = result.setdefault(category_name, [])
            for line in lines[1:]:
                match = _PROJECT_LINE_RE.search(line)
                if match:
                    project_name, url, description = match.groups()
                    projects.append(project_name)
                    urls[project_name] = (url, description)
    return result
# Locate today's trending report and the file that will list today's newcomers.
year, month = today.split("-")[:2]
today_file = f'{trending_dir}{year}/{month}/{today}.md'
new_file = f'{new_dir}{year}/{month}/{today}.md'

# Parse the older reports first: get_projects() records url/description in
# project_urls and later calls overwrite earlier ones, so reading today's
# report last makes today's metadata the one that gets written out.
projects_old = get_projects(old_files)
projects_today = get_projects([today_file])

# Find the projects that are new today and write them to the new file.
with open(new_file, 'w', encoding='utf-8') as file:
    file.write("## " + today + "\n")
    for category, projects in projects_today.items():
        known = set(projects_old.get(category, []))
        # dict.fromkeys de-duplicates while keeping the order of today's
        # report, so the output is stable from run to run (iterating a plain
        # set difference is not).
        new_projects = [name for name in dict.fromkeys(projects) if name not in known]
        if new_projects:
            file.write(f'#### {category}\n')
            for project in new_projects:
                file.write(f'* [{project}]({project_urls[project][0]}):{project_urls[project][1]}\n')