keyan/test_textrank_zh.py

19 lines
573 B
Python
Raw Permalink Normal View History

2024-06-17 14:04:28 +08:00
from jieba.analyse import textrank
# Default document to analyse. The path is machine-specific; pass a different
# ``path`` to main() to analyse another file.
INPUT_PATH = r"D:\小工具程序\pdf2md\output_directory\good_i.mmd"


def normalize_lines(raw_lines) -> list:
    """Strip every line and replace blank lines with a single space.

    Args:
        raw_lines: Any iterable of strings (for example an open text file).

    Returns:
        A list with one entry per input line: the line with leading and
        trailing whitespace removed, or ``" "`` when nothing is left after
        stripping.
    """
    # ``or " "`` keeps a one-space placeholder for blank lines so the text
    # on either side of a blank line is not glued together when joined.
    return [raw.strip() or " " for raw in raw_lines]


def main(path: str = INPUT_PATH, top_k: int = 10) -> None:
    """Print the cleaned-up document text and its TextRank keywords.

    Args:
        path: UTF-8 text file to read.
        top_k: Value forwarded to ``textrank`` as ``topK``.

    Raises:
        OSError: If ``path`` cannot be opened.
    """
    # Iterate the file object directly instead of materialising readlines().
    with open(path, "r", encoding="utf8") as f:
        lines = normalize_lines(f)

    # Merge all of the text into one large string.
    all_article = "".join(lines)
    print(all_article)

    # With withWeight=True each result is unpacked below as a (word, weight) pair.
    keywords = textrank(all_article, topK=top_k, withWeight=True)
    print('Text rank 结果展示:')
    for word, weight in keywords:
        print(word, ": ", weight)


if __name__ == "__main__":
    main()