Created
March 14, 2018 07:28
用微信好友个性签名生成词云
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itchat | |
import re | |
import jieba # 分词 | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator | |
from scipy.misc import imread # 这是一个处理图像的函数 | |
# Log in to WeChat through itchat.
itchat.login()

# Fetch the friend list; the [0:] slice keeps every entry.
friends = itchat.get_friends(update=True)[0:]

# Signatures that use emoji carry markup fields such as span, class, emoji
# and emoji1f3c3 -- presumably tags of the form
# <span class="emoji emoji1f3c3"></span>.  Remove each opening/closing span
# tag as a unit.  The earlier approach deleted the bare words "span", "class"
# and "emoji" and then used the greedy pattern "1f\d.+", which discarded all
# text following the first emoji and left '< ="' fragments in the output.
# The pattern is a raw string and is compiled once, outside the loop.
emoji_span = re.compile(r'</?span[^>]*>')

# One cleaned signature per friend, in friend-list order.
tList = [emoji_span.sub('', friend['Signature']).strip() for friend in friends]
# Join the signatures with a space rather than the empty string, so the end
# of one signature cannot fuse with the start of the next and be segmented
# as a word that nobody actually wrote.
text = ' '.join(tList)

# Segment the text with jieba; cut_all=True selects its full mode.
wordlist_jieba = jieba.cut(text, cut_all=True)

# Space-separated tokens, the form handed to WordCloud.generate() later.
wl_space_split = ' '.join(wordlist_jieba)
# Image used both as the cloud's mask and as the source of its colours.
# plt.imread is used instead of scipy.misc.imread, which was removed from
# SciPy in release 1.2.
back_color = plt.imread('mao.jpg')

# set.add() returns None, so `stopwords=STOPWORDS.add('中国')` passed None and
# only worked because WordCloud falls back to the (now mutated) global
# STOPWORDS.  Build an explicit set and leave the library's global untouched.
cloud_stopwords = STOPWORDS | {'中国'}

# Word cloud configuration.
my_wordcloud = WordCloud(
    background_color='white',  # canvas colour
    max_words=2000,  # upper bound on the number of words drawn
    mask=back_color,  # when a mask is given, width and height are ignored
    max_font_size=100,  # largest font size used
    stopwords=cloud_stopwords,  # built-in stop words plus '中国'
    # Font file to render with; without a font covering Chinese glyphs the
    # words show up as empty boxes.
    font_path='/Users/guohongjun/Library/Fonts/simfang.ttf',
    random_state=42,  # fixed seed so repeated runs give the same result
)
# Build the word cloud from the space-separated tokens.
my_wordcloud.generate(wl_space_split)

# Colour function that takes its colours from the mask image.
image_colors = ImageColorGenerator(back_color)

# Draw the recoloured cloud on a new figure with the axes hidden.
plt.figure()
plt.imshow(my_wordcloud.recolor(color_func=image_colors))
plt.axis('off')

# Write the image to disk before opening any window, so the file exists even
# if the viewer is closed abruptly.
my_wordcloud.to_file('ciyun.png')

# The figure was previously built but never displayed; show it explicitly.
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment