Crawling the hot reviews of “The Age of Awakening”

Multi-page crawl explanation

Hot review crawl analysis

Each hot review corresponds to a div tag whose class value is `comment-item`.

So we just need to get all of the div tags on the current page whose class value is `comment-item` to get all of the hot reviews.

However, the review text itself is stored in a span tag whose class value is `short`, nested under the div tag whose class value is `comment-item`.

So we just need to get all of the span tags on the current page whose class value is `short` to get the text of all the hot reviews.

# Every review's text lives in a tag whose class is "short".
spans = data.find_all(class_="short")
for span in spans:
    # Tokenize the review with jieba and join the tokens with commas.
    tokens = ",".join(jieba.cut(span.text.strip()))
    if tokens:
        # The trailing comma keeps the last token of this review from being
        # fused with the first token of the next one in the accumulated text.
        global_text += tokens + ","

The complete code

import matplotlib.pyplot as plt
import wordcloud
import jieba
from imageio import imread
import requests
from bs4 import BeautifulSoup

# Accumulates the comma-separated jieba tokens of every review fetched so far.
global_text = ""


def getDetail(data):
    """Extract the review text from one page of HTML and append its tokens.

    Every tag whose class is ``short`` is treated as one review. Its text is
    stripped, tokenized with ``jieba.cut`` and appended to the module-level
    ``global_text`` as comma-separated tokens.

    Args:
        data: HTML source of one review page, as a string.
    """
    global global_text
    soup = BeautifulSoup(data, "html.parser")
    for span in soup.find_all(class_="short"):
        tokens = ",".join(jieba.cut(str(span.text).strip()))
        if tokens:
            # The trailing comma keeps the last token of this review from
            # being fused with the first token of the next one.
            global_text += tokens + ","


def toWordCloud():
    """Render ``global_text`` as a word cloud, save it and draw it with pyplot.

    Reads the mask image from ``./9.png`` and writes the result to ``res.jpg``.
    """
    mask = imread("./9.png")  # mask image
    wcd = wordcloud.WordCloud(
        # Raw string so the backslashes are not read as escape sequences.
        font_path=r"C:\Windows\Fonts\msyh.ttc",
        background_color='white',  # set background color
        random_state=80,
        mask=mask,
    )
    wcd.generate(global_text)
    wcd.to_file("res.jpg")  # save as image
    plt.imshow(wcd)
    plt.axis('off')


if __name__ == "__main__":
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.64"
    }
    # NOTE(review): the scheme/host/path prefix of this URL template is
    # missing from the source; prepend the real comments-page URL (up to the
    # paging offset parameter) before running.
    url = '{}&limit=20&status=P&sort=new_score'
    for i in range(6, 10):
        # Each page is addressed by an offset that is a multiple of 20.
        new_url = url.format(i * 20)
        # Request the formatted page URL, not the unformatted template.
        response = requests.get(url=new_url, headers=headers)
        response.encoding = 'utf-8'
        getDetail(response.text)
    # Build the word cloud once, after all pages have been collected.
    toWordCloud()

