Recently, many of my friends have been teaching themselves Python so that they can collect data from the Internet. They used to collect data with the Locomotive (LocoySpider) scraping tool, but it was not flexible enough, so they spent some time learning Python and asked me to recommend a few small projects to practice on. I wrote several small projects and gave them to these friends.

[Python learning and discussion group]

The first removes the watermark from Douyin (TikTok) videos.
The second crawls the bullet comments (danmaku) of a Bilibili video, segments the text into words with jieba, and then renders the words as a word-cloud picture.

Please excuse the variable names.

1. Removing the Douyin watermark

import json
import re

# Browser identities sent with each request: a desktop one for the share and
# API pages, and a mobile one for resolving the final video address.
PC_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
)
MOBILE_USER_AGENT = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) '
    'AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 '
    'Mobile/15A372 Safari/604.1'
)

# Captures the video id from a share URL such as
# https://www.iesdouyin.com/share/video/6881151874846723339/?region=CN&...
_VIDEO_ID_PATTERN = re.compile(r'/video/([^/?#]+)')


def download_page(url, pc=True, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response.

    :param url: address to request (redirects are followed by requests).
    :param pc: send the desktop User-Agent when true, the mobile one otherwise.
    :param timeout: seconds to wait before giving up on the request.
    :return: the ``requests.Response`` object.
    """
    # Imported here so the pure helpers below stay importable even when the
    # third-party ``requests`` package is not installed.
    import requests

    ua = PC_USER_AGENT if pc else MOBILE_USER_AGENT
    headers = {'user-agent': ua}
    return requests.get(url, headers=headers, timeout=timeout)


def extract_item_id(share_url):
    """Return the video id found after ``/video/`` in *share_url*.

    :raises ValueError: if the URL contains no ``/video/<id>`` segment.
    """
    match = _VIDEO_ID_PATTERN.search(share_url)
    if match is None:
        raise ValueError(f'no video id found in URL: {share_url!r}')
    return match.group(1)


def strip_watermark(play_addr):
    """Turn a watermarked play address into the no-watermark one.

    The two addresses differ only in the ``playwm`` / ``play`` path segment.
    """
    return play_addr.replace('playwm', 'play')


def main():
    """Ask for a Douyin share link and print the no-watermark video info."""
    # Example share link: https://v.douyin.com/JyPHShN/
    url = input('Enter the Douyin share URL: ').strip()

    # The short link redirects to the full share page; its final URL carries
    # the video id.
    res1 = download_page(url)
    item_ids = extract_item_id(res1.url)

    # Look up the video metadata for that id.
    res2 = download_page(
        f'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={item_ids}'
    )
    res2_text = json.loads(res2.text)
    info = res2_text['item_list'][0]

    old_addr = info['video']['play_addr']['url_list'][0]
    new_addr = strip_watermark(old_addr)

    # Request the no-watermark address with the mobile User-Agent and keep
    # the URL the request finally lands on.
    res3 = download_page(new_addr, False)
    new_addr = res3.url

    douyin_info = {
        'aweme_id': info['aweme_id'],
        'title': info['desc'],
        'cover': info['video']['cover']['url_list'][0],
        'play_addr': new_addr,
    }
    print(douyin_info)


if __name__ == '__main__':
    main()

2. Crawling the bullet comments (danmaku)

import html
import json
import re

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
)

# Each bullet comment is the text of one <d ...>text</d> element.
_DAN_MU_PATTERN = re.compile(r'<d\b[^>]*>(.*?)</d>')


def download_page(url, timeout=10):
    """Fetch *url* with a desktop browser User-Agent and return the response.

    :param url: address to request.
    :param timeout: seconds to wait before giving up on the request.
    :return: the ``requests.Response`` object.
    """
    # Imported here so the pure helpers below stay importable even when the
    # third-party ``requests`` package is not installed.
    import requests

    headers = {'user-agent': USER_AGENT}
    return requests.get(url, headers=headers, timeout=timeout)


def get_cid(dvid):
    """Return the cid of the first page of the video *dvid*.

    :param dvid: the video's BV id, e.g. ``BV1KK4y1h76a``; queried via
        https://api.bilibili.com/x/player/pagelist?bvid=BV1KK4y1h76a&jsonp=jsonp
    :return: cid
    """
    url = f'https://api.bilibili.com/x/player/pagelist?bvid={dvid}&jsonp=jsonp'
    res = download_page(url)
    res_text = json.loads(res.text)
    return res_text['data'][0]['cid']


def parse_dan_mu(xml_text):
    """Extract the bullet-comment texts from the comment XML.

    XML character entities (``&amp;``, ``&lt;`` ...) are decoded so the
    saved text reads as the viewers typed it.

    :param xml_text: XML document as a string.
    :return: list of comment strings, in document order.
    """
    return [html.unescape(text) for text in _DAN_MU_PATTERN.findall(xml_text)]


def get_msg(cid):
    """Download and return the bullet comments for *cid*.

    :param cid: queried via https://api.bilibili.com/x/v1/dm/list.so?oid=241955049
    :return: list of comment strings.
    """
    url = f'https://api.bilibili.com/x/v1/dm/list.so?oid={cid}'
    res = download_page(url)
    # Decode explicitly as UTF-8 instead of relying on the encoding that
    # requests guesses for ``res.text``.
    res_xml = res.content.decode('utf-8')
    return parse_dan_mu(res_xml)


def save_to_file(dan_mu_list, filename):
    """Write the comments to *filename*, one per line, encoded as UTF-8."""
    with open(filename, mode='w', encoding='utf-8') as f:
        f.writelines(f'{dan_mu}\n' for dan_mu in dan_mu_list)


def main(dvid):
    """Save the bullet comments of video *dvid* to ``<dvid>.txt``."""
    cid = get_cid(dvid)
    dan_mu_list = get_msg(cid)
    save_to_file(dan_mu_list, f'{dvid}.txt')


if __name__ == '__main__':
    # Example: dvid = 'BV1aE411d7Rp'
    dvid = input('Enter the Bilibili video ID (BV number): ').strip()
    main(dvid)

The comment text collected by the code above is then turned into a word-cloud picture:

def rand_file(filename):
    """Return the whole content of the UTF-8 text file *filename*."""
    with open(filename, mode='r', encoding='utf-8') as f:
        return f.read()


def jieba_cut(text, imgname, font_path='msyh.ttc'):
    """Segment *text* with jieba and save a word cloud as ``<imgname>.png``.

    :param text: the text to segment into words.
    :param imgname: output image name without the ``.png`` extension.
    :param font_path: font file used to draw the words; the default
        ``msyh.ttc`` must be available on the machine running the script.
    """
    # Imported here so ``rand_file`` stays usable even when the third-party
    # ``jieba`` / ``wordcloud`` packages are not installed.
    import jieba
    import wordcloud

    cut_list = jieba.lcut(text)
    # WordCloud.generate expects the words separated by whitespace.
    word = ' '.join(cut_list)
    w = wordcloud.WordCloud(
        font_path=font_path,
        background_color='white',
        width=600,
        height=400,
    )
    w.generate(word)
    w.to_file(f'{imgname}.png')


if __name__ == '__main__':
    # Example: dvid = 'BV1aE411d7Rp'
    dvid = input('Enter the Bilibili video ID (BV number): ').strip()
    text = rand_file(f'{dvid}.txt')
    jieba_cut(text, dvid)
    print(f'Word cloud saved to {dvid}.png')
![Super easy to understand! Recommended small hands-on projects for self-studying Python](https://p3-tt.byteimg.com/origin/dfic-imagehandler/cefd81a9-9cfd-420b-b6df-36e4a0acd561?from=pc)

OK, that's all for today. If you found this helpful, please give it a like and follow me.