抖音,是一款可以拍短視頻的音樂創意短視頻社交軟件,該軟件于2016年9月上線,是一個專注年輕人的15秒音樂短視頻社區。用戶可以通過這款軟件選擇歌曲,拍攝15秒的音樂短視頻,形成自己的作品。此APP已在Android各大應用商店和APP Store均有上線。
今天咱們就用Python爬取抖音視頻
環境:Python3.6+Windows
IDE:你開心就好,喜歡用哪個就用哪個
模塊:
1 from splinter.driver.webdriver.chrome import Options, Chrome2 from splinter.browser import Browser3 from contextlib import closing4 import requests, json, time, re, os, sys, time5 from bs4 import BeautifulSoup
查詢的用戶ID
視頻名字列表
視頻鏈接列表
用戶昵稱
def get_video_urls(self, user_id, max_retries=5):
    """
    Look up a user through the search API and collect their video links.

    Parameters:
        user_id: the user ID to search for (compared against the
            ``unique_id`` field of each search result)
        max_retries: how many times the search request is repeated before
            giving up when no result matches ``user_id``
    Returns:
        video_names: list of file names (``<description>.mp4``, or
            ``<n>.mp4`` when the video only carries the default description)
        video_urls: list of share-page URLs, parallel to ``video_names``
        nickname: nickname of the matched user
    Raises:
        ValueError: no search result matched ``user_id`` within
            ``max_retries`` attempts
    """
    # Local import keeps this method self-contained.
    from urllib.parse import quote

    # Default share description given to videos without a custom one.
    # The literal in the published listing was corrupted by pinyin
    # annotations, so it could never match.
    # NOTE(review): the API presumably answers in Simplified Chinese; the
    # Traditional form is accepted as well -- confirm against a live response.
    default_descs = ('抖音-原创音乐短视频社区', '抖音-原創音樂短視頻社區')

    video_names = []
    video_urls = []

    # Percent-encode the keyword so IDs with reserved characters do not
    # corrupt the query string.
    search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % quote(str(user_id))

    # The original looped until the first hit matched, which never ends for
    # an unknown ID. Retry a bounded number of times and check every hit.
    user_info = None
    for _ in range(max_retries):
        # verify=False disables TLS certificate checks (kept from the original).
        req = requests.get(url=search_url, verify=False)
        html = json.loads(req.text)
        for entry in html.get('user_list') or []:
            info = entry['user_info']
            if str(info['unique_id']) == str(user_id):
                user_info = info
                break
        if user_info is not None:
            break
    if user_info is None:
        raise ValueError('no user with ID %r found after %d attempts'
                         % (user_id, max_retries))

    nickname = user_info['nickname']
    # Ask for all of the user's posts in a single page.
    user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (user_info['uid'], user_info['aweme_count'])
    req = requests.get(url=user_url, verify=False)
    html = json.loads(req.text)

    # Running counter used to name videos that have no description of their own.
    i = 1
    for each in html['aweme_list']:
        share_info = each['share_info']
        share_desc = share_info['share_desc']
        if share_desc in default_descs:
            video_names.append(str(i) + '.mp4')
            i += 1
        else:
            video_names.append(share_desc + '.mp4')
        video_urls.append(share_info['share_url'])

    return video_names, video_urls, nickname
video_url:帶水印的視頻播放地址
download_url: 帶水印的視頻下載地址
def get_download_url(self, video_url):
    """
    Resolve the watermarked download address from a video share page.

    Parameters:
        video_url: URL of the (watermarked) video share page
    Returns:
        download_url: first entry of the ``play_addr`` URL list embedded in
            the page's ``var data = [...]`` script
    Raises:
        ValueError: the page contains no <script> tag, or its last script
            does not contain the expected ``var data = [...]`` payload
    """
    # verify=False disables TLS certificate checks (kept from the original).
    req = requests.get(url=video_url, verify=False)
    bf = BeautifulSoup(req.text, 'lxml')

    scripts = bf.find_all('script')
    if not scripts:
        raise ValueError('no <script> tag found in %s' % video_url)

    # Raw string: "\[" in a normal string literal is an invalid escape.
    # The video metadata is taken from the last script on the page.
    match = re.search(r'var data = \[(.+)\];', str(scripts[-1]))
    if match is None:
        raise ValueError('video data not found in %s' % video_url)

    video_html = json.loads(match.group(1))
    download_url = video_html['video']['play_addr']['url_list'][0]
    return download_url
video_url: 帶水印的視頻地址
video_name: 視頻名
watermark_flag: 是否下載不帶水印的視頻
def video_downloader(self, video_url, video_name, watermark_flag=True):
    """
    Download one video to disk, printing size and progress to stdout.

    Parameters:
        video_url: URL of the watermarked video share page
        video_name: path of the file to write
        watermark_flag: when true, resolve the address through
            ``remove_watermark``; otherwise through ``get_download_url``
    Returns:
        None
    """
    size = 0
    if watermark_flag:
        video_url = self.remove_watermark(video_url)
    else:
        video_url = self.get_download_url(video_url)

    # verify=False disables TLS certificate checks (kept from the original).
    with closing(requests.get(video_url, stream=True, verify=False)) as response:
        # Check the status first: an error response may carry no
        # content-length header at all.
        if response.status_code != 200:
            sys.stdout.write(' [下載失敗]:HTTP %d\n' % response.status_code)
            return

        chunk_size = 1024
        # Chunked responses carry no content-length; 0 means "unknown".
        content_size = int(response.headers.get('content-length', 0))
        if content_size:
            sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

        with open(video_name, "wb") as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                size += len(data)

                # '\r' rewrites the same console line on every chunk.
                if content_size:
                    sys.stdout.write(' [下載進度]:%.2f%%' % float(size / content_size * 100) + '\r')
                else:
                    # No total size known, so a percentage cannot be computed.
                    sys.stdout.write(' [下載進度]:%d bytes' % size + '\r')
                sys.stdout.flush()
def remove_watermark(self, video_url):
    """
    Get the watermark-free address of a video.

    The share link is submitted to the douyin.iiilab.com form through the
    browser driver and the link is read from the result paragraph.

    Parameters:
        video_url: URL of the watermarked video
    Returns:
        the ``href`` of the first <a> element in the result paragraph
    """
    browser = self.driver
    browser.visit('http://douyin.iiilab.com/')

    # Fill in the form and submit it.
    url_field = browser.find_by_tag('input')
    url_field.fill(video_url)
    submit_button = browser.find_by_xpath('//button[@class="btn btn-default"]')
    submit_button.click()

    # Parse the first result paragraph and pull the link out of it.
    result_paragraphs = browser.find_by_xpath('//div[@class="thumbnail"]/div/p')
    soup = BeautifulSoup(result_paragraphs[0].html, 'lxml')
    anchor = soup.find('a')
    return anchor.get('href')
def run(self):
    """
    Entry point: prompt for a user ID and download all of that user's videos.

    The videos are written into a directory named after the user's nickname,
    created in the current working directory when missing.

    Parameters:
        None
    Returns:
        None
    """
    self.hello()
    user_id = input('請輸入ID(例如40103580):')
    video_names, video_urls, nickname = self.get_video_urls(user_id)
    # exist_ok avoids listing the whole working directory just to test existence.
    os.makedirs(nickname, exist_ok=True)
    print('視頻下載中:共有%d個作品!\n' % len(video_urls))
    for num, (raw_name, video_url) in enumerate(zip(video_names, video_urls), start=1):
        print(' 解析第%d個視頻鏈接 [%s] 中,請稍后!\n' % (num, video_url))
        # Strip BOTH kinds of path separator. The original if/elif removed
        # only one kind, so a description containing both still broke the path.
        video_name = raw_name.replace('\\', '').replace('/', '')
        self.video_downloader(video_url, os.path.join(nickname, video_name))
        print('\n')

    print('下載完成!')
全部代碼
# -*- coding:utf-8 -*-
"""Download every video published by one Douyin user.

Running the script prompts for a user ID, looks the user up through the
search API and saves each of their videos into a directory named after the
user's nickname.
"""
# Python learning & discussion group: 125240963

import json
import os
import re
import sys
import time
from contextlib import closing
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from splinter.browser import Browser
from splinter.driver.webdriver.chrome import Chrome, Options


class DouYin(object):
    """Douyin App video downloader."""

    # User search endpoint; ``%s`` receives the URL-encoded search keyword.
    SEARCH_URL = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622'

    # Post list endpoint; receives (uid, number of posts to fetch).
    POST_URL = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s'

    # Default share description given to videos without a custom one.
    # The literal in the published listing was corrupted by pinyin
    # annotations, so it could never match.
    # NOTE(review): the API presumably answers in Simplified Chinese; the
    # Traditional form is accepted as well -- confirm against a live response.
    DEFAULT_SHARE_DESCS = ('抖音-原创音乐短视频社区', '抖音-原創音樂短視頻社區')

    def __init__(self, width=500, height=300, executable_path='D:/chromedriver'):
        """
        Start the headless Chrome browser used by ``remove_watermark``.

        Parameters:
            width: accepted for backward compatibility; not used
            height: accepted for backward compatibility; not used
            executable_path: location of the chromedriver binary
        """
        # Headless browser
        chrome_options = Options()
        chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"')
        self.driver = Browser(driver_name='chrome', executable_path=executable_path, options=chrome_options, headless=True)

    def get_video_urls(self, user_id, max_retries=5):
        """
        Look up a user through the search API and collect their video links.

        Parameters:
            user_id: the user ID to search for (compared against the
                ``unique_id`` field of each search result)
            max_retries: how many times the search request is repeated
                before giving up when no result matches ``user_id``
        Returns:
            video_names: list of file names (``<description>.mp4``, or
                ``<n>.mp4`` when the video only carries the default
                description)
            video_urls: list of share-page URLs, parallel to ``video_names``
            nickname: nickname of the matched user
        Raises:
            ValueError: no search result matched ``user_id`` within
                ``max_retries`` attempts
        """
        video_names = []
        video_urls = []

        # Percent-encode the keyword so IDs with reserved characters do not
        # corrupt the query string.
        search_url = self.SEARCH_URL % quote(str(user_id))

        # The original looped until the first hit matched, which never ends
        # for an unknown ID. Retry a bounded number of times and check
        # every hit.
        user_info = None
        for _ in range(max_retries):
            # verify=False disables TLS certificate checks (kept from the
            # original).
            req = requests.get(url=search_url, verify=False)
            html = json.loads(req.text)
            for entry in html.get('user_list') or []:
                info = entry['user_info']
                if str(info['unique_id']) == str(user_id):
                    user_info = info
                    break
            if user_info is not None:
                break
        if user_info is None:
            raise ValueError('no user with ID %r found after %d attempts'
                             % (user_id, max_retries))

        nickname = user_info['nickname']
        # Ask for all of the user's posts in a single page.
        user_url = self.POST_URL % (user_info['uid'], user_info['aweme_count'])
        req = requests.get(url=user_url, verify=False)
        html = json.loads(req.text)

        # Running counter used to name videos that have no description of
        # their own.
        i = 1
        for each in html['aweme_list']:
            share_info = each['share_info']
            share_desc = share_info['share_desc']
            if share_desc in self.DEFAULT_SHARE_DESCS:
                video_names.append(str(i) + '.mp4')
                i += 1
            else:
                video_names.append(share_desc + '.mp4')
            video_urls.append(share_info['share_url'])

        return video_names, video_urls, nickname

    def get_download_url(self, video_url):
        """
        Resolve the watermarked download address from a video share page.

        Parameters:
            video_url: URL of the (watermarked) video share page
        Returns:
            download_url: first entry of the ``play_addr`` URL list embedded
                in the page's ``var data = [...]`` script
        Raises:
            ValueError: the page contains no <script> tag, or its last
                script does not contain the expected payload
        """
        req = requests.get(url=video_url, verify=False)
        bf = BeautifulSoup(req.text, 'lxml')

        scripts = bf.find_all('script')
        if not scripts:
            raise ValueError('no <script> tag found in %s' % video_url)

        # Raw string: "\[" in a normal string literal is an invalid escape.
        # The video metadata is taken from the last script on the page.
        match = re.search(r'var data = \[(.+)\];', str(scripts[-1]))
        if match is None:
            raise ValueError('video data not found in %s' % video_url)

        video_html = json.loads(match.group(1))
        download_url = video_html['video']['play_addr']['url_list'][0]
        return download_url

    def video_downloader(self, video_url, video_name, watermark_flag=True):
        """
        Download one video to disk, printing size and progress to stdout.

        Parameters:
            video_url: URL of the watermarked video share page
            video_name: path of the file to write
            watermark_flag: when true, resolve the address through
                ``remove_watermark``; otherwise through ``get_download_url``
        Returns:
            None
        """
        size = 0
        if watermark_flag:
            video_url = self.remove_watermark(video_url)
        else:
            video_url = self.get_download_url(video_url)

        with closing(requests.get(video_url, stream=True, verify=False)) as response:
            # Check the status first: an error response may carry no
            # content-length header at all.
            if response.status_code != 200:
                sys.stdout.write(' [下載失敗]:HTTP %d\n' % response.status_code)
                return

            chunk_size = 1024
            # Chunked responses carry no content-length; 0 means "unknown".
            content_size = int(response.headers.get('content-length', 0))
            if content_size:
                sys.stdout.write(' [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))

            with open(video_name, "wb") as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    size += len(data)

                    # '\r' rewrites the same console line on every chunk.
                    if content_size:
                        sys.stdout.write(' [下載進度]:%.2f%%' % float(size / content_size * 100) + '\r')
                    else:
                        # No total size known, so no percentage is possible.
                        sys.stdout.write(' [下載進度]:%d bytes' % size + '\r')
                    sys.stdout.flush()

    def remove_watermark(self, video_url):
        """
        Get the watermark-free address of a video.

        The share link is submitted to the douyin.iiilab.com form through
        the browser driver and the link is read from the result paragraph.

        Parameters:
            video_url: URL of the watermarked video
        Returns:
            the ``href`` of the first <a> element in the result paragraph
        """
        self.driver.visit('http://douyin.iiilab.com/')
        self.driver.find_by_tag('input').fill(video_url)
        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()
        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html
        bf = BeautifulSoup(html, 'lxml')
        return bf.find('a').get('href')

    def run(self):
        """
        Entry point: prompt for a user ID and download all of that user's
        videos into a directory named after the user's nickname.

        Parameters:
            None
        Returns:
            None
        """
        self.hello()
        user_id = input('請輸入ID(例如40103580):')
        video_names, video_urls, nickname = self.get_video_urls(user_id)
        # exist_ok avoids listing the whole working directory just to test
        # existence.
        os.makedirs(nickname, exist_ok=True)
        print('視頻下載中:共有%d個作品!\n' % len(video_urls))
        for num, (raw_name, video_url) in enumerate(zip(video_names, video_urls), start=1):
            print(' 解析第%d個視頻鏈接 [%s] 中,請稍后!\n' % (num, video_url))
            # Strip BOTH kinds of path separator. The original if/elif
            # removed only one kind, so a description containing both
            # still broke the path.
            video_name = raw_name.replace('\\', '').replace('/', '')
            self.video_downloader(video_url, os.path.join(nickname, video_name))
            print('\n')

        print('下載完成!')

    def hello(self):
        """
        Print the welcome banner.

        Parameters:
            None
        Returns:
            None
        """
        print('*' * 100)
        print('\t\t\t\t抖音App視頻下載小助手')
        print('\t\t作者:Python學習交流群:125240963')
        print('*' * 100)


if __name__ == '__main__':
    douyin = DouYin()
    douyin.run()