免费视频淫片aa毛片_日韩高清在线亚洲专区vr_日韩大片免费观看视频播放_亚洲欧美国产精品完整版

打開APP
userphoto
未登錄

開通VIP,暢享免費電子書等14項超值服

開通VIP
怎么用Python爬取抖音小視頻? 資深程序員都這樣爬取的(附源碼)

簡介

抖音,是一款可以拍短視頻的音樂創意短視頻社交軟件,該軟件于2016年9月上線,是一個專注年輕人的15秒音樂短視頻社區。用戶可以通過這款軟件選擇歌曲,拍攝15秒的音樂短視頻,形成自己的作品。此APP已在Android各大應用商店和App Store上線。

今天咱們就用Python爬取抖音視頻

準備:

環境:Python3.6+Windows

IDE:你開心就好,喜歡用哪個就用哪個

模塊:

1 from splinter.driver.webdriver.chrome import Options, Chrome2 from splinter.browser import Browser3 from contextlib import closing4 import requests, json, time, re, os, sys, time5 from bs4 import BeautifulSoup

獲得視頻播放地址

  • 查詢的用戶ID

  • 視頻名字列表

  • 視頻鏈接列表

  • 用戶昵稱

 1     def get_video_urls(self, user_id): 2  3 +        video_names = [] 4 +        video_urls = [] 5 +        unique_id = '' 6 +        while unique_id != user_id: 7 +            search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id 8 +            req = requests.get(url = search_url, verify = False) 9 +            html = json.loads(req.text)10 +            aweme_count = html['user_list'][0]['user_info']['aweme_count']11 +            uid = html['user_list'][0]['user_info']['uid']12 +            nickname = html['user_list'][0]['user_info']['nickname']13 +            unique_id = html['user_list'][0]['user_info']['unique_id']14 +        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count)15 +        req = requests.get(url = user_url, verify = False)16 +        html = json.loads(req.text)17 +        i = 118 +        for each in html['aweme_list']:19 +            share_desc = each['share_info']['share_desc']20 +            if '抖音-原創(chuàng)音樂短視頻社區(qū)' == share_desc:21 +                video_names.append(str(i) + '.mp4')22 +                i += 123 +            else:24 +                video_names.append(share_desc + '.mp4')25 +            video_urls.append(each['share_info']['share_url'])26 +27 +        return video_names, video_urls, nickname

獲得帶水印的視頻播放地址

  • video_url:帶水印的視頻播放地址

  • download_url: 帶水印的視頻下載地址

1     def get_download_url(self, video_url):2 3 +        req = requests.get(url = video_url, verify = False)4 +        bf = BeautifulSoup(req.text, 'lxml')5 +        script = bf.find_all('script')[-1]6 +        video_url_js = re.findall('var data = \[(.+)\];', str(script))[0]7 +        video_html = json.loads(video_url_js)8 +        download_url = video_html['video']['play_addr']['url_list'][0]9 +        return download_url

視頻下載

  • video_url: 帶水印的視頻地址

  • video_name: 視頻名

  • watermark_flag: 是否下載不帶水印的視頻

 1     def video_downloader(self, video_url, video_name, watermark_flag=True): 2 +        """ 3 +        視頻下載 4 +        Parameters: 5 +            video_url: 帶水印的視頻地址 6 +            video_name: 視頻名 7 +            watermark_flag: 是否下載不帶水印的視頻 8 +        Returns: 9 +            無10 +        """11 +        size = 012 +        if watermark_flag == True:13 +            video_url = self.remove_watermark(video_url)14 +        else:15 +            video_url = self.get_download_url(video_url)16 +        with closing(requests.get(video_url, stream=True, verify = False)) as response:17 +            chunk_size = 102418 +            content_size = int(response.headers['content-length']) 19 +            if response.status_code == 200:20 +                sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024))21 +22 +                with open(video_name, "wb") as file:  23 +                    for data in response.iter_content(chunk_size = chunk_size):24 +                        file.write(data)25 +                        size += len(data)26 +                        file.flush()27 +28 +                        sys.stdout.write('  [下載進(jìn)度]:%.2f%%' % float(size / content_size * 100) + '\r')29 +                        sys.stdout.flush()

獲得無水印的視頻播放地址

 1     def remove_watermark(self, video_url): 2 +        """ 3 +        獲得無水印的視頻播放地址 4 +        Parameters: 5 +            video_url: 帶水印的視頻地址 6 +        Returns: 7 +            無水印的視頻下載地址 8 +        """ 9 +        self.driver.visit('http://douyin.iiilab.com/')10 +        self.driver.find_by_tag('input').fill(video_url)11 +        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()12 +        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html13 +        bf = BeautifulSoup(html, 'lxml')14 +        return bf.find('a').get('href')

下載視頻

 1     def run(self): 2 +        """ 3 +        運行函數(shù) 4 +        Parameters: 5 +            None 6 +        Returns: 7 +            None 8 +        """ 9 +        self.hello()10 +        user_id = input('請輸入ID(例如40103580):')11 +        video_names, video_urls, nickname = self.get_video_urls(user_id)12 +        if nickname not in os.listdir():13 +            os.mkdir(nickname)14 +        print('視頻下載中:共有%d個作品!\n' % len(video_urls))15 +        for num in range(len(video_urls)):16 +            print('  解析第%d個視頻鏈接 [%s] 中,請稍后!\n' % (num+1, video_urls[num]))17 +            if '\\' in video_names[num]:18 +                video_name = video_names[num].replace('\\', '')19 +            elif '/' in video_names[num]:20 +                video_name = video_names[num].replace('/', '')21 +            else:22 +                video_name = video_names[num]23 +            self.video_downloader(video_urls[num], os.path.join(nickname, video_name))24 +            print('\n')25 +26 +        print('下載完成!')

全部代碼

  1 +# -*- coding:utf-8 -*-  2   3 +Python學(xué)習(xí)交流群:125240963  4 +Python學(xué)習(xí)交流群:125240963  5 +Python學(xué)習(xí)交流群:125240963  6   7 +from splinter.driver.webdriver.chrome import Options, Chrome  8 +from splinter.browser import Browser  9 +from contextlib import closing 10 +import requests, json, time, re, os, sys, time 11 +from bs4 import BeautifulSoup 12 + 13  class DouYin(object): 14     def __init__(self, width = 500, height = 300): 15 +        """ 16 +        抖音App視頻下載 17 +        """ 18 +        # 無頭瀏覽器 19 +        chrome_options = Options() 20 +        chrome_options.add_argument('user-agent="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"') 21 +        self.driver = Browser(driver_name='chrome', executable_path='D:/chromedriver', options=chrome_options, headless=True) 22 + 23     def get_video_urls(self, user_id): 24 +        """ 25 +        獲得視頻播放地址 26 +        Parameters: 27 +            user_id:查詢的用戶ID 28 +        Returns: 29 +            video_names: 視頻名字列表 30 +            video_urls: 視頻鏈接列表 31 +            nickname: 用戶昵稱 32 +        """ 33 +        video_names = [] 34 +        video_urls = [] 35 +        unique_id = '' 36 +        while unique_id != user_id: 37 +            search_url = 'https://api.amemv.com/aweme/v1/discover/search/?cursor=0&keyword=%s&count=10&type=1&retry_type=no_retry&iid=17900846586&device_id=34692364855&ac=wifi&channel=xiaomi&aid=1128&app_name=aweme&version_code=162&version_name=1.6.2&device_platform=android&ssmix=a&device_type=MI+5&device_brand=Xiaomi&os_api=24&os_version=7.0&uuid=861945034132187&openudid=dc451556fc0eeadb&manifest_version_code=162&resolution=1080*1920&dpi=480&update_version_code=1622' % user_id 38 +            req = requests.get(url = search_url, verify = False) 39 +            html = json.loads(req.text) 40 +            aweme_count = html['user_list'][0]['user_info']['aweme_count'] 41 +            uid = 
html['user_list'][0]['user_info']['uid'] 42 +            nickname = html['user_list'][0]['user_info']['nickname'] 43 +            unique_id = html['user_list'][0]['user_info']['unique_id'] 44 +        user_url = 'https://www.douyin.com/aweme/v1/aweme/post/?user_id=%s&max_cursor=0&count=%s' % (uid, aweme_count) 45 +        req = requests.get(url = user_url, verify = False) 46 +        html = json.loads(req.text) 47 +        i = 1 48 +        for each in html['aweme_list']: 49 +            share_desc = each['share_info']['share_desc'] 50 +            if '抖音-原創(chuàng)音樂短視頻社區(qū)' == share_desc: 51 +                video_names.append(str(i) + '.mp4') 52 +                i += 1 53 +            else: 54 +                video_names.append(share_desc + '.mp4') 55 +            video_urls.append(each['share_info']['share_url']) 56 + 57 +        return video_names, video_urls, nickname 58 + 59     def get_download_url(self, video_url): 60 +        """ 61 +        獲得帶水印的視頻播放地址 62 +        Parameters: 63 +            video_url:帶水印的視頻播放地址 64 +        Returns: 65 +            download_url: 帶水印的視頻下載地址 66 +        """ 67 +        req = requests.get(url = video_url, verify = False) 68 +        bf = BeautifulSoup(req.text, 'lxml') 69 +        script = bf.find_all('script')[-1] 70 +        video_url_js = re.findall('var data = \[(.+)\];', str(script))[0] 71 +        video_html = json.loads(video_url_js) 72 +        download_url = video_html['video']['play_addr']['url_list'][0] 73 +        return download_url 74 + 75     def video_downloader(self, video_url, video_name, watermark_flag=True): 76 +        """ 77 +        視頻下載 78 +        Parameters: 79 +            video_url: 帶水印的視頻地址 80 +            video_name: 視頻名 81 +            watermark_flag: 是否下載不帶水印的視頻 82 +        Returns: 83 +            無 84 +        """ 85 +        size = 0 86 +        if watermark_flag == True: 87 +            video_url = self.remove_watermark(video_url) 88 +        else: 89 +            video_url = 
self.get_download_url(video_url) 90 +        with closing(requests.get(video_url, stream=True, verify = False)) as response: 91 +            chunk_size = 1024 92 +            content_size = int(response.headers['content-length'])  93 +            if response.status_code == 200: 94 +                sys.stdout.write('  [文件大小]:%0.2f MB\n' % (content_size / chunk_size / 1024)) 95 + 96 +                with open(video_name, "wb") as file:   97 +                    for data in response.iter_content(chunk_size = chunk_size): 98 +                        file.write(data) 99 +                        size += len(data)100 +                        file.flush()101 +102 +                        sys.stdout.write('  [下載進(jìn)度]:%.2f%%' % float(size / content_size * 100) + '\r')103 +                        sys.stdout.flush()104 +105 +106     def remove_watermark(self, video_url):107 +        """108 +        獲得無水印的視頻播放地址109 +        Parameters:110 +            video_url: 帶水印的視頻地址111 +        Returns:112 +            無水印的視頻下載地址113 +        """114 +        self.driver.visit('http://douyin.iiilab.com/')115 +        self.driver.find_by_tag('input').fill(video_url)116 +        self.driver.find_by_xpath('//button[@class="btn btn-default"]').click()117 +        html = self.driver.find_by_xpath('//div[@class="thumbnail"]/div/p')[0].html118 +        bf = BeautifulSoup(html, 'lxml')119 +        return bf.find('a').get('href')120 +121     def run(self):122 +        """123 +        運行函數(shù)124 +        Parameters:125 +            None126 +        Returns:127 +            None128 +        """129 +        self.hello()130 +        user_id = input('請輸入ID(例如40103580):')131 +        video_names, video_urls, nickname = self.get_video_urls(user_id)132 +        if nickname not in os.listdir():133 +            os.mkdir(nickname)134 +        print('視頻下載中:共有%d個作品!\n' % len(video_urls))135 +        for num in range(len(video_urls)):136 +            print('  解析第%d個視頻鏈接 [%s] 中,請稍后!\n' % (num+1, 
video_urls[num]))137 +            if '\\' in video_names[num]:138 +                video_name = video_names[num].replace('\\', '')139 +            elif '/' in video_names[num]:140 +                video_name = video_names[num].replace('/', '')141 +            else:142 +                video_name = video_names[num]143 +            self.video_downloader(video_urls[num], os.path.join(nickname, video_name))144 +            print('\n')145 +146 +        print('下載完成!')147 +148     def hello(self):149 +        """150 +        打印歡迎界面151 +        Parameters:152 +            None153 +        Returns:154 +            None155 +        """156 +        print('*' * 100)157 +        print('\t\t\t\t抖音App視頻下載小助手')158 +        print('\t\t作者:Python學(xué)習(xí)交流群:125240963')159 +        print('*' * 100)160 +161 +162 +if __name__ == '__main__':163 +    douyin = DouYin()164 +    douyin.run()

 

本站僅提供存儲服務,所有內容均由用戶發布,如發現有害或侵權內容,請點擊舉報
打開APP,閱讀全文并永久保存 查看更多類似文章
猜你喜歡
類似文章
要你命三千xposed模塊組,你需要的這里都有!
電腦怎么給視頻加水印?看完這篇文章你就知道了
免費去水印永久免費的軟件分享?來看工具合集
Python不僅能爬網頁還能爬取APP呢!批量爬取抖音視頻!最新代碼
Python實現超簡單【抖音】無水印視頻批量下載
餡餅配方及其做法
更多類似文章 >>
生活服務
分享 收藏 導長圖 關注 下載文章
綁定賬號成功
后續可登錄賬號暢享VIP特權!
如果VIP功能使用有故障,
可點擊這里聯系客服!

聯系客服