Python-当当最受欢迎的前250排名.py

TAIDONG 2018-06-07 AM 3439℃ 0条

import requests
from bs4 import BeautifulSoup
import xlwt


def request_douban(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (any other status code, or a ``requests`` error such as a
        connection failure or timeout).
    """
    # A browser-like User-Agent is sent with every request.
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        return None

    if response.status_code == 200:
        return response.text
    # Make the "not 200" outcome explicit instead of falling off the end.
    return None


# Workbook and worksheet that collect every scraped row.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('豆瓣电影Top250', cell_overwrite_ok=True)

# Header row (row 0): one caption per column, written left to right.
for _column, _caption in enumerate(('名称', '图片', '排名', '评分', '作者', '简介')):
    sheet.write(0, _column, _caption)

# Index of the next worksheet row to fill; row 0 already holds the headers.
n = 1


def save_to_excel(soup):
    """Append one worksheet row per list entry found in *soup*.

    Finds the element with class ``grid_view`` and, for every ``li`` inside
    it, extracts the title, image URL, rank, score, the text of the first
    ``p`` element and the optional quote (class ``inq``). Each entry is
    printed as a progress line and written to the module-level ``sheet`` at
    row ``n``; the module-level counter ``n`` is then advanced by one.

    Args:
        soup: Parsed page (a ``BeautifulSoup`` object).
    """
    global n

    grid = soup.find(class_='grid_view')
    if grid is None:
        # No list container on this page; skip it rather than crash with an
        # AttributeError on ``None.find_all``.
        print('未找到电影列表，跳过该页')
        return

    for entry in grid.find_all('li'):
        item_name = entry.find(class_='title').string
        item_img = entry.find('a').find('img').get('src')
        # NOTE(review): presumably the rank element carries an empty class
        # attribute -- confirm against the page markup.
        item_index = entry.find(class_='').string
        item_score = entry.find(class_='rating_num').string
        item_author = entry.find('p').text

        # The quote is optional. Reset it for every entry so a missing quote
        # neither raises UnboundLocalError (first entry) nor reuses the
        # previous entry's quote (later entries).
        quote = entry.find(class_='inq')
        item_intr = (quote.string or '') if quote is not None else ''

        print(f'爬取电影：{item_index} | {item_name} | {item_score} | {item_intr}')

        sheet.write(n, 0, item_name)
        sheet.write(n, 1, item_img)
        sheet.write(n, 2, item_index)
        sheet.write(n, 3, item_score)
        sheet.write(n, 4, item_author)
        sheet.write(n, 5, item_intr)

        n = n + 1


def main(page):
    """Scrape one result page and store its entries in the worksheet.

    Args:
        page: Zero-based page number; it is multiplied by 25 to form the
            ``start`` query parameter of the request URL.
    """
    url = 'https://movie.douban.com/top250?start=' + str(page * 25) + '&filter='
    html = request_douban(url)
    if html is None:
        # request_douban returns None when the download fails; there is
        # nothing to parse, so report it and skip this page.
        print('页面获取失败，跳过：' + url)
        return
    soup = BeautifulSoup(html, 'lxml')
    save_to_excel(soup)


if __name__ == '__main__':
    # Ten pages; main() turns each page number into a start offset of 25.
    for i in range(0, 10):
        main(i)

    # Save inside the guard so that merely importing this module does not
    # write a file. xlwt produces the legacy .xls format, so the file name
    # uses the .xls extension rather than .xlsx.
    book.save(u'豆瓣最受欢迎的250部电影.xls')

标签: Python

非特殊说明，本博所有文章均为博主原创。

如若转载，请注明出处：https://zhangtaidong.cn/archives/59/

上一篇 LINUX/CentOS 睡眠问题与解决方法

下一篇 ESP HomeKit 环境搭建与配置

评论已关闭

登录 退出 主页 云盘

:c_g: :c_g: :c_g: 2020年11月27日 22:46:29
A contented mind is the greatest blessing a man can enjoy in this world.-Joseph Addison :idea: 2020年09月11日 15:22:53
最不喜欢下雨天...默默继续扣键盘.2020年06月20日 17:52:59
:g_u_i: 每天坚持10000步！希望疫情尽快结束！ :k_z: 2020年06月17日 21:36:26
锲而舍之，朽木不折;锲而不舍，金石可镂.-荀子2020年06月08日 11:15:23
这才是夏天该有的样子！2020年06月07日 16:22:44
辛苦了几天...终于成功将网站从Jpress移植到了Typecho. o(╥﹏╥)o 2020年06月05日 22:12:09

MQTT Arduino ESP32 Modem Share Router 远程控制智能开关 VPN Wireguard CentOS linux esp-8266 蓝牙控制 APP控制 LuatOS Docker Python Typecho Jpress HomeBridge Windows VB Tinkerboard ESP HOMEKIT AWTRIX 智能家居 ESP8266-01 远程开关安卓客户端

仙女养❤猪养猪❤小仙女

Python-当当最受欢迎的前250排名.py

评论已关闭

管理界面

栏目分类

最新动态

标签云

友情链接申请

Python-当当最受欢迎的前250排名.py

评论已关闭

管理界面

 栏目分类

最新动态

标签云

友情链接申请

管理界面

栏目分类

最新动态

标签云

友情链接申请