Skip to content

案例

豆瓣TOP250

from loguru import logger
from traspider import Spider, Request


class DoubanSpider(Spider):
    """Example spider for the Douban movie Top 250 listing.

    Listing pages are described by ``self.urls``; ``parse`` turns each
    listing response into detail-page requests and ``parse_detail`` logs
    the detail responses.
    """

    def __init__(self):
        """Fill ``self.urls`` with the listing pages for start=0, 25, ..., 225."""
        # NOTE(review): the base ``Spider.__init__`` is not invoked here --
        # confirm the framework does not need it.
        # presumably the framework reads ``self.urls`` as its start URLs; verify.
        page_offsets = range(0, 250, 25)
        self.urls = [
            f"https://movie.douban.com/top250?start={i}&filter="
            for i in page_offsets
        ]

    def parse(self, response, request):
        """Yield one detail-page ``Request`` per movie link found in ``response``.

        Links are the ``href`` values of anchors directly inside
        ``<div class="hd">``. ``request`` is accepted but not used.
        """
        detail_links = response.xpath("//div[@class='hd']/a/@href")
        for link in detail_links:
            detail_request = Request(url=link, callback=self.parse_detail)
            yield detail_request

    def parse_detail(self, response, request):
        """Log the detail-page ``response`` at INFO level; ``request`` is unused."""
        logger.info(response)

    async def download_middleware(self, request):
        """Replace the request's headers with a desktop-Chrome user agent.

        The existing ``request.headers`` value is overwritten, not merged.
        Returns the same ``request`` object.
        """
        browser_headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
        }
        request.headers = browser_headers
        return request


if __name__ == "__main__":
    # Script entry point: build the spider and start it.
    DoubanSpider().start()