Using protobuf parsing in a Scrapy middleware

Author: Shelton Ma
Define the middleware
```python
# middlewares.py
from google.protobuf.json_format import MessageToJson

# Body is the request message generated by protoc; adjust the import path
# to wherever your generated *_pb2.py module actually lives.
from demo_scrapy.proto.search_pb2 import Body


class DemoScrapyDownloaderMiddleware(object):

    def process_request(self, request, spider):
        # Build the protobuf body for the search endpoint
        if 'v2/entities:search' in request.url:
            spider.logger.debug('Adding protobuf body and updating headers')
            bd = Body()
            bd.limit = 120
            bd.filter.status = request.meta.get('status')
            bd.field15 = 1
            spider.logger.debug(f'MessageToJson(bd): {MessageToJson(bd)}')

            payload = bd.SerializeToString()
            # Request.body has no public setter, so use the internal one
            request._set_body(payload)

            # Add headers: ask for a JSON response, since a protobuf
            # response may fail to parse
            headers = {
                'Accept': 'application/json',
                'Pragma': 'no-cache',
                'Cache-Control': 'no-cache',
                'x-app-type': 'groundup',
                'content-type': 'application/x-protobuf',
            }
            request.headers.update(
                {k.encode(): v.encode() for k, v in headers.items()}
            )
        return None
```
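For context, here is a minimal sketch of the spider side that drives this middleware: the request URL has to contain `v2/entities:search`, and the `status` value is passed through `request.meta`. The spider name and endpoint URL below are placeholders, not taken from the original project.

```python
import scrapy


class DemoSpider(scrapy.Spider):
    name = 'demo'

    def start_requests(self):
        # The middleware only rewrites requests whose URL contains
        # 'v2/entities:search'; 'status' is read from request.meta.
        yield scrapy.Request(
            url='https://example.com/v2/entities:search',  # placeholder endpoint
            method='POST',
            meta={'status': 1},
            callback=self.parse,
        )

    def parse(self, response):
        self.logger.debug(response.text)
```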
settings.py
```python
# settings.py
DOWNLOADER_MIDDLEWARES = {
    'demo_scrapy.middlewares.DemoScrapyDownloaderMiddleware': 739,
    'demo_scrapy.middlewares.ProxyMiddleware': 740,
}
```
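Note that downloader middlewares with a lower order value have their `process_request` called earlier, so the protobuf body is set before `ProxyMiddleware` runs.

Because the middleware sends `Accept: application/json`, the callback can usually read the response as JSON; if the server ignores that header and still returns `application/x-protobuf`, the body can be decoded with the corresponding generated message class. The sketch below assumes a hypothetical `SearchResponse` message and import path, and is meant to be dropped into the spider as its callback.

```python
import json

from google.protobuf.json_format import MessageToJson

# Hypothetical generated response message; adjust to your own *_pb2 module
from demo_scrapy.proto.search_pb2 import SearchResponse


def parse(self, response):
    """Spider callback: prefer JSON, fall back to decoding raw protobuf."""
    content_type = response.headers.get('Content-Type', b'').decode()
    if 'application/x-protobuf' in content_type:
        msg = SearchResponse()
        msg.ParseFromString(response.body)
        data = json.loads(MessageToJson(msg))
    else:
        data = json.loads(response.text)
    self.logger.debug(f'parsed response: {data}')
```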