Scrapy 抓取的中文内容保存后显示为乱码的问题
爬虫代码如下:
# -*- coding: utf-8 -*-
#encoding=utf-8
import scrapy
from maizi.items import MaiziItem
# Python 2 only: ``reload`` is a builtin there, and reloading ``sys`` makes
# ``setdefaultencoding`` callable again so that implicit str/unicode
# conversions use UTF-8.  Python 3 has no builtin ``reload`` and already
# defaults to UTF-8, so the hack is skipped there instead of raising
# NameError at import time.
import sys

if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')
class MaimaiSpider(scrapy.Spider):
    """Spider that scrapes headline entries from the zaobao.com China news page.

    Every ``<li>`` found under ``//*[@id="l_title"]/ul`` yields one
    ``MaiziItem`` with ``title``, ``link`` and ``desc`` fields.
    """

    name = "maimai"
    # allowed_domains takes bare domain names, not URLs: a URL entry never
    # matches a request's host, so the offsite filter cannot work with it.
    allowed_domains = ["www.zaobao.com"]
    start_urls = (
        'http://www.zaobao.com/news/china//',
    )
    # Write feed exports (e.g. ``-o items.json``) as real UTF-8 text instead
    # of ``\uXXXX`` escape sequences, so Chinese content stays readable.
    # NOTE(review): FEED_EXPORT_ENCODING is documented for Scrapy 1.2+;
    # confirm the installed version honours it.
    custom_settings = {
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    def parse(self, response):
        """Yield one ``MaiziItem`` per list entry in the response.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            MaiziItem: ``title``, ``link`` and ``desc`` each hold the list
            returned by ``.extract()`` for the corresponding XPath.
        """
        for li in response.xpath('//*[@id="l_title"]/ul/li'):
            item = MaiziItem()
            item['title'] = li.xpath('a[1]/p/text()').extract()
            item['link'] = li.xpath('a[1]/@href').extract()
            item['desc'] = li.xpath('a[2]/p/text()').extract()
            yield item  # hand the populated item to the item pipeline
Item 的定义如下:
import scrapy
class MaiziItem(scrapy.Item):
    """Container for one scraped entry: its title, link and description."""

    # Each attribute declares one key that may be set on the item.
    title = scrapy.Field()
    link = scrapy.Field()
    desc = scrapy.Field()
爬虫结果输出:
输出结果中的中文显示为乱码,请问应该如何修改?
三日月扶子
9 years, 1 month ago