在scrapy中发送post请求,无法返回正确的结果


我的代码如下:


 class ScrSpider(BaseSpider):
    name = "scr"
    allowed_domains = ["http://pubs.rsc.org"]

    def start_requests(self):
        searchterm="AAEAAAD%2F%2F%2F%2F%2FAQAAAAAAAAAMAgAAAF9SU0NwdWJzLmVQbGF0Zm9ybS5TZXJ2aWNlLkRhdGFDb250cmFjdHMsIFZlcnNpb249MTkuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49bnVsbAUBAAAAOVJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5TZWFyY2guU2VhcmNoVGVybQ4AAAAZPENhdGVnb3J5PmtfX0JhY2tpbmdGaWVsZBw8U3ViQ2F0ZWdvcnk%2Ba19fQmFja2luZ0ZpZWxkHDxDb250ZW50VHlwZT5rX19CYWNraW5nRmllbGQaPENyaXRlcmlhcz5rX19CYWNraW5nRmllbGQXPEZhY2V0cz5rX19CYWNraW5nRmllbGQcPFJlcXVlc3RUaW1lPmtfX0JhY2tpbmdGaWVsZB88QXV0aG9yQ3JpdGVyaWE%2Ba19fQmFja2luZ0ZpZWxkIDxQdWJsaWNhdGlvbkRhdGU%2Ba19fQmFja2luZ0ZpZWxkGTxFeGNsdWRlcz5rX19CYWNraW5nRmllbGQXPFNvdXJjZT5rX19CYWNraW5nRmllbGQfPE91dHB1dFN0YW5kYXJkPmtfX0JhY2tpbmdGaWVsZB48UmVzdWx0c0Zvcm1hdD5rX19CYWNraW5nRmllbGQePERpc3BsYXlDb3VudHM%2Ba19fQmFja2luZ0ZpZWxkIDxQcm9kdWN0UGFnZVNpemU%2Ba19fQmFja2luZ0ZpZWxkAQEBAwMABAQDAQEBAQG%2BAVN5c3RlbS5Db2xsZWN0aW9ucy5HZW5lcmljLkxpc3RgMVtbUlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLkVudGl0eS5OYW1lVmFsdWUsIFJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cywgVmVyc2lvbj0xOS4wLjAuMCwgQ3VsdHVyZT1uZXV0cmFsLCBQdWJsaWNLZXlUb2tlbj1udWxsXV2%2BAVN5c3RlbS5Db2xsZWN0aW9ucy5HZW5lcmljLkxpc3RgMVtbUlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLkVudGl0eS5OYW1lVmFsdWUsIFJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cywgVmVyc2lvbj0xOS4wLjAuMCwgQ3VsdHVyZT1uZXV0cmFsLCBQdWJsaWNLZXlUb2tlbj1udWxsXV0NPVJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5TZWFyY2guQXV0aG9yQ3JpdGVyaWECAAAAPlJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5TZWFyY2guUHVibGljYXRpb25EYXRlAgAAAL4BU3lzdGVtLkNvbGxlY3Rpb25zLkdlbmVyaWMuTGlzdGAxW1tSU0NwdWJzLmVQbGF0Zm9ybS5TZXJ2aWNlLkRhdGFDb250cmFjdHMuRW50aXR5Lk5hbWVWYWx1ZSwgUlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLCBWZXJzaW9uPTE5LjAuMC4wLCBDdWx0dXJlPW5ldXRyYWwsIFB1YmxpY0tleVRva2VuPW51bGxdXQIAAAAGAwAAAANhbGwGBAAAAAAGBQAAAANhbGwJBgAAAAkHAAAAAAAAAAAAAAAJCAAAAAkJAAAACQoAAAAKCgoKCgQGAAAAvgFTeXN0ZW0uQ29sbGVjdGlvbnMuR2VuZXJpYy5MaXN0YDFbW1JTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5FbnRpdHkuTmFtZVZhbHVlLCBSU0NwdWJzLmVQbGF0Zm9ybS5TZXJ2aWNlLkRhdGFDb250cmFjdHMsIFZlcnNpb249MTkuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49bnVsbF1dAwAAAAZfaXRlbXMFX3NpemUIX3ZlcnNpb24EAAA6UlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLkVudGl0eS5OYW1lVmFsdWVbXQIAAAAICAkLAAAAAgAAAAIAAAABBwAAAAYAAAAJDAAAAAAAAAAAAAAABQgAAAA9UlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLlNlYXJjaC5BdXRob3JDcml0ZXJpYQIAAAAgPEJvb2xlYW5PcGVyYXRvcj5rX19CYWNraW5nRmllbGQYPEF1dGhvcnM%2Ba19fQmFja2luZ0ZpZWxkAQO%2FAVN5c3RlbS5Db2xsZWN0aW9ucy5HZW5lcmljLkxpc3RgMVtbUlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLlNlYXJjaC5BdXRob3JJbmZvLCBSU0NwdWJzLmVQbGF0Zm9ybS5TZXJ2aWNlLkRhdGFDb250cmFjdHMsIFZlcnNpb249MTkuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49bnVsbF1dAgAAAAoKBQkAAAA%2BUlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLlNlYXJjaC5QdWJsaWNhdGlvbkRhdGUFAAAAHzxJc1NlbGVjdGVkRGF0ZT5rX19CYWNraW5nRmllbGQZPERhdGVUeXBlPmtfX0JhY2tpbmdGaWVsZBs8V2l0aEluTGFzdD5rX19CYWNraW5nRmllbGQaPERhdGVSYW5nZT5rX19CYWNraW5nRmllbGQcPERpc3BsYXlEYXRlPmtfX0JhY2tpbmdGaWVsZAABBAQBATlSU0NwdWJzLmVQbGF0Zm9ybS5TZXJ2aWNlLkRhdGFDb250cmFjdHMuU2VhcmNoLldpdGhJbkxhc3QCAAAAOFJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5TZWFyY2guRGF0ZVJhbmdlAgAAAAIAAAAACgoKCgEKAAAABgAAAAkMAAAAAAAAAAAAAAAHCwAAAAABAAAABAAAAAQ4UlNDcHVicy5lUGxhdGZvcm0uU2VydmljZS5EYXRhQ29udHJhY3RzLkVudGl0eS5OYW1lVmFsdWUCAAAACQ0AAAAJDgAAAA0CBwwAAAAAAQAAAAAAAAAEOFJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5FbnRpdHkuTmFtZVZhbHVlAgAAAAUNAAAAOFJTQ3B1YnMuZVBsYXRmb3JtLlNlcnZpY2UuRGF0YUNvbnRyYWN0cy5FbnRpdHkuTmFtZVZhbHVlBAAAABU8TmFtZT5rX19CYWNraW5nRmllbGQcPERpc3BsYXlOYW1lPmtfX0JhY2tpbmdGaWVsZBY8VmFsdWU%2Ba19fQmFja2luZ0ZpZWxkIDxCb29sZWFuT3BlcmF0b3I%2Ba19fQmFja2luZ0ZpZWxkAQEBAQIAAAAGDwAAAAhmcmVldGV4dAoGEAAAABJBdXRob3I6bGluZ2xpbmcgZ2UKAQ4AAAANAAAABhEAAAAQT3JpZ2luYWxGcmVlVGV4dAoGEgAAABJBdXRob3I6bGluZ2xpbmcgZ2UKCw%3D%3D"

        return [FormRequest("http://pubs.rsc.org/en/search/journalresult",
                               formdata={'searchterm': searchterm, 'resultcount': '100','category':'all','pageno':'1'},
                               callback=self.parse)]




    def parse(self, response):
       sel = Selector(response)
       base_url=get_base_url(response)
       sites = sel.xpath(u'//title/text()')
       print sites

有返回数据,只不过是错误页面。

同样,我在 在线post 中测试,能返回正确结果,如下图:
图片描述

我一开始怀疑是不是post参数太长,无法传递。但在post在线工具中测试又没有问题,所以现在怀疑是不是在scrapy中写法有问题?

请教。

post scrapy

十字纹海豚 9 years, 9 months ago

Your Answer