Scrapy 获取不到拉勾网指定 XPath 的数据
用 scrapy shell 调试同样的 XPath,也取不到数据。
爬虫小白,在线求大佬指点!
# Project identity. The spider package path is derived from the bot name
# ('lagou' -> 'lagou.spiders') so the two cannot drift apart.
BOT_NAME = 'lagou'
NEWSPIDER_MODULE = BOT_NAME + '.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Log level for the crawl.
# NOTE(review): 'ERROR' hides per-request status lines; a lower level may be
# more useful while debugging missing data.
LOG_LEVEL = 'ERROR'
# LOG_FILE = 'lagou.log'
# Pool of User-Agent strings used to identify the crawler to the website.
# NOTE(review): Scrapy documents USER_AGENT as a single string. A list is only
# meaningful if a custom downloader middleware (not visible in this file)
# picks one entry per request -- confirm such a middleware is enabled.
USER_AGENT = [
    "MSIE (MSIE 6.0; X11; Linux; i686) Opera 7.23",
    "Opera/9.20 (Macintosh; Intel Mac OS X; U; en)",
    "Opera/9.0 (Macintosh; PPC Mac OS X; U; en)",
    "iTunes/9.0.3 (Macintosh; U; Intel Mac OS X 10_6_2; en-ca)",
    "Mozilla/4.76 [en_jp] (X11; U; SunOS 5.8 sun4u)",
    "iTunes/4.2 (Macintosh; U; PPC Mac OS X 10.2)",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:5.0) Gecko/20100101 Firefox/5.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:16.0) Gecko/20120813 Firefox/16.0",
    "Mozilla/4.77 [en] (X11; I; IRIX;64 6.5 IP30)",
    "Mozilla/4.8 [en] (X11; U; SunOS; 5.7 sun4u)",
]
# Outbound proxy pool. Every entry is a dict with two keys:
#   'ip_port'   - proxy address written as 'host:port'
#   'user_pass' - credentials string; empty for every proxy listed here
# Each endpoint appears exactly once so that no proxy is over-represented
# in the pool.
# NOTE(review): the code that reads PROXIES is not visible in this file;
# presumably a custom downloader middleware picks an entry per request --
# confirm against the middleware.
#
# The helper name is deliberately lower-case: Scrapy only loads upper-case
# names from a settings module, so it does not leak in as a setting.
_proxy_endpoints = (
    '61.216.156.222:60808',
    '183.236.232.160:8080',
    '222.74.73.202:42055',
    '210.5.10.87:53281',
)
PROXIES = [
    {'ip_port': endpoint, 'user_pass': ''}
    for endpoint in _proxy_endpoints
]
# Cookie handling is switched off (Scrapy enables it by default).
# NOTE(review): a site that relies on session cookies may serve different
# content with cookies disabled -- confirm this is intended.
COOKIES_ENABLED = False

# Delay, in seconds, between requests to the same website (default: 0).
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also the autothrottle settings and docs.
DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Maximum concurrent requests performed by Scrapy (default: 16).
#CONCURRENT_REQUESTS = 32

# robots.txt rules are not obeyed.
ROBOTSTXT_OBEY = False
但是在浏览器里用 XPath 插件测试同样的表达式,数据是能正常取到的,哭了……
急急急,在线等大佬指点!