def serialize_players(players):
    """Serialize the ``players`` field of an ``EventItem`` for export.

    Args:
        players: iterable of ``PlayerItem`` objects (or anything else that
            ``dict()`` accepts, such as mappings or key/value pairs).

    Returns:
        A list with one plain ``dict`` per player, in input order.
    """
    # Build a concrete list instead of returning ``map(...)``: on Python 3
    # ``map`` is a lazy, one-shot iterator, which the standard ``json``
    # encoder rejects and which is empty on a second read. On Python 2 the
    # result is the same list the original produced.
    return [dict(player) for player in players]
class BwinSpider(scrapy.Spider):
    """Spider that extracts events and player odds from bwin's
    multi-league index page."""

    name = "bwin"
    allowed_domains = ["bwin.com", "sports.bwin.com"]

    def start_requests(self):
        """Return the single form request that seeds the crawl.

        The request carries the ``sportId`` and ``page`` form fields and is
        handled by :meth:`parse`.
        """
        return [
            FormRequest(
                'https://sports.bwin.com/en/sports/indexmultileague',
                formdata={'sportId': '5', 'page': '1'},
                callback=self.parse
            )
        ]

    def get_players(self, text):
        """Build one ``PlayerItem`` per cell of the options table.

        Args:
            text: a selector (despite the name) scoped to one event; it must
                provide ``.css()``.

        Returns:
            A list of ``PlayerItem`` objects. ``odds`` and ``name`` are
            ``None`` when the cell lacks the corresponding element.
        """
        players = []
        for cell in text.css('table.options td'):
            player = PlayerItem()
            player['odds'] = cell.css('.odds::text').extract_first()
            player['name'] = cell.css('.option-name::text').extract_first()
            players.append(player)
        return players

    def get_events(self, text):
        """Build one ``EventItem`` per ``ul li`` found under a league node.

        Args:
            text: a selector (despite the name) scoped to one league; it
                must provide ``.xpath()`` and ``.css()``.

        Returns:
            A list of ``EventItem`` objects with ``league``, ``time``,
            ``date`` and ``players`` set.
        """
        # The league name is the same for every event under this node, so it
        # is looked up once. Previously it was extracted but never stored,
        # leaving ``EventItem.league`` unset on every scraped event.
        league = text.xpath(
            'h2//a[@class="league-link"]/text()').extract_first()

        events = []
        for entry in text.css('ul li'):
            event = EventItem()
            event['league'] = league
            event['time'] = entry.xpath('h6//span[1]/text()').extract_first()
            event['date'] = entry.xpath('h6//span[2]/text()').extract_first()
            event['players'] = self.get_players(entry)
            events.append(event)
        return events

    def parse(self, response):
        """Collect the events of every league listed in the response.

        Args:
            response: the response to the request issued by
                :meth:`start_requests`.

        Returns:
            A flat list of ``EventItem`` objects across all league nodes.
        """
        leagues = response.xpath(
            '//div[@id="bet-offer"]'
            '//div[@id="international-highlights"]//div//ul//li')

        events = []
        for league in leagues:
            events.extend(self.get_events(league))
        return events