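"""
Scraper for The Hook & Ladder (thehookmpls.com) event calendar.

Walks the first three pages of the venue's events list in a browser obtained
from events.digitools, parses the tribe-events calendar markup with lxml, and
stores each show as an Event in the ds_events Django project.
"""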
import os
import sys
from datetime import datetime
from time import sleep
from pprint import pprint as ppr

import pytz
from dateutil import relativedelta  # not used in this scraper
from lxml import html

import django

# Make the ds_events project importable and initialise Django before the
# events models are imported below.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Organization, Scraper, Event
import events.digitools as digitools

count = 0
# Venue record that the scraped shows will be attached to.
venue, created = Organization.objects.get_or_create(
    name="Hook & Ladder",
    city="Minneapolis",
    website="https://thehookmpls.com",
    is_venue=True,
)
# digitools.getScraper returns the Scraper record for this venue and its
# starting item count.
scraper, item_count_start = digitools.getScraper(venue)
ppr(scraper)
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'
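# DATETIME_FORMAT matches the start-date strings scraped below,
# e.g. "Fri, Jan 12, 2024 @ 08:00 PM".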
# Calendar pages to scrape: the first three pages of the venue's event list.
calendar_url = [
    "https://thehookmpls.com/events/list/page/1",
    "https://thehookmpls.com/events/list/page/2",
    "https://thehookmpls.com/events/list/page/3",
]
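# The first command-line argument is the run environment ("run_env") passed to
# digitools.getBrowser, which returns the Selenium-style browser used below;
# without an argument the script exits.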
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    sys.exit(1)
def get_listings(pse, events):
    """Parse one calendar page (an lxml tree) and append its shows to events."""
    nevents = pse.xpath('.//*/article')
    for event in nevents:
        e = {}
        e['datetime'] = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
        e['show_title'] = event.xpath('.//*/header/h2/a/@title')[0]
        e['link'] = event.xpath('.//*/header/h2/a/@href')[0]
        try:
            e['subtitle'] = event.xpath('.//*/header/div[@class="eventSubHead"]/text()')[0]
        except IndexError:
            # Listings without a subtitle line are skipped entirely.
            continue
        try:
            e['price'] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0].replace("Tickets ", "")
        except IndexError:
            e['price'] = "See Link"
        e['image'] = event.xpath('.//*/img/@data-src')[0]
        e['date_time'] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
        e['scraper'] = scraper
        e['calendar'] = scraper.calendar
        events.append(e)
events = []

# Load each calendar page in the browser, wait for it to render, then parse it.
for cal in calendar_url:
    br.get(cal)
    sleep(3)
    pse = html.fromstring(br.page_source)
    get_listings(pse, events)
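# Save each parsed show. With no defaults= argument, update_or_create simply
# looks up an Event matching all of these fields and creates it if missing.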
for event in events:
    try:
        # update_or_create returns an (Event, created) tuple.
        new_event, created = Event.objects.update_or_create(
            calendar=event['calendar'],
            scraper=event['scraper'],
            event_type='Mu',
            show_title=event["show_title"],
            show_link=event["link"],
            show_date=event["date_time"],
            show_day=event["date_time"],
            guests=event["subtitle"],
            venue=venue,
        )
    except Exception as e:
        print("oops ", e, "\n\n", "Scraper:", scraper)
# Close the browser and let digitools update the Scraper record.
br.close()
digitools.updateScraper(scraper, item_count_start)