"""Scrape upcoming shows from the Hook & Ladder events listing pages.

Usage: pass the run environment as the first command-line argument; it is
handed to ``digitools.getBrowser`` to obtain the browser object used to fetch
each listing page. Every listed show is then written through the ``Event``
model and attached to the 'msp' calendar.
"""

import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project models are imported below.
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools

from lxml import html

count = 0

venue, created = Organization.objects.get_or_create(
    name="Hook & Ladder",
    city="Minneapolis",
    website="https://thehookmpls.com",
    is_venue=True,
)

scraper, item_count_start, virtcal = digitools.getScraper(venue, 'msp')
ppr(scraper)

# NOTE(review): tz is never applied below, so the parsed datetimes are naive.
# Confirm whether they should be localized to US/Central before saving.
tz = pytz.timezone("US/Central")

# Format of the start-date text on the listing page.
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'

# Listing pages to scrape, in order.
calendar_url = [
    "https://thehookmpls.com/events/list/page/1",
    "https://thehookmpls.com/events/list/page/2",
    "https://thehookmpls.com/events/list/page/3"
]

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit() instead of quit(): quit() is an interactive helper installed
    # by the site module and is not guaranteed to exist in every run mode.
    sys.exit()


def get_listings(pse, events):
    """Append one dict per listed show found in *pse* to *events*.

    Args:
        pse: parsed lxml document of a listing page.
        events: list that is extended in place with one dict per show. Each
            dict has the keys 'datetime', 'show_title', 'link', 'subtitle',
            'price', 'image', 'date_time', 'scraper' and 'calendar'.

    Raises:
        IndexError: if an article has no start date, title or link.
        ValueError: if the start-date text does not match DATETIME_FORMAT.
    """
    for article in pse.xpath('.//*/article'):
        e = {}
        # Date, title and link are required; a missing one still raises.
        e['datetime'] = article.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
        e['show_title'] = article.xpath('.//*/header/h2/a/@title')[0]
        e['link'] = article.xpath('.//*/header/h2/a/@href')[0]

        subtitle = article.xpath('.//*/header/div[@class="eventSubHead"]/text()')
        if not subtitle:
            # Articles without a sub-heading are skipped entirely, as before.
            continue
        e['subtitle'] = subtitle[0]

        price = article.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')
        e['price'] = price[0].replace("Tickets ", "") if price else "See Link"

        # The image is not persisted below, so a missing one must not abort
        # the whole scrape.
        image = article.xpath('.//*/img/@data-src')
        e['image'] = image[0] if image else None

        e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
        e['scraper'] = scraper
        e['calendar'] = [scraper.calendar]
        events.append(e)


events = []

try:
    for cal in calendar_url:
        br.get(cal)
        # Presumably gives the page time to finish loading before it is read.
        sleep(3)
        pse = html.fromstring(br.page_source)
        get_listings(pse, events)

    for event in events:
        try:
            # NOTE(review): update_or_create returns an (object, created)
            # tuple, and that tuple is what is passed to add_calendar.
            # Confirm add_calendar accepts it.
            new_event = Event.objects.update_or_create(
                scraper=event['scraper'],
                event_type='Mu',
                show_title=event["show_title"],
                show_link=event["link"],
                show_date=event["date_time"],
                show_day=event["date_time"],
                # The subtitle is already a single string. Joining it with
                # " " put a space between every character.
                # NOTE(review): rows saved with the old spaced-out value will
                # no longer match this lookup and may need a cleanup.
                guests=event["subtitle"],
                venue=venue,
            )
            digitools.add_calendar(new_event, 'msp')
        except Exception as e:
            # Best effort per event: report the failure and keep going.
            print("oops ", e, "\n\n", "Scraper:", scraper)
finally:
    # Always release the browser, even if fetching or parsing failed.
    br.close()

digitools.updateScraper(scraper, item_count_start)