"""Scrape event listings from the Parkway Theater website into DSEvent rows.

Usage: run with one positional argument, which is handed to
``digitools.getBrowser`` to choose the browser/run environment.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta

# The scrapers directory must be on sys.path before ``dtss`` can be imported.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# NOTE(review): presumably configures Django; it has to run before the
# ``events`` imports below -- confirm against dtss.
dtss.getReady()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event as DSEvent
import events.digitools as digitools

BASE_URL = "https://theparkwaytheater.com"

try:
    venue, _ = Organization.objects.get_or_create(
        name="Parkway Theater",
        city="Minneapolis",
        website=BASE_URL,
        is_venue=True,
    )
except Exception:
    # Best-effort fallback: if get_or_create fails for any reason, look the
    # venue up by name alone.
    venue = Organization.objects.get(name="Parkway Theater")

scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')

# NOTE(review): ``tz`` is not used anywhere in this file; the datetimes
# stored below are naive. Confirm whether they should be localized.
tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d, %Y %I:%M %p'

# (listing page URL, event_type code stored on each DSEvent from that page)
CALENDAR_PAGES = (
    (BASE_URL + '/live-events', "Mu"),
    (BASE_URL + "/movies", "Th"),
)


def _parse_start(date_text, time_text):
    """Return the datetime described by a date text and a 12-hour time text.

    Either text may be a range joined by an en dash ("–"); only the part
    before the dash is used.

    Raises:
        ValueError: if the combined text does not match DATETIME_FORMAT.
    """
    start_date = date_text.split("–")[0].strip()
    start_time = time_text.split("–")[0].strip()
    return datetime.strptime(start_date + " " + start_time, DATETIME_FORMAT)


def get_events(ps, event_type):
    """Create or update a DSEvent for every event summary found in ``ps``.

    Args:
        ps: parsed page exposing ``.xpath()`` (as returned by
            ``digitools.getSource``).
        event_type: value stored in ``DSEvent.event_type`` for each event.

    Returns:
        None. ``scraper.items`` is incremented once per stored event.

    A summary that cannot be parsed is skipped, and a failed database write
    is reported; neither aborts the remaining summaries.
    """
    contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
    img_etc = ps.xpath('.//*/div[@class="summary-thumbnail-outer-container"]/a/div/img/@src')

    # NOTE(review): zip pairs summaries and thumbnails by position and stops
    # at the shorter list, so a summary without a thumbnail would shift the
    # pairing -- confirm every summary on the page has an image.
    for c, i in zip(contents, img_etc):
        try:
            title = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
            link = BASE_URL + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
            start = _parse_start(
                c.xpath('.//div/div/time/text()')[0],
                c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0],
            )
            details = {
                'description': c.xpath('.//*/p/text()')[0],
                'img_link': i,
            }
        except Exception as e:
            # Deliberately broad: one malformed summary (missing node,
            # unparsable date, ...) must not stop the rest of the page.
            print("Skipping event that could not be parsed: {!r}".format(e))
            continue

        try:
            new_event, created = DSEvent.objects.update_or_create(
                scraper=scraper,
                event_type=event_type,
                show_title=title,
                show_link=link,
                show_date=start,
                show_day=start,
                more_details=details,
                venue=venue,
            )
            digitools.add_calendar(new_event, 'msp')
            scraper.items += 1
        except Exception as e:
            print(e)
            print("\n\n+++\n\n")


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

try:
    for calendar_url, page_event_type in CALENDAR_PAGES:
        ps = digitools.getSource(br, calendar_url)
        get_events(ps, page_event_type)
finally:
    # Close the browser even when a page fails to load or parse.
    br.close()

digitools.updateScraper(scraper, item_count_start)