"""Scrape St Paul Chamber Orchestra (SPCO) concert listings into the events DB.

Run as a script with one command-line argument, which is handed to
``digitools.getBrowser``.  Each calendar page is fetched, every linked event
page is visited, and one basic event is created per performance day.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project model can be imported.
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="St Paul Chamber Orchestra",
    city="St Paul",
    website="https://thespco.org",
    is_venue=False,
)

scraper, item_count_start, virtcal = digitools.getScraper(venue, 'msp')

# Time Signatures
# NOTE(review): ``tz`` is defined but never applied below, so ``dateStamp``
# values are naive datetimes -- confirm that is what createBasicEvent expects.
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y – %I:%M %p'


def _compose_title(text_nodes, emphasized):
    """Rebuild the event title from the <h1> text nodes and its <em> text.

    Args:
        text_nodes: Plain text nodes directly under the page's <h1>.
        emphasized: Text nodes found inside the <h1>'s <em> element.

    Returns:
        The title string, with the emphasized part spliced back into the
        position it appears to occupy in the heading.

    Raises:
        ValueError: If the heading has no usable text at all.
    """
    if len(emphasized) != 1:
        if not text_nodes:
            raise ValueError("event page has no <h1> title text")
        return text_nodes[0]

    em_text = emphasized[0]
    if not text_nodes:
        # The heading consists solely of the emphasized text.
        return em_text
    if len(text_nodes) == 2:
        # Text on both sides of the <em>: put it back in the middle.
        return text_nodes[0] + em_text + text_nodes[1]
    if text_nodes[0].startswith(" "):
        # A leading space means the plain text follows the <em>.
        return em_text + text_nodes[0]
    return text_nodes[0] + em_text


def _normalize_city(location):
    """Collapse a location string to a known city name when it contains one."""
    for city in ('Minneapolis', 'St. Paul'):
        if city in location:
            return city
    return location


def get_events(ps, event_type):
    """Create one basic event per performance day for each linked event page.

    Relies on the module-level ``br`` (browser) and ``scraper`` objects.
    Any error while processing an event page is printed and that page is
    skipped, so one bad page does not stop the run.

    Args:
        ps: Parsed calendar page exposing ``xpath``.
        event_type: Event type code passed through to
            ``digitools.createBasicEvent``.
    """
    hrefs = ps.xpath('.//*/div[@class="event-title"]/a/@href')
    # The same event can be linked more than once on a calendar page.
    for href in set(hrefs):
        try:
            link = 'https://content.thespco.org' + href
            page = digitools.getSource(br, link)
            title = _compose_title(
                page.xpath('.//*/article/h1/text()'),
                page.xpath('.//*/article/h1/em/text()'),
            )

            for day in page.xpath('.//*/div[@class="day"]'):
                venue_name = day.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
                raw_location = day.xpath('.//*/span[@class="location"]/text()')[0].strip()
                location = _normalize_city(raw_location)

                day_venue, _ = Organization.objects.get_or_create(
                    name=venue_name,
                    city=location,
                    is_venue=True,
                )

                date_text = (
                    day.xpath('.//*/h3[@class="date"]/text()')[0]
                    .replace("\n", "")
                    .replace("\t", "")
                    .strip()
                )

                # A fresh dict per performance so no state leaks between days.
                event = {
                    'scraper': scraper,
                    'calendars': [scraper.calendar],
                    'dateStamp': datetime.strptime(date_text, DATETIME_FORMAT),
                    'venue': day_venue,
                    'location': location,
                    'title': "SPCO: " + title,
                    'link': link,
                }
                digitools.createBasicEvent(event, event_type, day_venue)
                scraper.items += 1
        except Exception as exc:
            print("ERROR: ", exc)
            print("\n\n+++\n\n")


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

# Get Event Page Link(s)
# NOTE(review): presumably expands the base URL into several calendar page
# URLs -- confirm against digitools.createURLNoZero.
links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")

try:
    for link in links:
        ps = digitools.getSource(br, link)
        get_events(ps, "Mu")
        sleep(3)  # pause between calendar pages
finally:
    # Always release the browser, even if a calendar page fails to load.
    br.close()

digitools.updateScraper(scraper, item_count_start)