"""Scrape the Birchbark Books events page and store each event.

Usage: run with one command-line argument (the run environment), which is
passed to ``digitools.getBrowser``. Without it the script prints
"No run_env" and exits.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project model can be imported, so the
# remaining imports deliberately come after ``django.setup()``.
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createURL, createBasicEvent, getSource

# The page's date text carries no year, so the year at start-up is appended.
# NOTE(review): events listed across a year boundary would get the wrong
# year -- confirm whether that matters for this venue.
current_year = str(datetime.now().year)

venue, created = Organization.objects.get_or_create(
    name="Birchbark Books",
    city="Minneapolis",
    website="https://birchbarkbooks.com/pages/events",
)

DATETIME_FORMAT = '%A, %B %d @ %I:%M%p %Y'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'

# Formats are tried in this order; the first one that parses wins.
DATETIME_FORMATS = (
    DATETIME_FORMAT,
    DATETIME_FORMAT_2,
    DATETIME_FORMAT_3,
    DATETIME_FORMAT_4,
    DATETIME_FORMAT_5,
)

# Removed from the day token (e.g. "3rd" -> "3") so that ``%d`` can parse it.
ORDINAL_SUFFIXES = ("th", "st", "rd", "nd")

EVENTS_URL = "https://birchbarkbooks.com/pages/events"


def _extract_event(row):
    """Build the event dict (title, link, date, date_num) from one feature row.

    Raises:
        IndexError: if the title or date element is missing, or the date text
            has fewer than three space-separated tokens.
    """
    event = {}
    event['title'] = row.xpath('.//*/p[@class="h3"]/text()')[0].strip()
    event['link'] = EVENTS_URL

    raw_date = row.xpath('.//*/p[@class="accent-subtitle"]/text()')[0]
    tokens = (raw_date.replace("Central", "") + " " + current_year).split(" ")

    # The third token is expected to be the day of the month with an ordinal
    # suffix; strip the suffix so strptime's %d accepts it.
    day = tokens[2]
    for suffix in ORDINAL_SUFFIXES:
        day = day.replace(suffix, "")
    tokens[2] = day

    event['date'] = " ".join(tokens)
    event['date_num'] = day
    return event


def _parse_event_date(date_text):
    """Parse ``date_text`` with the first matching entry of DATETIME_FORMATS.

    Each failed attempt's error is printed.

    Returns:
        A ``(datetime, format_index)`` tuple, or ``(None, None)`` when no
        format matches.
    """
    for index, fmt in enumerate(DATETIME_FORMATS):
        try:
            return datetime.strptime(date_text, fmt), index
        except ValueError as e:
            print(e)
    return None, None


def get_events(ps):
    """Create an event for every ``feature-row`` div found in ``ps``.

    Args:
        ps: parsed page source exposing ``.xpath()`` (as returned by
            ``digitools.getSource``).

    Rows whose title/date cannot be extracted, or whose date matches none of
    the known formats, are reported on stdout and skipped so that one bad
    row does not abort the whole scrape. Errors raised by
    ``createBasicEvent`` itself are not swallowed.
    """
    contents = ps.xpath('.//*/div[@class="feature-row"]')
    for c in contents:
        try:
            event = _extract_event(c)
        except IndexError as e:
            print(e)
            continue

        date_stamp, format_index = _parse_event_date(event['date'])
        if date_stamp is None:
            # No known format matched; skip this row and keep going.
            continue

        event['dateStamp'] = date_stamp
        createBasicEvent(event, "Ed", venue)
        if format_index:
            # Visual separator after the parse errors printed for the
            # formats that were tried before the matching one.
            print("\n\n+++\n\n")


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

calendar_url = 'https://birchbarkbooks.com/pages/events'

try:
    ps = getSource(br, calendar_url)
    get_events(ps)
finally:
    # Always release the browser, even when scraping fails.
    br.close()