"""Scrape event pages linked from the Cedar Cultural Center listing.

Run as a script with one command-line argument (the run environment), which
is passed to ``digitools.getBrowser`` to obtain the browser used for
scraping. Each event detail page linked from the listing is loaded, its
title and start date/time are extracted, and the result is handed to
``digitools.createBasicEvent``.
"""
import os
import sys
from datetime import datetime
from urllib.parse import urljoin

from dateutil import relativedelta

import django

# Django has to be configured before the model imports further down.
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
from selenium import webdriver as wd

from xvfbwrapper import Xvfb

from lxml import html
import pytz

from events.models import Event as DSEvent, Organization
from digitools import getBrowser, createBasicEvent, getSource

venue, created = Organization.objects.get_or_create(
    name="Cedar Cultural Center",
    city="Minneapolis",
    website="https://www.thecedar.org/listing",
)

tz = pytz.timezone("US/Central")

BASE_URL = "https://www.thecedar.org"

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'


def _first(tree, xpath):
    """Return the first result of ``xpath`` on ``tree``, or None if empty."""
    hits = tree.xpath(xpath)
    return hits[0] if hits else None


def get_events(ps):
    """Visit every event linked from the listing page and record it.

    ``ps`` is the parsed listing page (an lxml tree). For each
    ``summary-title`` link the event page is loaded with the module-level
    browser ``br``, parsed, and passed to ``createBasicEvent`` together with
    the module-level ``venue``.

    An event page that lacks the title, date or time element, or whose
    date/time text does not match ``DATETIME_FORMAT``, is reported and
    skipped so that one malformed page does not abort the whole run.
    """
    links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
    for l in links:
        # urljoin equals plain concatenation for root-relative hrefs and
        # additionally copes with absolute hrefs.
        url = urljoin(BASE_URL, l)
        br.get(url)
        sleep(3)  # give the page time to finish rendering before reading it
        pse = html.fromstring(br.page_source)

        time = _first(pse, './/*/time[@class="event-time-12hr-start"]/text()')
        date = _first(pse, './/*/time[@class="event-date"]/text()')
        title = _first(pse, './/*/h1[@class="eventitem-title"]/text()')
        if time is None or date is None or title is None:
            print("Skipping event, missing title/date/time:", url)
            continue

        # Strip stray whitespace around the text nodes so strptime is not
        # tripped up by formatting-only differences.
        dateStamp = date.strip() + " " + time.strip()
        try:
            parsed = datetime.strptime(dateStamp, DATETIME_FORMAT)
        except ValueError:
            print("Skipping event, unparseable date:", repr(dateStamp), url)
            continue

        event = {
            'title': title,
            'dateStamp': parsed,
            'link': url,
        }
        createBasicEvent(event, "Mu", venue)


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit() instead of the interactive-only quit(); same exit status.
    sys.exit()

calendar_url = 'https://www.thecedar.org/listing'

try:
    ps = getSource(br, calendar_url)
    get_events(ps)
finally:
    # Always release the browser, even when scraping fails part-way.
    br.close()