"""Scrape the 331 Club calendar page and record each listed show as an event.

The first command-line argument is passed to ``digitools.getBrowser`` to
select the browser/run environment; without it the script prints a message
and exits.
"""

import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django has to be configured before the project's models can be imported.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from selenium.webdriver.common.by import By
from lxml import html

from events.models import Organization, Scraper, Event
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Club 331",
    city="Minneapolis",
    website="https://331club.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Formats are tried in this order; the first one that parses wins.
DATETIME_FORMATS = (DATETIME_FORMAT, DATETIME_FORMAT_2, DATETIME_FORMAT_3)

# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
# NOTE(review): every listing is assumed to fall in the current calendar
# year, so shows listed across a New Year boundary get the wrong year.
current_year = str(datetime.now().year)


def process_times(times):
    """Return the usable start times found in a column's text nodes.

    Newlines and the placeholder "TBA" are stripped from each entry. Only
    entries that then end in "pm" are kept; for a range such as "7-9pm" the
    part before the first "-" is kept and "pm" is re-appended ("7pm").

    Args:
        times: iterable of strings (the text nodes of a column's <p> tags).

    Returns:
        A list of cleaned time strings, possibly empty.
    """
    start_times = []
    for raw in times:
        cleaned = raw.replace("\n", "").replace("TBA", "")
        if not cleaned.endswith("pm"):
            continue
        if "-" in cleaned:
            cleaned = cleaned.split("-")[0] + "pm"
        start_times.append(cleaned)
    return start_times


def parse_event_datetime(parts):
    """Parse the joined date/time tokens using the known formats.

    Args:
        parts: list of string tokens (date pieces, optional time, year).

    Returns:
        A naive ``datetime`` for the first matching format, or the string
        "Invalid" when none of the formats match.
    """
    text = " ".join(parts)
    for fmt in DATETIME_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return "Invalid"


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

events = []

try:
    br.get(calendar_url)
    sleep(3)
    br.find_element(By.CLASS_NAME, 'more_events').click()
    sleep(2)

    ps = html.fromstring(br.page_source)

    dates = ps.xpath('.//*/div[@class="event"]')
    dates = dates + ps.xpath('.//*/div[@class="event hidden"]')

    for d in dates:
        # The first two text spans of the date block are used as the date
        # tokens (they must match "%b %d" for parsing to succeed).
        event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
        cols = d.xpath('.//div[@class="column"]')
        for c in cols:
            bands = c.xpath('.//p/a/text()')
            start_times = process_times(c.xpath('.//p/text()'))

            event = {}
            event["datetime"] = event_date + start_times + [current_year]
            event["date_time"] = parse_event_datetime(event["datetime"])
            event["bands"] = ", ".join(bands)

            # Columns without any linked band names are not stored.
            if bands:
                event['scraper'] = scraper
                event['calendar'] = scraper.calendar
                event['title'] = event["bands"]
                # NOTE(review): date_time may be the string "Invalid" here;
                # it is still handed to createBasicEvent, as before.
                event['date'] = event["date_time"]
                event['dateStamp'] = event["date_time"]
                event['link'] = calendar_url
                try:
                    digitools.createBasicEvent(event, "Mu", venue)
                except Exception as e:
                    # Best effort: one bad listing must not stop the run.
                    print('oops', e)
                events.append(event)
finally:
    # Always release the browser, even when scraping fails part-way.
    br.close()

digitools.updateScraper(scraper, item_count_start)