"""Scrape the 331 Club calendar page and record each listed show as an event.

Usage: run with one command-line argument (the run environment), which is
handed to ``digitools.getBrowser``.  Without it the script prints
"No run_env" and exits.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta

sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
# NOTE(review): getReady() runs before the ``events`` imports below;
# presumably it prepares the project environment they need -- keep this order.
dtss.getReady()

from time import sleep
from pprint import pprint as ppr
import pytz

from selenium.webdriver.common.by import By
from lxml import html

from events.models import Organization, Scraper, Event
import events.digitools as digitools

# Accepted shapes of "<month> <day> [<time>] <year>", tried in this order.
DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
DATETIME_FORMATS = (DATETIME_FORMAT, DATETIME_FORMAT_2, DATETIME_FORMAT_3)


def process_times(times):
    """Return the usable start times found in a list of raw text fragments.

    Each fragment has newlines and the marker "TBA" removed and surrounding
    whitespace stripped.  Only fragments ending in "pm" are kept.  For a
    range such as "8-10pm" or "8pm-10pm" only the start is returned; "pm" is
    appended to the start only when it has no am/pm suffix of its own, so
    "8pm-10pm" yields "8pm" rather than "8pmpm".

    Args:
        times: iterable of strings (text nodes taken from the page).

    Returns:
        list of time strings, in input order.
    """
    cleaned = []
    for raw in times:
        text = raw.replace("\n", "").replace("TBA", "").strip()
        if not text.endswith("pm"):
            continue
        if "-" in text:
            start = text.split("-")[0].strip()
            if not start:
                # Nothing before the dash, so there is no start time to use.
                continue
            if not start.endswith(("am", "pm")):
                start += "pm"
            text = start
        cleaned.append(text)
    return cleaned


def _parse_event_datetime(parts):
    """Parse date/time fragments with the first matching format.

    Args:
        parts: list of strings that are joined with single spaces.

    Returns:
        A naive ``datetime``, or ``None`` when no format in
        ``DATETIME_FORMATS`` matches.
    """
    text = " ".join(parts)
    for fmt in DATETIME_FORMATS:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None


venue, created = Organization.objects.get_or_create(
    name="Club 331",
    city="Minneapolis",
    website="https://331club.com",
    is_venue=True,
)

scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')

# Page to scrape, and the year appended to every scraped date.
# NOTE(review): the page's dates are parsed without a year of their own, so
# listings that fall in the next calendar year get the current year -- confirm
# whether a rollover rule is needed.
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit() instead of the interactive-only quit(); exit status unchanged.
    sys.exit()

try:
    br.get(calendar_url)
    sleep(3)
    # Click the "more_events" control before capturing the page source.
    br.find_element(By.CLASS_NAME, 'more_events').click()
    sleep(2)
    ps = html.fromstring(br.page_source)
finally:
    # The parsed tree is all that is needed from here on; always release the
    # browser, even when loading or clicking failed.
    br.close()

dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')

events = []

for d in dates:
    # First two <span> texts of the date cell, joined ahead of the time and year.
    event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
    for c in d.xpath('.//div[@class="column"]'):
        bands = c.xpath('.//p/a/text()')
        if not bands:
            # Columns without any linked act produce no event.
            continue

        time = process_times(c.xpath('.//p/text()'))

        event = {}
        # A column can list several times; only the first is used, because
        # joining all of them can never match a single-time format.
        event["datetime"] = event_date + time[:1] + [current_year]
        event["date_time"] = _parse_event_datetime(event["datetime"])
        event["bands"] = ", ".join(bands)

        if event["date_time"] is None:
            # Skip rather than hand a placeholder string on as the date.
            print('oops', 'unparseable date:', " ".join(event["datetime"]))
            continue

        event['scraper'] = scraper
        event['calendars'] = [scraper.calendar]
        event['title'] = event["bands"]
        event['date'] = event["date_time"]
        event['dateStamp'] = event["date_time"]
        event['link'] = calendar_url
        try:
            digitools.createBasicEvent(event, "Mu", venue)
        except Exception as e:
            # Best effort: one failed event must not stop the rest of the run.
            print('oops', e)
        events.append(event)

digitools.updateScraper(scraper, item_count_start)