"""Shared helpers for the event scrapers.

Bootstraps Django so the ``events`` models can be used from a standalone
script, and provides helpers for: tracking a ``Scraper`` row per venue,
opening a Selenium browser, building month-based calendar URLs, and storing
scraped events / articles.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta
from time import sleep
import pytz
from lxml import html
from pprint import pprint as ppr

import django

# Django must be configured before any model import below; keep this order.
sys.path.append('../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from xvfbwrapper import Xvfb
from selenium import webdriver as wd
from events.models import Event as DSEvent, Organization, Promo, Scraper, Calendar

tz = pytz.timezone("US/Central")
td = relativedelta.relativedelta(months=1)
# Import-time snapshot of "one month from now"; kept for code that imports it.
# The URL builders below recompute the value on every call instead.
odt = datetime.now() + td

# Timestamp layout expected by createBasiciCalEvent.
_ICAL_STAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def getScraper(venue):
    """Return the ``Scraper`` row for ``venue`` and its stored item count.

    The row is looked up by ``venue.name``; the remaining fields are only
    used when a new row has to be created. For an existing row, ``items`` is
    refreshed from the number of events that reference it and saved.

    Args:
        venue: object exposing ``name`` and ``website``.

    Returns:
        Tuple ``(scraper, scraper.items)``.
    """
    try:
        # Volatile values (timestamps, counters) belong in ``defaults``: as
        # lookup kwargs they would prevent an existing row from ever matching.
        scraper, created = Scraper.objects.get_or_create(
            name=venue.name,
            defaults={
                "website": venue.website,
                "calendar": Calendar.objects.get(id=1),
                "items": 0,
                "new_items": 0,
                "last_ran": datetime.now(),
            },
        )
    except Exception as e:
        # Best-effort fallback: report the problem and reuse the row that is
        # already stored under this name.
        print(e)
        scraper = Scraper.objects.get(name=venue.name)
        created = False

    if not created:
        scraper.items = DSEvent.objects.filter(scraper=scraper).count()
        scraper.save()

    print("Scraper: ", scraper)
    return scraper, scraper.items


def updateScraper(scraper, item_count_start):
    """Record the result of a scraper run on ``scraper`` and save it.

    Args:
        scraper: the ``Scraper`` row to update.
        item_count_start: number of events the scraper had before the run;
            subtracted from the current count to obtain ``new_items``.
    """
    # One COUNT query instead of loading every event row twice.
    event_count = DSEvent.objects.filter(scraper=scraper).count()
    scraper.items = event_count
    scraper.new_items = event_count - item_count_start
    scraper.last_ran = datetime.now()
    scraper.save()
    return


def getSource(browser, link):
    """Load ``link`` in ``browser`` and return the parsed page.

    Waits a fixed five seconds after navigation before reading
    ``browser.page_source``, then parses it with ``lxml.html``.
    """
    browser.get(link)
    sleep(5)
    return html.fromstring(browser.page_source)


def getBrowser(run_env):
    """Return a Selenium webdriver for the given run environment.

    Args:
        run_env: ``'dev'`` for Chrome, ``'def'`` for Firefox, ``'prod'`` for
            Firefox on a virtual display.

    Raises:
        SystemExit: if ``run_env`` is not one of the values above.
    """
    if run_env == 'dev':
        print("Chrome is a go!")
        return wd.Chrome()

    # NOTE(review): "def" differs from "dev" by one letter; confirm that the
    # spelling is intentional before relying on it.
    if run_env == "def":
        print("Firefox go vroom")
        return wd.Firefox()

    if run_env == "prod":
        # NOTE(review): xvfbwrapper already starts a display via
        # ``xvfb.start()``; this shell command additionally launches
        # ``Xvfb :91`` in the background, and its ``export`` cannot affect
        # this process. Kept as-is; confirm whether it is still needed.
        start_cmd = "Xvfb :91 && export DISPLAY=:91 &"
        xvfb = Xvfb()
        os.system(start_cmd)
        xvfb.start()
        print("started Xvfb")
        return wd.Firefox()

    # The original referenced an undefined name here, which raised NameError
    # instead of printing the diagnostic. Exit non-zero so callers and
    # schedulers can detect the failure.
    print("Failed", sys.argv, run_env)
    sys.exit(1)


def _this_and_next_month():
    """Return ``(now, now + one month)`` as naive local datetimes."""
    now = datetime.now()
    return now, now + td


def createBasicURL(site_url):
    """Return ``[<site_url>M/YYYY, ...]`` for the current and next month.

    The month is not zero-padded. The second link uses the year of the next
    month, so the December -> January link points at the new year.
    """
    now, upcoming = _this_and_next_month()
    return [
        f"{site_url}{now.month}/{now.year}",
        f"{site_url}{upcoming.month}/{upcoming.year}",
    ]


def createURLNoZero(site_url):
    """Return ``[<site_url>YYYY/M, ...]`` for the current and next month.

    The month is not zero-padded; the year rolls over with the next month.
    """
    now, upcoming = _this_and_next_month()
    return [
        f"{site_url}{now.year}/{now.month}",
        f"{site_url}{upcoming.year}/{upcoming.month}",
    ]


def createURL(site_url):
    """Return ``[<site_url>YYYY/MM, ...]`` for the current and next month.

    The month is zero-padded to two digits; the year rolls over with the
    next month.
    """
    now, upcoming = _this_and_next_month()
    return [
        f"{site_url}{now.year}/{now.month:02d}",
        f"{site_url}{upcoming.year}/{upcoming.month:02d}",
    ]


def createDashURL(site_url):
    """Return ``[<site_url>MM-YYYY, ...]`` for the current and next month.

    The month is zero-padded to two digits; the year rolls over with the
    next month. The resulting list is also printed.
    """
    now, upcoming = _this_and_next_month()
    links = [
        f"{site_url}{now.month:02d}-{now.year}",
        f"{site_url}{upcoming.month:02d}-{upcoming.year}",
    ]
    print(links)
    return links


def createBasicEvent(event, event_type, venue):
    """Store an event from a dict of already-parsed values.

    Args:
        event: dict with keys ``title``, ``link``, ``dateStamp``,
            ``calendar`` and ``scraper``.
        event_type: value stored in ``event_type``.
        venue: value stored in ``venue``.

    Returns:
        Tuple ``(event_row, created)`` from ``update_or_create``.
    """
    # No ``defaults`` are given, so every field takes part in the lookup.
    new_event, created = DSEvent.objects.update_or_create(
        event_type=event_type,
        show_title=event['title'],
        show_link=event['link'],
        show_date=event['dateStamp'],
        show_day=event['dateStamp'],
        calendar=event['calendar'],
        scraper=event['scraper'],
        venue=venue,
    )
    return new_event, created


def createBasiciCalEvent(event, event_type, venue):
    """Store an event built by createIcalEvent / createCleanIcalEvent.

    Args:
        event: dict whose ``title`` and ``dateStamp`` entries are sequences;
            only index 0 is read. ``dateStamp[0]`` must be formatted as
            ``YYYY-MM-DD HH:MM:SS``. Also needs ``link``, ``calendar`` and
            ``scraper``.
        event_type: value stored in ``event_type``.
        venue: value stored in ``venue``.

    Returns:
        Tuple ``(event_row, created)`` from ``update_or_create``.

    Raises:
        ValueError: if the timestamp does not match the expected layout.
    """
    print("starting create")
    ppr(event)
    # Parse once; the same value feeds both date columns.
    show_time = datetime.strptime(str(event['dateStamp'][0]), _ICAL_STAMP_FORMAT)
    new_event, created = DSEvent.objects.update_or_create(
        event_type=event_type,
        show_title=event['title'][0],
        show_link=event['link'],
        show_date=show_time,
        show_day=show_time,
        calendar=event['calendar'],
        scraper=event['scraper'],
        venue=venue,
    )
    print("created")
    return new_event, created


def createDetailedEvent(event, event_type, venue, scraper):
    """Store an event together with its guest list and details.

    Args:
        event: dict with keys ``show_title``, ``link``, ``dateStamp``,
            ``guests`` (iterable of strings, joined with spaces),
            ``details``, ``calendar`` and ``scraper``.
        event_type: value stored in ``event_type``.
        venue: value stored in ``venue``.
        scraper: accepted for call compatibility; the stored scraper is
            taken from ``event['scraper']``.

    Returns:
        Tuple ``(event_row, created)`` from ``update_or_create``.
    """
    new_event, created = DSEvent.objects.update_or_create(
        event_type=event_type,
        show_title=event["show_title"],
        show_link=event["link"],
        show_date=event["dateStamp"],
        show_day=event["dateStamp"],
        guests=" ".join(event["guests"]),
        more_details=event["details"],
        calendar=event['calendar'],
        scraper=event['scraper'],
        venue=venue,
    )
    return new_event, created


def createBasicArticle(article, event_type, organization):
    """Store a published ``Promo`` with ``promo_type`` ``'Ja'``.

    Args:
        article: dict with keys ``title`` and ``link``.
        event_type: accepted for call compatibility; not used.
        organization: value stored in ``organization``.

    Returns:
        Tuple ``(promo_row, created)`` from ``update_or_create``.
    """
    new_article, created = Promo.objects.update_or_create(
        promo_type='Ja',
        title=article['title'],
        target_link=article['link'],
        published=True,
        organization=organization,
    )
    return new_article, created


def createIcalEvent(event, scraper, venue, event_type):
    """Store an iCal event whose start carries a trailing UTC offset.

    The last six characters of ``str(event['dateStart'])`` (e.g. ``+00:00``)
    are dropped before the value is handed to createBasiciCalEvent.

    Args:
        event: dict with keys ``dateStart`` and ``strSummary``.
        scraper: ``Scraper`` row; also supplies the calendar.
        venue: venue the event belongs to; ``venue.website`` is the link.
        event_type: value stored in ``event_type``.
    """
    stamp = str(event['dateStart'])[:-6]
    new_event = {
        'scraper': scraper,
        'calendar': scraper.calendar,
        # One-element tuples on purpose: createBasiciCalEvent reads index 0.
        'title': (event['strSummary'],),
        'date': (stamp,),
        'dateStamp': (stamp,),
        'link': venue.website,
    }
    createBasiciCalEvent(new_event, event_type, venue)


def createCleanIcalEvent(event, scraper, venue, event_type):
    """Store an iCal event whose start has no UTC offset to strip.

    Same as createIcalEvent, except ``str(event['dateStart'])`` is used
    unmodified.
    """
    stamp = str(event['dateStart'])
    new_event = {
        'scraper': scraper,
        'calendar': scraper.calendar,
        # One-element tuples on purpose: createBasiciCalEvent reads index 0.
        'title': (event['strSummary'],),
        'date': (stamp,),
        'dateStamp': (stamp,),
        'link': venue.website,
    }
    createBasiciCalEvent(new_event, event_type, venue)


def _prop_value(prop):
    """Return ``prop.dt``, or ``None`` when the property is missing."""
    return None if prop is None else prop.dt


def getiCalEvents(gcal, scraper, venue, event_type=None):
    """Store every component of ``gcal`` that starts after today.

    Components without a ``SUMMARY`` are skipped. A failure while handling
    one component is printed and does not stop the loop.

    Args:
        gcal: object whose ``walk()`` yields components supporting
            ``get(<property name>)`` (presumably an icalendar calendar;
            verify against the caller).
        scraper: ``Scraper`` row recorded on each stored event.
        venue: venue recorded on each stored event.
        event_type: value stored in ``event_type``. Optional so existing
            three-argument calls keep working; previously the value was not
            forwarded at all, which made every store attempt fail.
    """
    today = datetime.today().date()

    for component in gcal.walk():
        event = {
            'scraper': scraper,
            'calendar': scraper.calendar,
            # Formatting a missing SUMMARY yields the literal string 'None'.
            'strSummary': f"{(component.get('SUMMARY'))}",
            'strDesc': component.get('DESCRIPTION'),
            'strLocation': component.get('LOCATION'),
            'dateStart': _prop_value(component.get('DTSTART')),
            'dateStamp': _prop_value(component.get('DTSTAMP')),
        }
        date_end = _prop_value(component.get('DTEND'))
        event['dateEnd'] = date_end if date_end is not None else event['dateStart']

        if event['strSummary'] == 'None':
            continue

        event['details'] = {
            "description": event['strDesc'],
            "Location": event['strLocation'],
        }

        start = event['dateStart']
        if start is None:
            print("The Error: ", "component has no DTSTART")
            continue

        try:
            print("1Event: ", start)
            if isinstance(start, datetime):
                start_day = start.date()
            else:
                start_day = start
                # NOTE(review): a date-only start cannot match the timestamp
                # layout used downstream, so it is stored as midnight of that
                # day. Confirm this is the desired handling.
                start = datetime.combine(start, datetime.min.time())

            # Compare calendar days only: a datetime cannot be ordered
            # against a date directly.
            if start_day <= today:
                continue

            # Keep the full datetime for storage; only the comparison above
            # works on the day.
            event['dateStart'] = start
            if start.utcoffset() is not None:
                # Aware datetime: its string form ends in an offset that
                # createIcalEvent strips.
                createIcalEvent(event, scraper, venue, event_type)
            else:
                createCleanIcalEvent(event, scraper, venue, event_type)
        except Exception as e:
            # Best-effort per component: report and move on.
            print("The Error: ", e)