import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project model is imported below.
# The relative path is resolved against the current working directory
# (presumably the project root -- confirm against how the script is launched).
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

tz = pytz.timezone("US/Central")
# Matches "<ISO date> <12-hour time> <am/pm>", e.g. "2024-05-01 8:00 pm".
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p'

venue, created = Organization.objects.get_or_create(
    name="White Squirrel",
    city="St. Paul",
    website="https://whitesquirrelbar.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)

# Set initial variables for City, etc
calendar_url = [
    'https://whitesquirrelbar.com/calendar/list/page/1/',
    'https://whitesquirrelbar.com/calendar/list/page/2/',
    'https://whitesquirrelbar.com/calendar/list/page/3/',
]

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    # sys.exit is always available (quit() is a `site` convenience) and a
    # non-zero status signals the missing argument to the caller.
    sys.exit(1)


def get_listings(pse, events):
    """Extract events from one calendar list page and record them.

    For every event row found in ``pse`` an event dict is built, appended to
    ``events``, passed to ``digitools.createBasicEvent`` and counted on
    ``scraper.items``. Rows that lack a required field or whose date/time
    cannot be parsed are reported on stdout and skipped.

    Args:
        pse: Parsed page source exposing an ``xpath`` method
            (as returned by ``digitools.getSource``).
        events: List that receives each successfully parsed event dict
            (mutated in place).
    """
    listings = pse.xpath('.//*/div[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
    for listing in listings:
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar

        # The image is optional: an empty XPath result means "no image".
        try:
            event["image"] = listing.xpath('.//*/img/@src')[0]
        except IndexError:
            event["image"] = "none"

        try:
            event["date"] = listing.xpath('.//time/@datetime')[0]
            # The start-date text is expected to look like "<date> @ <time>";
            # rows without "@" have no time part and raise IndexError here.
            event["time"] = listing.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split("@")[1]
            event["title"] = listing.xpath('.//*/h3/a/text()')[0].replace("\t", "").replace("\n", "")
            event["link"] = listing.xpath('.//*/h3/a/@href')[0]
            event['datetime'] = event['date'] + " " + event['time']
            # Strip only for parsing: strptime rejects trailing whitespace,
            # while the stored strings stay exactly as scraped.
            # NOTE(review): the result is a naive datetime; `tz` is not applied here.
            event["dateStamp"] = datetime.strptime(event['datetime'].strip(), DATETIME_FORMAT)
        except (IndexError, ValueError) as err:
            # One malformed row must not abort the whole scrape.
            print("Skipping malformed listing:", err)
            continue

        events.append(event)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items += 1


events = []

try:
    for cal in calendar_url:
        ps = digitools.getSource(br, cal)
        get_listings(ps, events)
finally:
    # Always release the browser, even if a page fetch or parse fails.
    br.close()

digitools.updateScraper(scraper, item_count_start)