"""Scrape the Acme Comedy Club calendar and record its events.

Run as a script with one command-line argument, which is handed to
``digitools.getBrowser`` to select the browser/run environment.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta

import django

# Django must be configured before any project model can be imported,
# so the path/env setup deliberately sits between the import groups.
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

import events.digitools as digitools
from events.models import Organization, Scraper, Calendar, Event

venue, created = Organization.objects.get_or_create(
    name="Acme Comedy Club",
    city="Minneapolis",
    website="https://acmecomedycompany.com/the-club/calendar/",
    is_venue=True,
)

scraper, item_count_start, virtcal = digitools.getScraper(venue, 'msp')

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'


def _event_year(month, today):
    """Return the calendar year to assume for an event in *month*.

    The page only gives a month abbreviation and a day, so the year is
    inferred: a "Jan" listing seen in any month other than January is
    taken to belong to the following year. When the script itself runs
    in January, "Jan" events stay in the current year.
    """
    if month == "Jan" and today.month != 1:
        return today.year + 1
    return today.year


def get_events(ps, event_type):
    """Create one basic event per ``li.event`` node found in *ps*.

    Args:
        ps: Parsed page exposing ``.xpath()`` (as returned by
            ``digitools.getSource``).
        event_type: Event type code forwarded to
            ``digitools.createBasicEvent``.

    A failure on a single listing (missing node, unparsable date, ...)
    is printed together with the partially built event and does not
    stop the remaining listings from being processed.
    """
    today = datetime.now()
    for c in ps.xpath('.//*/li[@class="event"]'):
        # Bound before the try so the error report below can always show it.
        event = {}
        try:
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = _event_year(month, today)
            event['scraper'] = scraper
            event['calendars'] = [scraper.calendar]
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            show_time = c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()
            event['date'] = " ".join([month, day, str(year), show_time])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            # Best-effort scrape: report the bad listing and keep going.
            print(e)
            ppr(event)
            print("\n\n+++\n\n")


if len(sys.argv) < 2:
    # No browser has been opened yet, so there is nothing to close here.
    print("No run_env")
    sys.exit(1)

arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)

try:
    links = digitools.createURL("https://acmecomedycompany.com/the-club/calendar/")

    for link in links:
        ps = digitools.getSource(br, link)
        get_events(ps, "Co")

    digitools.updateScraper(scraper, item_count_start)
finally:
    # Always release the browser, even if a page fetch or update fails.
    br.close()