"""Scrape upcoming First Avenue show pages and store them as events.

Usage: pass the run environment as the first command-line argument; it is
handed to ``digitools.getBrowser``. The script reads the venue's calendar
listing, visits each linked show page, extracts title / venue / date /
details, and records each show through ``digitools.createDetailedEvent``.
"""
import os
import sys
from datetime import datetime
from pprint import pprint as ppr
from time import sleep

from dateutil import relativedelta
import django
from lxml import html
import pytz

# Django has to be configured before any project model can be imported.
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Organization, Scraper, Event
import events.digitools as digitools

tz = pytz.timezone("US/Central")

# Formats tried, in order, when turning the scraped date (+ doors time)
# into a datetime: "8PM", then "8:30PM", then the date alone.
DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'

CALENDAR_BASE_URL = 'https://first-avenue.com/shows/?start_date='
SHOW_LINK_XPATH = './/*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href'

# Venues that are filed under St. Paul; every other venue is filed under
# Minneapolis.
ST_PAUL_VENUES = (
    "Palace Theater",
    "Turf Club",
    "The Fitzgerald Theater",
    "Amsterdam Bar & Hall",
)


def build_calendar_urls(now):
    """Return ``(current_url, next_month_url)`` for the calendar listing.

    ``current_url`` starts at ``now``'s date and ``next_month_url`` at the
    first day of the following month (rolling over to January of the next
    year in December). Both dates are rendered as zero-padded ``YYYYMMDD``
    so that the two URLs use the same layout.
    """
    if now.month == 12:
        next_year, next_month = now.year + 1, 1
    else:
        next_year, next_month = now.year, now.month + 1
    current_url = CALENDAR_BASE_URL + now.strftime('%Y%m%d')
    next_month_url = CALENDAR_BASE_URL + '{}{:02d}01'.format(next_year, next_month)
    return current_url, next_month_url


def collect_show_links(browser, day_of_month, current_url, next_url):
    """Return the list of show-page links to visit.

    Early in the month only the current listing is read; from the 15th on,
    links from the next month's listing are appended as well.
    """
    # NOTE(review): the slice sizes (63 / 95 / 31) are carried over unchanged;
    # presumably they bound how far ahead the scraper looks -- confirm.
    links = digitools.getSource(browser, current_url).xpath(SHOW_LINK_XPATH)
    if day_of_month < 8:
        return links[:63]
    if day_of_month < 15:
        return links
    upcoming = digitools.getSource(browser, next_url).xpath(SHOW_LINK_XPATH)
    if day_of_month < 21:
        return links[:95] + upcoming[:31]
    return links + upcoming[:63]


def _strip_layout(text):
    """Remove the tab and newline characters found in scraped text."""
    return text.replace('\t', '').replace('\n', '')


def get_info(pse):
    """Build the event dict for one parsed show page.

    ``pse`` is the lxml tree of the show page. The returned dict carries the
    scraper, its calendar, venue name, show title, guests, flyer and --
    when they could be read -- ``date``, ``details`` and ``date_time``.

    Raises whatever the lookups raise when the venue, the title or every
    date format fails (e.g. ``IndexError``, ``KeyError``, ``ValueError``);
    the caller is expected to skip that show.
    """
    event = {}
    event['scraper'] = scraper
    event['calendar'] = scraper.calendar
    event["venue"] = _strip_layout(
        pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0]
    ).strip()

    titles = pse.xpath('.//*/span[@class="show_title"]/text()')
    event["show_title"] = _strip_layout(titles[0])
    if event["show_title"] == "":
        # The first text node can be empty; the original falls back to the third.
        event["show_title"] = _strip_layout(titles[2])

    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')

    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)
    try:
        event = get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)

    try:
        event["date_time"] = datetime.strptime(
            " ".join(event["date"]) + " " + event["details"]["Doors Open"],
            DATETIME_FORMAT,
        )
    except (KeyError, TypeError, ValueError) as e:
        print("Using alt date format 2: ", e)
        try:
            event["date_time"] = datetime.strptime(
                " ".join(event["date"]) + " " + event["details"]["Doors Open"],
                DATETIME_FORMAT_2,
            )
            ppr(event)
        except (KeyError, TypeError, ValueError) as e:
            print("Using alt date format 3: ", e)
            print(event['date'])
            # Last resort: the date alone, without a doors time.
            event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
    return event


def get_date(pse, event):
    """Store ``[month, day, year]`` text from the page in ``event["date"]``.

    Raises ``IndexError`` when any of the three date parts is missing.
    """
    parts = []
    for css_class in ("month", "day", "year"):
        xpath = './/*/div[@class="date_container"]/div/div[@class="%s"]/text()' % css_class
        parts.append(_strip_layout(pse.xpath(xpath)[0]))
    event["date"] = parts
    return event


def get_details(pse, event):
    """Store the page's heading -> value pairs in ``event["details"]``.

    Headings come from the ``h6`` nodes and values from the ``h2`` nodes of
    the show-details section, paired in document order. Always returns
    ``event``; errors propagate to the caller instead of being swallowed,
    so a failure can no longer replace the event with ``None``.
    """
    labels = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
    values = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
    event["details"] = dict(zip(labels, values))
    return event


venue, created = Organization.objects.get_or_create(
    name="First Avenue",
    city="Minneapolis",
    website="https://first-avenue.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)

# Read the clock once so the URLs and the day-of-month branching agree.
# NOTE(review): this is the host's local time, not `tz` -- confirm intended.
now = datetime.now()
calendar_url, calendar_url_2 = build_calendar_urls(now)

print("\n\n", calendar_url, calendar_url_2, "\n\n")

if len(sys.argv) < 2:
    print("No run_env")
    sys.exit()
br = digitools.getBrowser(sys.argv[1])

# NOTE(review): nothing is ever appended to `events`, so the final ppr()
# prints an empty list; kept as in the original.
events = []

try:
    shows = collect_show_links(br, now.day, calendar_url, calendar_url_2)

    for show in shows:
        br.get(show)
        sleep(2)  # give the page time to render before reading its source

        try:
            pse = html.fromstring(br.page_source)
        except Exception as e:
            print(show, e)
            continue  # never fall through with the previous page's tree

        try:
            event = get_info(pse)
        except Exception as e:
            print("get_info error: ", e)
            continue  # never re-submit the previous show's event

        event["link"] = show
        city = "St. Paul" if event["venue"] in ST_PAUL_VENUES else "Minneapolis"
        try:
            event_venue, created = Organization.objects.get_or_create(
                name=event["venue"], is_venue=True, city=city
            )
        except Exception as e:
            print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
            continue  # never attach the event to a stale venue

        try:
            event['dateStamp'] = event['date_time']
            event['scraper'] = scraper
            new_event, created = digitools.createDetailedEvent(event, "Mu", event_venue)
            scraper.items += 1
        except Exception as e:
            print("event creation error: ", e, "\n\n", event, "\n\n", created)
            # Abort the run as before; the finally clause closes the browser.
            sys.exit()

    ppr(events)
finally:
    br.close()

digitools.updateScraper(scraper, item_count_start)