"""Scrape upcoming shows from first-avenue.com and store them as events.

Usage: run with one command-line argument (the run environment), which is
passed to ``digitools.getBrowser``. Without it the script prints
"No run_env" and exits.
"""
import os
import sys
from datetime import datetime
from dateutil import relativedelta

# The project bootstrap must run before the project models are imported.
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()

from time import sleep
from pprint import pprint as ppr
from lxml import html
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


def _clean(text):
    """Return *text* with tab and newline characters removed."""
    return text.replace('\t', '').replace('\n', '')


def _set_date_time(event):
    """Parse the show's start time into ``event["date_time"]``.

    Tries the date plus the "Doors Open" detail with DATETIME_FORMAT, then
    with DATETIME_FORMAT_2, and finally the date alone with
    DATETIME_FORMAT_3.

    Raises:
        KeyError: if ``event["date"]`` was never populated.
        ValueError: if even the date-only format does not match.
    """
    date_text = " ".join(event["date"])

    try:
        event["date_time"] = datetime.strptime(
            date_text + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
        return
    except (KeyError, TypeError, ValueError) as e:
        print("Using alt date format 2: ", e)

    try:
        event["date_time"] = datetime.strptime(
            date_text + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
        ppr(event)
        return
    except (KeyError, TypeError, ValueError) as e:
        print("Using alt date format 3: ", e)

    # No usable door time: fall back to the calendar date only.
    print(event['date'])
    event["date_time"] = datetime.strptime(date_text, DATETIME_FORMAT_3)
    print("The Event:")
    ppr(event)


def get_info(pse):
    """Build an event dict from a parsed show-detail page.

    Args:
        pse: lxml element tree of a single show page.

    Returns:
        dict with the keys ``scraper``, ``calendars``, ``venue``,
        ``show_title``, ``guests``, ``flyer``, ``date_time`` and, when they
        could be extracted, ``date`` and ``details``.

    Raises:
        IndexError: if the venue name or show title is missing on the page.
        KeyError, ValueError: if no date could be extracted or parsed.
    """
    event = {}
    event['scraper'] = scraper
    event['calendars'] = [scraper.calendar]
    event["venue"] = _clean(
        pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0]
    ).strip()

    titles = pse.xpath('.//*/span[@class="show_title"]/text()')
    event["show_title"] = _clean(titles[0])
    if event["show_title"] == "":
        # First text node was only whitespace; the original code falls back
        # to the third text node in that case.
        event["show_title"] = _clean(titles[2])

    event["guests"] = pse.xpath(
        './/*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')

    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)
    try:
        event = get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)

    _set_date_time(event)
    return event


def get_date(pse, event):
    """Store ``[month, day, year]`` text from the page in ``event["date"]``.

    Raises:
        IndexError: if any of the three date elements is missing.
    """
    parts = []
    for css_class in ("month", "day", "year"):
        nodes = pse.xpath(
            './/*/div[@class="date_container"]/div/div[@class="%s"]/text()' % css_class)
        parts.append(_clean(nodes[0]))
    event["date"] = parts
    return event


def get_details(pse, event):
    """Store the page's heading/value detail pairs in ``event["details"]``.

    Pairs each ``h6`` text (the label, e.g. "Doors Open") with the ``h2``
    text at the same position. Always returns *event*; on failure the error
    is printed and the event is returned unchanged, so the caller never
    receives ``None``.
    """
    try:
        labels = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
        values = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
        event["details"] = dict(zip(labels, values))
    except Exception as e:
        print("details issue: ", e)
    return event


def get_show_links(br, url, limit=None):
    """Return show-detail links from the listing page at *url*.

    Args:
        br: browser object obtained from ``digitools.getBrowser``.
        url: listing page to load through ``digitools.getSource``.
        limit: keep only the first *limit* links; ``None`` keeps them all.
    """
    ps = digitools.getSource(br, url)
    return ps.xpath(SHOW_LINKS_XPATH)[:limit]


venue, created = Organization.objects.get_or_create(
    name="First Avenue",
    city="Minneapolis",
    website="https://first-avenue.com",
    is_venue=True,
)

scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'

SHOWS_URL = 'https://first-avenue.com/shows/?start_date='
SHOW_LINKS_XPATH = './/*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href'
# Venue names that are recorded with city "St. Paul" rather than "Minneapolis".
ST_PAUL_VENUES = ("Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall")

# Listing URLs: one starting today, one starting on the 1st of next month.
now = datetime.now()
if now.month == 12:
    next_month_start = now.replace(year=now.year + 1, month=1, day=1)
else:
    next_month_start = now.replace(month=now.month + 1, day=1)

# strftime zero-pads month AND day, giving an 8-digit YYYYMMDD start_date.
calendar_url = SHOWS_URL + now.strftime('%Y%m%d')
calendar_url_2 = SHOWS_URL + next_month_start.strftime('%Y%m%d')

# print("\n\n", calendar_url, calendar_url_2, "\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    sys.exit()

events = []
try:
    # Get Events based on date of month.
    # NOTE(review): the 63/95/31 caps are carried over unchanged; their
    # derivation is not shown in this file - confirm before tuning.
    if now.day < 8:
        shows = get_show_links(br, calendar_url, 63)
    elif now.day < 15:
        shows = get_show_links(br, calendar_url)
    elif now.day < 21:
        shows = get_show_links(br, calendar_url, 95) + get_show_links(br, calendar_url_2, 31)
    else:
        shows = get_show_links(br, calendar_url) + get_show_links(br, calendar_url_2, 63)

    for show in shows:
        try:
            br.get(show)
            sleep(2)
            pse = html.fromstring(br.page_source)
        except Exception as e:
            print(show)
            print("page load error: ", e)
            continue  # never reuse the previous show's parsed page

        try:
            event = get_info(pse)
        except Exception as e:
            print("get_info error: ", e)
            continue  # never re-save the previous show's event

        event["link"] = show
        city = "St. Paul" if event["venue"] in ST_PAUL_VENUES else "Minneapolis"
        try:
            venue, created = Organization.objects.get_or_create(
                name=event["venue"], is_venue=True, city=city)
        except Exception as e:
            print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
            continue  # do not attach the event to a stale venue

        event['dateStamp'] = event['date_time']
        event['scraper'] = scraper
        try:
            new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
        except Exception as e:
            print("event creation error: ", e, "\n\n", event)
            continue

        scraper.items += 1
        events.append(event)
finally:
    # Close the browser even if listing or scraping failed part-way.
    br.close()

ppr(events)
digitools.updateScraper(scraper, item_count_start)

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()