Files
scrapers/Working/venues/mn.mpls.first-avenue.py

180 lines
6.4 KiB
Python

import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
from lxml import html
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
def get_info(pse):
event = {}
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()
event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[0].replace('\t', '').replace('\n', '')
if event["show_title"] == "":
event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[2].replace('\t', '').replace('\n', '')
event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')
try:
event = get_date(pse, event)
except Exception as e:
print("date issue: ", e)
try:
event = get_details(pse, event)
except Exception as e:
print("details issue: ", e)
try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
except Exception as e:
print("Using alt date format 2: ", e)
try:
event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
ppr(event)
except Exception as e:
print("Using alt date format 3: ", e)
print(event['date'])
event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
print("The Event:")
ppr(event)
return event
def get_date(pse, event):
month = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="month"]/text()')[0].replace('\t', '').replace('\n', '')
day = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="day"]/text()')[0].replace('\t', '').replace('\n', '')
year = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="year"]/text()')[0].replace('\t', '').replace('\n', '')
event["date"] = [month, day, year]
return event
def get_details(pse, event):
try:
details = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
info = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
di = zip(details, info)
details = {}
for d,i in di:
details[d] = i
event["details"] = details
return event
except Exception as e:
print("details issue: ", e)
venue, created = Organization.objects.get_or_create(
name="First Avenue",
city="Minneapolis",
website="https://first-avenue.com",
is_venue = True
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
month = int(datetime.now().month)
day = int(datetime.now().day)
if month == 12:
next_month = "01"
else:
next_month = month + 1
if next_month < 10:
next_month = "0" + str(next_month)
if month < 10:
month = "0" + str(month)
year = int(datetime.now().year)
calendar_url = 'https://first-avenue.com/shows/?start_date=' + str(year) + str(month) + str(day)
next_month_string = str(next_month) + "01"
if next_month == 1:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
if int(next_month) == 1:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string
# print("\n\n", calendar_url, calendar_url_2, "\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
# Get Events based on date of month
if datetime.now().day < 8:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
elif 7 < datetime.now().day < 15:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
elif 14 < datetime.now().day < 21:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
ps = digitools.getSource(br, calendar_url_2)
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:31]
else:
ps = digitools.getSource(br, calendar_url)
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
ps = digitools.getSource(br, calendar_url_2)
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
events = []
for show in shows:
br.get(show)
sleep(2)
try:
pse = html.fromstring(br.page_source)
except Exception as e:
print(show)
pass
try:
event = get_info(pse)
except Exception as e:
print("get_info error: ", e)
pass
try:
event["link"] = show
if event["venue"] in ["Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"]:
venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="St. Paul")
else:
venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="Minneapolis")
except Exception as e:
print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
try:
event['dateStamp'] = event['date_time']
event['scraper'] = scraper
new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
scraper.items+=1
except Exception as e:
print("event creation error: ", e, "\n\n", event, "\n\n", created)
# quit()
ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()