updated naming conventions
This commit is contained in:
179
Working/venues/mn.mpls.first-avenue.py
Normal file
179
Working/venues/mn.mpls.first-avenue.py
Normal file
@@ -0,0 +1,179 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
sys.path.append('/var/www/digisnaxx.ado/scrapers')
|
||||
import dtss
|
||||
dtss.getReady()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
|
||||
from lxml import html
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper, Event
|
||||
import events.digitools as digitools
|
||||
|
||||
def get_info(pse):
    """Collect the fields of a single show page into an event dict.

    Relies on the module-level ``scraper`` and ``DATETIME_FORMAT*`` names.

    Args:
        pse: Parsed show page. Only its ``xpath`` method is used here; the
            main loop passes the result of ``lxml.html.fromstring``.

    Returns:
        dict with ``scraper``, ``calendars``, ``venue``, ``show_title``,
        ``guests``, ``flyer`` and ``date_time``, plus ``date`` and
        ``details`` when get_date / get_details succeed.

    Raises:
        IndexError: if the venue or title nodes are missing from the page.
        Exception: whatever the last-resort date parse raises (KeyError when
            no date was scraped, ValueError when it matches no format).
    """
    event = {}
    event['scraper'] = scraper
    event['calendars'] = [scraper.calendar]
    event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()

    # Some pages leave the first title node empty after whitespace removal;
    # the third text node is used instead in that case.
    titles = pse.xpath('.//*/span[@class="show_title"]/text()')
    event["show_title"] = titles[0].replace('\t', '').replace('\n', '')
    if event["show_title"] == "":
        event["show_title"] = titles[2].replace('\t', '').replace('\n', '')

    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')

    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)

    try:
        detailed = get_details(pse, event)
        # get_details may swallow its own error and return None; never let
        # that replace the partially built event.
        if detailed is not None:
            event = detailed
    except Exception as e:
        print("details issue: ", e)

    # Try "date + doors time" in two time spellings, then fall back to the
    # bare date. The last attempt is deliberately unguarded so the caller
    # sees the failure.
    try:
        event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
    except Exception as e:
        print("Using alt date format 2: ", e)
        try:
            event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
            ppr(event)
        except Exception as e:
            print("Using alt date format 3: ", e)
            print(event['date'])
            event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)

    print("The Event:")
    ppr(event)
    return event
|
||||
|
||||
def get_date(pse, event):
    """Read the month, day and year text of the page's date container.

    Args:
        pse: Parsed show page; only its ``xpath`` method is used.
        event: Event dict to update in place.

    Returns:
        The same ``event`` with ``event["date"]`` set to
        ``[month, day, year]``, each with tabs and newlines removed.

    Raises:
        IndexError: if any of the three nodes is missing.
    """
    node_query = './/*/div[@class="date_container"]/div/div[@class="{}"]/text()'
    pieces = []
    for part in ("month", "day", "year"):
        raw = pse.xpath(node_query.format(part))[0]
        pieces.append(raw.replace('\t', '').replace('\n', ''))
    event["date"] = pieces
    return event
|
||||
|
||||
def get_details(pse, event):
    """Attach the show-details table of a page to ``event``.

    The ``h6`` texts inside the ``show_details`` block are used as keys and
    the ``h2`` texts as values, paired by position.

    Args:
        pse: Parsed show page; only its ``xpath`` method is used.
        event: Event dict to update in place.

    Returns:
        ``event`` in every case. On success ``event["details"]`` holds the
        label-to-value mapping. On failure the error is printed and
        ``event`` comes back unchanged, so callers that rebind their event
        to the return value never end up holding ``None``.
    """
    try:
        labels = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
        values = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
        event["details"] = dict(zip(labels, values))
    except Exception as e:
        # Best effort: details are optional, so report and carry on.
        print("details issue: ", e)
    return event
|
||||
|
||||
# Default organization for this scraper; the main loop rebinds ``venue`` and
# ``created`` for each show.
first_avenue_fields = {
    "name": "First Avenue",
    "city": "Minneapolis",
    "website": "https://first-avenue.com",
    "is_venue": True,
}
venue, created = Organization.objects.get_or_create(**first_avenue_fields)

# Scraper record plus its starting item count (handed back to
# digitools.updateScraper at the end of the run).
scraper, item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')

# Not referenced in the visible part of this script.
tz = pytz.timezone("US/Central")

# strptime layouts tried in order by get_info.
DATETIME_FORMAT = '%b %d %Y %I%p'        # date + hour-only doors time
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'   # date + hour:minute doors time
DATETIME_FORMAT_3 = '%b %d %Y'           # date only
|
||||
|
||||
# Set initial variables for City, etc
|
||||
CALENDAR_BASE_URL = 'https://first-avenue.com/shows/?start_date='


def calendar_urls_for(today):
    """Build the two listing URLs used by this scraper.

    Args:
        today: Any object with integer ``year``, ``month`` and ``day``
            attributes (a ``datetime`` in normal use).

    Returns:
        Tuple ``(current_url, next_month_url)``. The first starts at
        ``today`` and the second at the first day of the following month,
        rolling the year over after December. Both dates are formatted as
        eight digits, YYYYMMDD.
    """
    if today.month == 12:
        following_year, following_month = today.year + 1, 1
    else:
        following_year, following_month = today.year, today.month + 1
    # Month AND day are zero-padded so the parameter is always 8 digits.
    current_stamp = "{:04d}{:02d}{:02d}".format(today.year, today.month, today.day)
    following_stamp = "{:04d}{:02d}01".format(following_year, following_month)
    return CALENDAR_BASE_URL + current_stamp, CALENDAR_BASE_URL + following_stamp


# Sample the clock once so year, month and day always belong to one instant.
now = datetime.now()
year, month, day = now.year, now.month, now.day

calendar_url, calendar_url_2 = calendar_urls_for(now)

# "MM01" of the following month; kept for code that still reads this name.
next_month_string = calendar_url_2[-4:]
|
||||
|
||||
|
||||
# print("\n\n", calendar_url, calendar_url_2, "\n\n")
|
||||
|
||||
# The run environment is a required first command-line argument; it is
# handed to digitools.getBrowser to obtain the browser used below.
if len(sys.argv) < 2:
    print("No run_env")
    quit()

arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
|
||||
|
||||
# Get Events based on date of month
|
||||
SHOW_LINK_XPATH = './/*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href'

# Which listing pages to read, and how many links to keep from each, depends
# on the day of the month. A limit of None keeps every link on the page.
day_of_month = datetime.now().day
if day_of_month < 8:
    fetch_plan = [(calendar_url, 63)]
elif day_of_month < 15:
    fetch_plan = [(calendar_url, None)]
elif day_of_month < 21:
    fetch_plan = [(calendar_url, 95), (calendar_url_2, 31)]
else:
    fetch_plan = [(calendar_url, None), (calendar_url_2, 63)]

shows = []
for listing_url, link_limit in fetch_plan:
    ps = digitools.getSource(br, listing_url)
    shows = shows + ps.xpath(SHOW_LINK_XPATH)[:link_limit]
|
||||
|
||||
# Venue names that get_or_create files under St. Paul; every other venue
# name is filed under Minneapolis.
ST_PAUL_VENUES = {"Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"}

events = []

try:
    for show in shows:
        br.get(show)
        sleep(2)  # pause after each page load before reading the source

        try:
            pse = html.fromstring(br.page_source)
        except Exception as e:
            # Skip the show: continuing would reuse the previous page's tree.
            print(show, e)
            continue

        try:
            event = get_info(pse)
        except Exception as e:
            # Skip the show: continuing would re-save the previous event.
            print("get_info error: ", e)
            continue

        event["link"] = show

        try:
            city = "St. Paul" if event["venue"] in ST_PAUL_VENUES else "Minneapolis"
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city=city)
        except Exception as e:
            # Skip rather than attach the show to the venue left over from
            # an earlier iteration.
            print("Venue creation error: ", e, "\n", event)
            continue

        try:
            event['dateStamp'] = event['date_time']
            event['scraper'] = scraper
            new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
            scraper.items += 1
            events.append(event)  # so the summary below lists what was saved
        except Exception as e:
            print("event creation error: ", e, "\n\n", event, "\n\n", created)
            # quit()

    ppr(events)
finally:
    # Always release the browser, even if the loop raises unexpectedly.
    br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
|
||||
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|
||||
Reference in New Issue
Block a user