Files
scrapers/Working/venues/FirstAveScrape.py

179 lines
6.3 KiB
Python
Raw Normal View History

2025-11-30 16:29:50 -05:00
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
django.setup()
from time import sleep
from pprint import pprint as ppr
from lxml import html
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
# strptime patterns for "<Mon> <day> <year>" followed by an optional doors
# time; get_info tries them in this order.
DATETIME_FORMAT = '%b %d %Y %I%p'        # e.g. "Nov 30 2025 7PM"
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'   # e.g. "Nov 30 2025 7:30PM"
DATETIME_FORMAT_3 = '%b %d %Y'           # date only, no doors time

# NOTE(review): tz is defined here but not referenced by the visible code.
tz = pytz.timezone("US/Central")

# Organization row for First Avenue itself; the scrape loop rebinds
# `venue` for each show it processes.
venue, created = Organization.objects.get_or_create(
    name="First Avenue",
    city="Minneapolis",
    website="https://first-avenue.com",
    is_venue=True,
)

# Scraper bookkeeping handles supplied by digitools for this venue.
scraper, item_count_start, virtcal = digitools.getScraper(venue, 'msp')
# Set initial variables for City, etc
month = int(datetime.now().month)
day = int(datetime.now().day)
if month == 12:
next_month = "01"
else:
next_month = month + 1
if next_month < 10:
next_month = "0" + str(next_month)
if month < 10:
month = "0" + str(month)
year = int(datetime.now().year)
calendar_url = 'https://first-avenue.com/shows/?start_date=' + str(year) + str(month) + str(day)
next_month_string = str(next_month) + "01"
if next_month == 1:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
if int(next_month) == 1:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string
print("\n\n", calendar_url, calendar_url_2, "\n\n")
# The first command-line argument is required; it is handed to
# digitools.getBrowser to obtain the browser used for every page load.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
# XPath selecting the detail-page href of every show on a listing page.
_SHOW_LINK_XPATH = './/*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href'

# How many links to keep from each listing page, keyed on the day of the
# month.  One entry means only the current listing is fetched; two entries
# mean the next-month listing is fetched as well.  None keeps every link.
_day_of_month = datetime.now().day
if _day_of_month < 8:
    _link_caps = (63,)
elif _day_of_month < 15:
    _link_caps = (None,)
elif _day_of_month < 21:
    _link_caps = (95, 31)
else:
    _link_caps = (None, 63)

shows = []
# zip stops at the shorter sequence, so calendar_url_2 is only loaded
# when a second cap is configured.
for _listing_url, _cap in zip((calendar_url, calendar_url_2), _link_caps):
    ps = digitools.getSource(br, _listing_url)
    shows = shows + ps.xpath(_SHOW_LINK_XPATH)[:_cap]

events = []
def _parse_event_datetime(event):
    """Return a naive datetime built from event["date"] and the doors time.

    The "Doors Open" entry of event["details"], when present, is tried with
    DATETIME_FORMAT and then DATETIME_FORMAT_2.  If there is no doors time,
    or neither format matches, the date alone is parsed with
    DATETIME_FORMAT_3.

    Raises:
        ValueError: if the event has no date or the date text cannot be
            parsed at all.
    """
    date_parts = event.get("date")
    if not date_parts:
        raise ValueError("no date found for event")
    date_text = " ".join(date_parts)

    doors = (event.get("details") or {}).get("Doors Open")
    if doors:
        stamp = date_text + " " + doors.strip()
        attempts = (
            (DATETIME_FORMAT, "Using alt date format 2: "),
            (DATETIME_FORMAT_2, "Using alt date format 3: "),
        )
        for fmt, notice in attempts:
            try:
                return datetime.strptime(stamp, fmt)
            except ValueError as e:
                print(notice, e)
    else:
        print("Using alt date format 3: ", "no 'Doors Open' time listed")

    print(date_parts)
    return datetime.strptime(date_text, DATETIME_FORMAT_3)


def get_info(pse):
    """Extract one show's fields from its parsed detail page.

    Args:
        pse: parsed page exposing ``.xpath()`` (the caller passes the
            result of ``lxml.html.fromstring``).

    Returns:
        A dict with "scraper", "calendars", "venue", "show_title",
        "guests", "flyer" and "date_time", plus "date" and "details" when
        get_date / get_details succeed.

    Raises:
        IndexError: if the venue name or show title is absent from the page.
        ValueError: if no usable date could be read from the page.
    """
    event = {}
    event['scraper'] = scraper
    event['calendars'] = [scraper.calendar]
    event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()

    titles = pse.xpath('.//*/span[@class="show_title"]/text()')
    event["show_title"] = titles[0].replace('\t', '').replace('\n', '')
    if event["show_title"] == "":
        # When the first title node is blank, the third one is used instead.
        event["show_title"] = titles[2].replace('\t', '').replace('\n', '')

    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')

    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)

    try:
        detailed = get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)
    else:
        # get_details may hand back None after handling its own error;
        # keep the dict built so far rather than losing it.
        if detailed is not None:
            event = detailed

    event["date_time"] = _parse_event_datetime(event)
    return event
def get_date(pse, event):
    """Store the page's [month, day, year] text under event["date"].

    Each value is the first text node of the matching div inside the
    "date_container" element, with tabs and newlines removed.  An
    IndexError is raised if any of the three is missing.  The same
    ``event`` dict is returned.
    """
    pieces = []
    for css_class in ("month", "day", "year"):
        query = './/*/div[@class="date_container"]/div/div[@class="' + css_class + '"]/text()'
        raw = pse.xpath(query)[0]
        pieces.append(raw.replace('\t', '').replace('\n', ''))
    event["date"] = pieces
    return event
def get_details(pse, event):
    """Attach the page's "show details" label/value pairs to ``event``.

    The <h6> texts inside the "show_details text-center" block are used as
    keys and the <h2> texts as their values, paired in document order and
    stored under event["details"].

    Any error while reading the page is reported and ``event`` is returned
    unchanged, so the caller never receives None in place of its dict.

    Args:
        pse: parsed page exposing ``.xpath()``.
        event: dict to update in place.

    Returns:
        The same ``event`` dict.
    """
    try:
        labels = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
        values = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
        event["details"] = dict(zip(labels, values))
    except Exception as e:
        print("details issue: ", e)
    return event
# Venue names that are filed under St. Paul; every other venue is filed
# under Minneapolis.
_ST_PAUL_VENUES = ("Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall")

# Visit each show page, extract its details and store it.  A show whose
# page cannot be parsed, whose details cannot be extracted, or whose venue
# cannot be resolved is skipped, so data left over from the previous
# iteration is never saved under the wrong link or venue.
try:
    for show in shows:
        br.get(show)
        sleep(2)  # pause before reading the page source

        try:
            pse = html.fromstring(br.page_source)
        except Exception as e:
            print(show)
            continue

        try:
            event = get_info(pse)
        except Exception as e:
            print("get_info error: ", e)
            continue

        event["link"] = show
        try:
            city = "St. Paul" if event["venue"] in _ST_PAUL_VENUES else "Minneapolis"
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city=city)
        except Exception as e:
            print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
            continue

        try:
            event['dateStamp'] = event['date_time']
            event['scraper'] = scraper
            new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
            scraper.items += 1
        except Exception as e:
            print("event creation error: ", e, "\n\n", event, "\n\n", created)
            # A failed save aborts the whole run; the finally clause below
            # still closes the browser on the way out.
            quit()
finally:
    br.close()

ppr(events)
digitools.updateScraper(scraper, item_count_start)
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()