2025-11-30 16:29:50 -05:00
|
|
|
import os, sys
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from dateutil import relativedelta
|
|
|
|
|
|
2026-01-09 22:49:19 -05:00
|
|
|
sys.path.append('/var/www/digisnaxx.ado/scrapers')
|
|
|
|
|
import dtss
|
|
|
|
|
dtss.getReady()
|
2025-11-30 16:29:50 -05:00
|
|
|
|
|
|
|
|
from time import sleep
|
|
|
|
|
from pprint import pprint as ppr
|
|
|
|
|
|
|
|
|
|
from lxml import html
|
|
|
|
|
import pytz
|
|
|
|
|
|
|
|
|
|
from events.models import Organization, Scraper, Event
|
|
|
|
|
import events.digitools as digitools
|
|
|
|
|
|
2026-01-09 22:49:19 -05:00
|
|
|
def get_info(pse):
    """Build the event dictionary for one parsed show page.

    Args:
        pse: Parsed show page. Only its ``xpath`` method is used; the caller
            in this file passes the result of ``lxml.html.fromstring``.

    Returns:
        dict with ``scraper``, ``calendars``, ``venue``, ``show_title``,
        ``guests``, ``flyer`` and ``date_time``, plus ``date`` and ``details``
        when ``get_date`` / ``get_details`` managed to add them.

    Raises:
        IndexError: If the venue or show-title text node is missing.
        Exception: Whatever the last (date-only) parsing attempt raises, e.g.
            ``KeyError`` when no date was scraped or ``ValueError`` when the
            text does not match ``DATETIME_FORMAT_3``.
    """
    title_path = './/*/span[@class="show_title"]/text()'

    def clean_text(path, index=0):
        # Pick one text node and drop the tabs/newlines the markup leaves in it.
        return pse.xpath(path)[index].replace('\t', '').replace('\n', '')

    event = {}
    event['scraper'] = scraper
    event['calendars'] = [scraper.calendar]
    event["venue"] = clean_text('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()').strip()

    event["show_title"] = clean_text(title_path)
    if event["show_title"] == "":
        # The first text node can be empty; the original falls back to the third.
        event["show_title"] = clean_text(title_path, 2)

    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')

    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)

    try:
        # get_details() may hand back None after swallowing its own error.
        # Keep the event gathered so far instead of overwriting it with None,
        # so the date-only fallback below can still run.
        updated = get_details(pse, event)
        if updated is not None:
            event = updated
    except Exception as e:
        print("details issue: ", e)

    # Try "date + doors time" in two layouts, then fall back to the date alone.
    try:
        stamp = " ".join(event["date"]) + " " + event["details"]["Doors Open"]
        event["date_time"] = datetime.strptime(stamp, DATETIME_FORMAT)
    except Exception as e:
        print("Using alt date format 2: ", e)
        try:
            stamp = " ".join(event["date"]) + " " + event["details"]["Doors Open"]
            event["date_time"] = datetime.strptime(stamp, DATETIME_FORMAT_2)
            ppr(event)
        except Exception as e:
            print("Using alt date format 3: ", e)
            print(event['date'])
            # Not guarded on purpose: with no usable date the caller skips the show.
            event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)

    print("The Event:")
    ppr(event)
    return event
|
|
|
|
|
|
|
|
|
|
def get_date(pse, event):
    """Read the month, day and year text from the page's date container.

    Args:
        pse: Parsed page object exposing ``xpath``.
        event: Event dict; updated in place.

    Returns:
        The same ``event`` dict with ``event["date"]`` set to the list
        ``[month, day, year]`` of cleaned strings.

    Raises:
        IndexError: If any of the three elements has no text node.
    """
    pieces = []
    for css_class in ("month", "day", "year"):
        raw = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="' + css_class + '"]/text()')[0]
        # Remove embedded tabs/newlines, then surrounding blanks: the pieces are
        # later joined and fed to strptime, which rejects leading padding.
        pieces.append(raw.replace('\t', '').replace('\n', '').strip())
    event["date"] = pieces
    return event
|
|
|
|
|
|
|
|
|
|
def get_details(pse, event):
    """Attach the page's "show details" label/value pairs to the event.

    The ``h6`` text nodes are used as keys and the ``h2`` text nodes as
    values, paired by position (extra items on either side are dropped).

    Args:
        pse: Parsed page object exposing ``xpath``.
        event: Event dict; updated in place.

    Returns:
        ``event``, with ``event["details"]`` set when extraction succeeded.
        On failure the problem is printed and ``event`` is returned unchanged
        (previously the function fell off the end and returned ``None``).
    """
    container = './/*/div[@class="show_details text-center"]/div/div/'
    try:
        labels = pse.xpath(container + 'h6/text()')
        values = pse.xpath(container + 'h2/text()')
        event["details"] = dict(zip(labels, values))
    except Exception as e:
        print("details issue: ", e)
    return event
|
|
|
|
|
|
2025-11-30 16:29:50 -05:00
|
|
|
# Look up the First Avenue organization by these fields, creating it when it
# does not exist yet. `created` receives the second item of the returned pair.
_first_avenue_fields = {
    "name": "First Avenue",
    "city": "Minneapolis",
    "website": "https://first-avenue.com",
    "is_venue": True,
}
venue, created = Organization.objects.get_or_create(**_first_avenue_fields)

# getScraper() hands back three values; `virtcal` is not used in this script.
scraper, item_count_start, virtcal = digitools.getScraper(
    venue,
    venue.website,
    'msp',
)
|
2025-11-30 16:29:50 -05:00
|
|
|
|
|
|
|
|
# Timezone object for US/Central.
tz = pytz.timezone("US/Central")

# strptime layouts that get_info() tries in this order:
#   1. date + whole-hour doors time   2. date + hour:minute doors time
#   3. date only
DATETIME_FORMAT, DATETIME_FORMAT_2, DATETIME_FORMAT_3 = (
    '%b %d %Y %I%p',
    '%b %d %Y %I:%M%p',
    '%b %d %Y',
)
|
|
|
|
|
|
|
|
|
|
# Build the two listing URLs: one starting today, one starting on the first
# day of next month. start_date is written as YYYYMMDD, matching how the
# next-month value has always been assembled (year + two-digit month + "01").
_today = datetime.now()  # read the clock once so every piece agrees

year = _today.year
month = _today.strftime('%m')
# Zero-padded: the unpadded day used to produce 7-digit dates on days 1-9.
day = _today.strftime('%d')

# First day of the following month, rolling the year over after December.
if _today.month == 12:
    _first_of_next_month = _today.replace(year=_today.year + 1, month=1, day=1)
else:
    _first_of_next_month = _today.replace(month=_today.month + 1, day=1)

next_month = _first_of_next_month.strftime('%m')
next_month_string = next_month + "01"

_shows_url = 'https://first-avenue.com/shows/?start_date='
calendar_url = _shows_url + str(year) + month + day
calendar_url_2 = _shows_url + str(_first_of_next_month.year) + next_month_string
|
|
|
|
|
|
|
|
|
|
|
2026-01-09 22:49:19 -05:00
|
|
|
# print("\n\n", calendar_url, calendar_url_2, "\n\n")
|
2025-11-30 16:29:50 -05:00
|
|
|
|
|
|
|
|
# The run environment name must be given as the first command-line argument;
# it is passed straight to digitools.getBrowser().
if len(sys.argv) < 2:
    print("No run_env")
    # quit() is the interactive-shell helper added by the `site` module and
    # exits with status 0; use sys.exit with a non-zero status so a missing
    # argument is reported as a failure to whatever launched the script.
    sys.exit(1)

arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
|
|
|
|
|
|
2026-01-09 22:49:19 -05:00
|
|
|
# Get Events based on date of month
|
2025-11-30 16:29:50 -05:00
|
|
|
# Collect show links from this month's listing and, from the 15th onwards,
# from next month's listing as well.
# NOTE(review): the slice limits (63 / 95 / 31) are carried over unchanged;
# their rationale is not visible in this file.
_show_links_xpath = './/*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href'
_day_of_month = datetime.now().day  # read once so one branch decides everything

ps = digitools.getSource(br, calendar_url)
shows = ps.xpath(_show_links_xpath)
if _day_of_month < 8:
    shows = shows[:63]
elif 14 < _day_of_month < 21:
    shows = shows[:95]

if _day_of_month > 14:
    ps = digitools.getSource(br, calendar_url_2)
    _next_page_limit = 31 if _day_of_month < 21 else 63
    shows = shows + ps.xpath(_show_links_xpath)[:_next_page_limit]
|
|
|
|
|
|
|
|
|
|
# Venues from this calendar that are stored with city "St. Paul"; every other
# venue is stored as "Minneapolis".
ST_PAUL_VENUES = ("Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall")

# NOTE(review): nothing is ever appended to `events`; it is kept only because
# it is printed after the loop.
events = []

try:
    for show in shows:
        br.get(show)
        sleep(2)  # pause after navigation before reading the page source

        try:
            pse = html.fromstring(br.page_source)
        except Exception:
            print(show)
            # Skip this show: falling through would re-parse the previous page.
            continue

        try:
            event = get_info(pse)
        except Exception as e:
            print("get_info error: ", e)
            # Skip this show: falling through would re-save the previous
            # show's event under this show's link.
            continue

        try:
            event["link"] = show
            city = "St. Paul" if event["venue"] in ST_PAUL_VENUES else "Minneapolis"
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city=city)
        except Exception as e:
            print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
            # Skip this show: `venue` still refers to an earlier organization.
            continue

        try:
            event['dateStamp'] = event['date_time']
            event['scraper'] = scraper
            new_event, created = digitools.createDetailedEvent(event, "Mu", venue, scraper)
            scraper.items += 1
        except Exception as e:
            print("event creation error: ", e, "\n\n", event, "\n\n", created)

    ppr(events)
finally:
    # Release the browser even when navigation or the loop raises.
    br.close()

digitools.updateScraper(scraper, item_count_start)
|
|
|
|
|
|
|
|
|
|
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|