Files
api.digisnaxx/events/scrapers/Working/venues/club331Scrape.py

105 lines
2.9 KiB
Python
Raw Normal View History

2025-10-11 03:50:49 -05:00
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
2025-11-30 15:34:08 -05:00
sys.path.append('../../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'config.django.local'
2025-10-11 03:50:49 -05:00
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from lxml import html
from events.models import Organization, Scraper, Event
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="Club 331",
city="Minneapolis",
website="https://331club.com",
is_venue=True,
)
2025-11-30 15:34:08 -05:00
scraper,item_count_start,virtcal = digitools.getScraper(venue, 'msp')
2025-10-11 03:50:49 -05:00
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(3)
br.find_element(By.CLASS_NAME, 'more_events').click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)
dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')
def process_times(times):
# print("Times: ", times)
time = []
for t in times:
t = t.replace("\n", "").replace("TBA", "")
if len(t) > 0 and t.endswith("pm"):
if "-" in t:
t = t.split("-")[0] + "pm"
time.append(t)
return time
events = []
for d in dates:
event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
cols = d.xpath('.//div[@class="column"]')
for c in cols:
bands = c.xpath('.//p/a/text()')
links = c.xpath('.//p/a/@href')
time = process_times(c.xpath('.//p/text()'))
event = {}
event["datetime"] = event_date + time + [current_year]
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3)
except:
event["date_time"] = "Invalid"
event["bands"] = (", ").join(bands)
if len(bands) > 0:
event['scraper'] = scraper
2025-11-30 15:34:08 -05:00
event['calendars'] = [scraper.calendar]
2025-10-11 03:50:49 -05:00
event['title'] = event["bands"]
event['date'] = event["date_time"]
event['dateStamp'] = event["date_time"]
event['link'] = "https://331club.com/#calendar"
try:
digitools.createBasicEvent(event, "Mu", venue)
except Exception as e:
print('oops', e)
events.append(event)
br.close()
digitools.updateScraper(scraper, item_count_start)