Files
scrapers/Working/venues/mn.mpls.club331.py

100 lines
2.8 KiB
Python

import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from lxml import html
from events.models import Organization, Scraper, Event
import events.digitools as digitools
def process_times(times):
time = []
for t in times:
t = t.replace("\n", "").replace("TBA", "")
if len(t) > 0 and t.endswith("pm"):
if "-" in t:
t = t.split("-")[0] + "pm"
time.append(t)
return time
venue, created = Organization.objects.get_or_create(
name="Club 331",
city="Minneapolis",
website="https://331club.com",
is_venue=True,
)
scraper,item_count_start,virtcal = digitools.getScraper(venue, venue.website, 'msp')
DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
br.get(calendar_url)
sleep(3)
br.find_element(By.CLASS_NAME, 'more_events').click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)
dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')
events = []
for d in dates:
event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
cols = d.xpath('.//div[@class="column"]')
for c in cols:
bands = c.xpath('.//p/a/text()')
links = c.xpath('.//p/a/@href')
time = process_times(c.xpath('.//p/text()'))
event = {}
event["datetime"] = event_date + time + [current_year]
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2)
except:
try:
event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3)
except:
event["date_time"] = "Invalid"
event["bands"] = (", ").join(bands)
if len(bands) > 0:
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
event['title'] = event["bands"]
event['date'] = event["date_time"]
event['dateStamp'] = event["date_time"]
event['link'] = "https://331club.com/#calendar"
try:
digitools.createBasicEvent(event, "Mu", venue)
except Exception as e:
print('oops', e)
events.append(event)
br.close()
digitools.updateScraper(scraper, item_count_start)