Files
scrapers/Working/venues/mn.stp.SPCO.py

98 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os, sys
from datetime import datetime
from dateutil import relativedelta
sys.path.append('/var/www/digisnaxx.ado/scrapers')
import dtss
dtss.getReady()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
venue, created = Organization.objects.get_or_create(
name="St Paul Chamber Orchestra",
city="St Paul",
website="https://thespco.org",
is_venue = False
)
scraper,item_count_start, virtcal = digitools.getScraper(venue, venue.website, 'msp')
# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
def get_events(ps, event_type):
contents = ps.xpath('.//*/div[@class="event-title"]/a/@href')
for c in set(contents):
try:
link = 'https://content.thespco.org' + c
ps = digitools.getSource(br, link)
ntitle = ps.xpath('.//*/article/h1/text()')
subtitle = ps.xpath('.//*/article/h1/em/text()')
event = {}
event['scraper'] = scraper
event['calendars'] = [scraper.calendar]
if len(subtitle) == 1:
if len(ntitle) == 2:
title = ntitle[0] + subtitle[0] + ntitle[1]
elif ntitle[0].startswith(" "):
title = subtitle[0] + ntitle[0]
else:
title = ntitle[0] + subtitle[0]
else:
title = ntitle[0]
events = ps.xpath('.//*/div[@class="day"]')
for e in events:
new_venue = e.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
location = e.xpath('.//*/span[@class="location"]/text()')[0].strip()
if 'Minneapolis' in location:
location = 'Minneapolis'
elif 'St. Paul' in location:
location = 'St. Paul'
else:
location = location
venue, created = Organization.objects.get_or_create(
name=new_venue,
city=location,
is_venue = True
)
dateTime = e.xpath('.//*/h3[@class="date"]/text()')[0].replace("\n", "").replace("\t", "").strip()
event['dateStamp'] = datetime.strptime(dateTime, DATETIME_FORMAT)
event['venue'] = venue
event['location'] = location
event['title'] = "SPCO: " + title
event['link'] = link
event_type = "Mu"
digitools.createBasicEvent(event, event_type, venue)
scraper.items+=1
except Exception as e:
print("ERROR: ", e)
print("\n\n+++\n\n")
if len(sys.argv) >= 2:
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
else:
print("No run_env")
quit()
# Get Event Page Link(s)
links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")
for link in links:
ps = digitools.getSource(br, link)
get_events(ps, "Mu")
sleep(3)
br.close()
digitools.updateScraper(scraper, item_count_start)