first commit
71
event_scrapers/Working/venues/AcmeComedy.Mpls.py
Normal file
@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so this standalone script can use the events app models
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

import events.digitools as digitools

from events.models import Organization, Scraper, Calendar, Event

venue, created = Organization.objects.get_or_create(
    name="Acme Comedy Club",
    city="Minneapolis",
    website="https://acmecomedycompany.com/the-club/calendar/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/li[@class="event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = datetime.now().year
            # Listings carry no year, so January dates are assumed to be next year
            if month == "Jan":
                year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            digitools.createBasicEvent(event, "Co", venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    # No browser has been opened yet, so just report the missing run_env and exit
    print("No run_env")
    quit()

links = digitools.createURL("https://acmecomedycompany.com/the-club/calendar/")

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Co")

digitools.updateScraper(scraper, item_count_start)
br.close()
67
event_scrapers/Working/venues/Amsterdam.StPaul.py
Normal file
@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Calendar
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Amsterdam Bar & Hall",
    city="St. Paul",
    website="https://www.amsterdambarandhall.com/events-new/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps):
    contents = ps.xpath('.//*/ul[@class="events-list"]/li')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/div[@class="date-day"]/text()')[0]
            month = c.xpath('.//*/div[@class="date-month"]/text()')[0]
            year = datetime.now().year
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//div/h4/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
            if " presents" in event['title']:
                event['title'] = event['title'].split("presents")[1][1:].strip()
            if event['title'].startswith('.'):
                print("BLAHH\n")
                event['title'] = event['title'][1:].strip()
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print(e)


if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps)
sleep(3)
br.close()

digitools.updateScraper(scraper, item_count_start)
70
event_scrapers/Working/venues/EastsideLibrary.py
Normal file
@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

current_year = str(datetime.now().year)

venue, created = Organization.objects.get_or_create(
    name="Eastside Freedom Library",
    city="Minneapolis",
    website="https://eastsidefreedomlibrary.org/events/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'

def get_events(ps):
    contents = ps.xpath('.//*/article')
    # ppr("contents:", contents)
    for c in contents:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a/text()')[0].strip()
            event['link'] = c.xpath('.//*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a/@href')[0]
            event['date'] = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].strip() + " " + current_year
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            try:
                new_event = digitools.createBasicEvent(event, "Ed", venue)
                scraper.items += 1
            except Exception as e:
                print(e)
                ppr(event)
                print("\n+++\n")
        except Exception as e:
            print(e)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

calendar_url = 'https://eastsidefreedomlibrary.org/events/'

ps = digitools.getSource(br, calendar_url)

get_events(ps)

# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
178
event_scrapers/Working/venues/FirstAveScrape.py
Normal file
@@ -0,0 +1,178 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr

from lxml import html
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="First Avenue",
    city="Minneapolis",
    website="https://first-avenue.com",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I%p'
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
DATETIME_FORMAT_3 = '%b %d %Y'

# Set initial variables for City, etc
month = int(datetime.now().month)
day = int(datetime.now().day)

if month == 12:
    next_month = "01"
else:
    next_month = month + 1
    if next_month < 10:
        next_month = "0" + str(next_month)

if month < 10:
    month = "0" + str(month)

year = int(datetime.now().year)

# Build show-calendar URLs for the current month and the following month
calendar_url = 'https://first-avenue.com/shows/?start_date=' + str(year) + str(month) + str(day)

next_month_string = str(next_month) + "01"

if next_month == 1:
    calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
else:
    if int(next_month) == 1:
        calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
    else:
        calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string


print("\n\n", calendar_url, calendar_url_2, "\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


# Pull a window of show links sized by where we are in the month
if datetime.now().day < 8:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
elif 7 < datetime.now().day < 15:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
elif 14 < datetime.now().day < 21:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
    ps = digitools.getSource(br, calendar_url_2)
    shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:31]
else:
    ps = digitools.getSource(br, calendar_url)
    shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')

    ps = digitools.getSource(br, calendar_url_2)
    shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]

events = []

def get_info(pse):
    event = {}
    event['scraper'] = scraper
    event['calendar'] = scraper.calendar
    event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()
    event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[0].replace('\t', '').replace('\n', '')
    if event["show_title"] == "":
        event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[2].replace('\t', '').replace('\n', '')
    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')
    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)
    try:
        event = get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)
    # Try the date formats from most to least specific
    try:
        event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
    except Exception as e:
        print("Using alt date format 2: ", e)
        try:
            event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
            ppr(event)
        except Exception as e:
            print("Using alt date format 3: ", e)
            print(event['date'])
            event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
    return event

def get_date(pse, event):
    month = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="month"]/text()')[0].replace('\t', '').replace('\n', '')
    day = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="day"]/text()')[0].replace('\t', '').replace('\n', '')
    year = pse.xpath('.//*/div[@class="date_container"]/div/div[@class="year"]/text()')[0].replace('\t', '').replace('\n', '')
    event["date"] = [month, day, year]
    return event

def get_details(pse, event):
    try:
        details = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
        info = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
        di = zip(details, info)
        details = {}
        for d, i in di:
            details[d] = i
        event["details"] = details
        return event
    except Exception as e:
        print("details issue: ", e)

for show in shows:
    br.get(show)
    sleep(2)
    try:
        pse = html.fromstring(br.page_source)
    except Exception as e:
        print(show)
        pass
    try:
        event = get_info(pse)
    except Exception as e:
        print("get_info error: ", e)
    try:
        event["link"] = show
        if event["venue"] in ["Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"]:
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="St. Paul")
        else:
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="Minneapolis")
    except Exception as e:
        print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
    try:
        event['dateStamp'] = event['date_time']
        event['scraper'] = scraper
        new_event, created = digitools.createDetailedEvent(event, "Mu", venue)
        scraper.items += 1
    except Exception as e:
        print("event creation error: ", e, "\n\n", event, "\n\n", created)
        quit()

ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
68
event_scrapers/Working/venues/GinkgoCoffee.stp.py
Normal file
@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Ginkgo Coffee",
    city="Saint Paul",
    website="https://ginkgocoffee.com/events/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/article')
    for c in contents:
        try:
            event = {}
            dateTime = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
            month = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split(' ')[0]
            year = datetime.now().year
            if month == "January":
                year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "")
            event['date'] = " ".join([dateTime, str(year)])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/h3/a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
71
event_scrapers/Working/venues/GreenRoom.Mpls.py
Normal file
@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools


venue, created = Organization.objects.get_or_create(
    name="Green Room",
    city="Minneapolis",
    website="https://www.greenroommn.com/events",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

event_type = "Mu"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]')
    for c in contents:
        try:
            event = {}
            time = c.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
            date = c.xpath('.//*/span[@class="vp-date"]/text()')[0].strip()
            month = date.split(" ")[1]
            year = datetime.now().year
            # if month == "Jan":
            #     year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
            event['datetime'] = date + " " + str(year) + " " + time
            event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
            event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps, event_type)
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
96
event_scrapers/Working/venues/HookLadderScrape.py
Normal file
@@ -0,0 +1,96 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


from lxml import html

count = 0

venue, created = Organization.objects.get_or_create(
    name="Hook & Ladder",
    city="Minneapolis",
    website="https://thehookmpls.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)
ppr(scraper)

tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'

# Set initial variables for City, etc
calendar_url = [
    "https://thehookmpls.com/events/list/page/1",
    "https://thehookmpls.com/events/list/page/2",
    "https://thehookmpls.com/events/list/page/3"
]

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


def get_listings(pse, events):
    nevents = pse.xpath('.//*/article')
    for event in nevents:
        e = {}
        e['datetime'] = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
        e['show_title'] = event.xpath('.//*/header/h2/a/@title')[0]
        e['link'] = event.xpath('.//*/header/h2/a/@href')[0]
        try:
            e['subtitle'] = event.xpath('.//*/header/div[@class="eventSubHead"]/text()')[0]
        except:
            continue
        try:
            e['price'] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0].replace("Tickets ", "")
        except:
            e['price'] = "See Link"
        e['image'] = event.xpath('.//*/img/@data-src')[0]
        e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
        e['scraper'] = scraper
        e['calendar'] = scraper.calendar
        events.append(e)

events = []

for cal in calendar_url:
    br.get(cal)
    sleep(3)
    pse = html.fromstring(br.page_source)
    get_listings(pse, events)

for event in events:
    try:
        new_event = Event.objects.update_or_create(
            calendar=event['calendar'],
            scraper=event['scraper'],
            event_type='Mu',
            show_title=event["show_title"],
            show_link=event["link"],
            show_date=event["date_time"],
            show_day=event["date_time"],
            guests=" ".join(event["subtitle"]),
            venue=venue
        )
    except Exception as e:
        print("oops ", e, "\n\n", "Scraper:", scraper)

br.close()
digitools.updateScraper(scraper, item_count_start)
70
event_scrapers/Working/venues/MagersQuinn.py
Normal file
@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Magers & Quinn",
    city="Minneapolis",
    website="https://www.magersandquinn.com/events",
    is_venue=False
)

scraper, item_count_start = digitools.getScraper(venue)

DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="day has-event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/div[@class="dd"]/text()')[0]
            month = c.xpath('.//*/div[@class="month"]/text()')[0]
            year = c.xpath('.//*/div[@class="year"]/text()')[0]
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/text()')[0]
            event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0]
            digitools.createBasicEvent(event, "Ed", venue)
            scraper.items += 1
        except Exception as e:
            event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event off-site"]/@href')[0]
            print(e)
            ppr(event)
            digitools.createBasicEvent(event, "Ed", venue)
            print("\n\n+++\n\n")


links = digitools.createBasicURL("https://www.magersandquinn.com/events/")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Ed")
    sleep(3)
# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
80
event_scrapers/Working/venues/MplsVFW.py
Normal file
@@ -0,0 +1,80 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


from selenium.webdriver.common.by import By
from lxml import html

venue, created = Organization.objects.get_or_create(
    name="Uptown VFW",
    city="Minneapolis",
    website="https://noboolpresents.com/venues/uptown-vfw/",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(30)

def getEvents(br):
    ps = html.fromstring(br.page_source)
    events = ps.xpath('.//*/article')
    for event in events:
        deets = {}
        dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("•", "").strip() + " " + current_year
        title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
        link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
        deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
        try:
            new_event = Event.objects.update_or_create(
                calendar=scraper.calendar,
                scraper=scraper,
                event_type='Mu',
                show_title=title,
                show_link=link,
                show_date=datetime.strptime(dateTime, DATETIME_FORMAT),
                show_day=datetime.strptime(dateTime, DATETIME_FORMAT).date(),
                more_details=deets["tickets"],
                venue=venue
            )
            scraper.items += 1
        except Exception as e:
            print("oops", e)

getEvents(br)
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
sleep(5)
getEvents(br)
br.close()

digitools.updateScraper(scraper, item_count_start)
106
event_scrapers/Working/venues/ParkwayTheater.py
Normal file
@@ -0,0 +1,106 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event as DSEvent
import events.digitools as digitools


try:
    venue, created = Organization.objects.get_or_create(
        name="Parkway Theater",
        city="Minneapolis",
        website="https://theparkwaytheater.com",
        is_venue=True
    )
except Exception as e:
    venue = Organization.objects.get(name="Parkway Theater")

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d, %Y %I:%M %p'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
    img_etc = ps.xpath('.//*/div[@class="summary-thumbnail-outer-container"]/a/div/img/@src')
    ps.xpath('.//*/span[@class="event-time-12hr"]/text()')
    for c, i in zip(contents, img_etc):
        try:
            event = {}
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
            event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
            event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['desc'] = c.xpath('.//*/p/text()')[0]
            event['img_link'] = i
            event['details'] = {
                'description': event['desc'],
                'img_link': event['img_link'],
            }

            try:
                new_event = DSEvent.objects.update_or_create(
                    calendar=scraper.calendar,
                    scraper=scraper,
                    event_type=event_type,
                    show_title=event['title'],
                    show_link=event['link'],
                    show_date=datetime.strptime(event['date'], DATETIME_FORMAT),
                    show_day=datetime.strptime(event['date'], DATETIME_FORMAT),
                    more_details=event["details"],
                    venue=venue
                )
                scraper.items += 1
            except Exception as e:
                try:
                    event['date'] = c.xpath('.//div/div/time/text()')[0].split("–")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
                    event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
                    new_event = DSEvent.objects.update_or_create(
                        calendar=scraper.calendar,
                        scraper=scraper,
                        event_type=event_type,
                        show_title=event['title'],
                        show_link=event['link'],
                        show_date=datetime.strptime(event['date'], DATETIME_FORMAT),
                        show_day=datetime.strptime(event['date'], DATETIME_FORMAT),
                        more_details=event["details"],
                        venue=venue
                    )
                    scraper.items += 1
                except Exception as e:
                    print(e)
                    print("\n\n+++\n\n")
        except Exception as e:
            continue

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

calendar_url = 'https://theparkwaytheater.com/live-events'
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Mu")

calendar_url = "https://theparkwaytheater.com/movies"
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Th")

# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
98
event_scrapers/Working/venues/SPCO.stp.py
Normal file
@@ -0,0 +1,98 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper

import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="St Paul Chamber Orchestra",
    city="St Paul",
    website="https://thespco.org",
    is_venue=False
)

scraper, item_count_start = digitools.getScraper(venue)

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%A, %B %d, %Y – %I:%M %p'

def get_events(ps, event_type):
    contents = ps.xpath('.//*/div[@class="event-title"]/a/@href')
    for c in set(contents):
        try:
            link = 'https://content.thespco.org' + c
            ps = digitools.getSource(br, link)
            ntitle = ps.xpath('.//*/article/h1/text()')
            subtitle = ps.xpath('.//*/article/h1/em/text()')
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            if len(subtitle) == 1:
                if len(ntitle) == 2:
                    title = ntitle[0] + subtitle[0] + ntitle[1]
                elif ntitle[0].startswith(" "):
                    title = subtitle[0] + ntitle[0]
                else:
                    title = ntitle[0] + subtitle[0]
            else:
                title = ntitle[0]

            events = ps.xpath('.//*/div[@class="day"]')
            for e in events:
                new_venue = e.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
                location = e.xpath('.//*/span[@class="location"]/text()')[0].strip()
                if 'Minneapolis' in location:
                    location = 'Minneapolis'
                elif 'St. Paul' in location:
                    location = 'St. Paul'
                else:
                    location = location

                venue, created = Organization.objects.get_or_create(
                    name=new_venue,
                    city=location,
                    is_venue=True
                )

                dateTime = e.xpath('.//*/h3[@class="date"]/text()')[0].replace("\n", "").replace("\t", "").strip()
                event['dateStamp'] = datetime.strptime(dateTime, DATETIME_FORMAT)
                event['venue'] = venue
                event['location'] = location
                event['title'] = "SPCO: " + title
                event['link'] = link
                event_type = "Mu"
                digitools.createBasicEvent(event, event_type, venue)
                scraper.items += 1
        except Exception as e:
            print("ERROR: ", e)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Get Event Page Link(s)
links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Mu")
    sleep(3)

br.close()
digitools.updateScraper(scraper, item_count_start)
71
event_scrapers/Working/venues/WhiteSquirrelScrape.py
Normal file
@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p'

venue, created = Organization.objects.get_or_create(
    name="White Squirrel",
    city="St. Paul",
    website="https://whitesquirrelbar.com",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)


# Set initial variables for City, etc
calendar_url = [
    'https://whitesquirrelbar.com/calendar/list/page/1/',
    'https://whitesquirrelbar.com/calendar/list/page/2/',
    'https://whitesquirrelbar.com/calendar/list/page/3/'
]

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

def get_listings(pse, events):
    listings = pse.xpath('.//*/div[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
    for l in listings:
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        try:
            event["image"] = l.xpath('.//*/img/@src')[0]
        except:
            event["image"] = "none"
        event["date"] = l.xpath('.//time/@datetime')[0]
        event["time"] = l.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split("@")[1]
        event["title"] = l.xpath('.//*/h3/a/text()')[0].replace("\t", "").replace("\n", "")
        event["link"] = l.xpath('.//*/h3/a/@href')[0]
        event['datetime'] = event['date'] + " " + event['time']
        event["dateStamp"] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
        events.append(event)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items += 1

events = []

for cal in calendar_url:
    ps = digitools.getSource(br, cal)
    get_listings(ps, events)

br.close()
digitools.updateScraper(scraper, item_count_start)
74
event_scrapers/Working/venues/cedar.mpls.py
Normal file
@@ -0,0 +1,74 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Cedar Cultural Center",
    city="Minneapolis",
    website="https://www.thecedar.org",
    is_venue=True
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'

def get_events(ps):
    links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
    for l in links:
        if "cedar-news-blog" in l:
            continue
        pse = digitools.getSource(br, "https://www.thecedar.org" + l)
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['link'] = "https://www.thecedar.org" + l
        try:
            time = pse.xpath('.//*/time[@class="event-time-localized-start"]/text()')[0]
            date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
            event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
        except:
            try:
                time = pse.xpath('.//*/time[@class="event-time-localized"]/text()')[0]
                date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
                event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
            except Exception as e:
                print(e)
                print("failed event: ", event)
        dateStamp = date + " " + time
        event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items += 1

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


calendar_url = 'https://www.thecedar.org/listing'
ps = digitools.getSource(br, calendar_url)
get_events(ps)
br.close()

digitools.updateScraper(scraper, item_count_start)
105
event_scrapers/Working/venues/club331Scrape.py
Normal file
@@ -0,0 +1,105 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from selenium.webdriver.common.by import By
from lxml import html

from events.models import Organization, Scraper, Event
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Club 331",
    city="Minneapolis",
    website="https://331club.com",
    is_venue=True,
)

scraper, item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(3)

br.find_element(By.CLASS_NAME, 'more_events').click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)

dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')

def process_times(times):
    # print("Times: ", times)
    time = []
    for t in times:
        t = t.replace("\n", "").replace("TBA", "")
        if len(t) > 0 and t.endswith("pm"):
            if "-" in t:
                t = t.split("-")[0] + "pm"
            time.append(t)
    return time

events = []

for d in dates:
    event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
    cols = d.xpath('.//div[@class="column"]')
    for c in cols:
        bands = c.xpath('.//p/a/text()')
        links = c.xpath('.//p/a/@href')
        time = process_times(c.xpath('.//p/text()'))
        event = {}
        event["datetime"] = event_date + time + [current_year]
        try:
            event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT)
        except:
            try:
                event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_2)
            except:
                try:
                    event["date_time"] = datetime.strptime(" ".join(event["datetime"]), DATETIME_FORMAT_3)
                except:
                    event["date_time"] = "Invalid"
        event["bands"] = (", ").join(bands)
        if len(bands) > 0:
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = event["bands"]
            event['date'] = event["date_time"]
            event['dateStamp'] = event["date_time"]
            event['link'] = "https://331club.com/#calendar"

            try:
                digitools.createBasicEvent(event, "Mu", venue)
            except Exception as e:
                print('oops', e)
            events.append(event)

br.close()

digitools.updateScraper(scraper, item_count_start)