moved scrapers into app_dir events

This commit is contained in:
2025-10-11 03:55:16 -05:00
parent 187c2d1cd7
commit 1adc692756
67 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
import events.digitools as digitools
from events.models import Organization, Scraper, Calendar, Event
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Acme Comedy Club",
city="Minneapolis",
website="https://acmecomedycompany.com/the-club/calendar/",
is_venue = True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layout for the text get_events assembles, e.g. "Oct 11 2025 8:00 PM".
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
# NOTE(review): not referenced by the code visible in this script.
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type):
    """Create a basic event for every ``<li class="event">`` in *ps*.

    Args:
        ps: Parsed calendar page exposing ``.xpath()``.
        event_type: Type code passed through to ``digitools.createBasicEvent``.

    The listing gives only a month abbreviation and a day, so the year is
    inferred: a month earlier than the current month is treated as next year.
    A listing that cannot be parsed or saved is printed and skipped.
    """
    today = datetime.now()
    for c in ps.xpath('.//*/li[@class="event"]'):
        event = {}
        try:
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            # Previously every "Jan" listing was pushed to next year, which
            # misdated January shows whenever the scraper ran in January.
            year = today.year
            if datetime.strptime(month.strip(), '%b').month < today.month:
                year += 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            start = c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()
            event['date'] = " ".join([month, day, str(year), start])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    # No browser exists yet on this path; the original called br.close() here,
    # which raised NameError instead of exiting cleanly.
    print("No run_env")
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    links = digitools.createURL("https://acmecomedycompany.com/the-club/calendar/")
    for link in links:
        ps = digitools.getSource(br, link)
        get_events(ps, "Co")
    digitools.updateScraper(scraper, item_count_start)
finally:
    # Release the browser even if a page fails to load or parse.
    br.close()

View File

@@ -0,0 +1,67 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Calendar
import events.digitools as digitools
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Amsterdam Bar & Hall",
city="St. Paul",
website="https://www.amsterdambarandhall.com/events-new/",
is_venue=True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# strptime layout for the text get_events assembles, e.g. "October 11 2025 8:00pm".
DATETIME_FORMAT = '%B %d %Y %I:%M%p'
# NOTE(review): not referenced by the code visible in this script.
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps):
    """Create a music event for every entry of the ``events-list`` in *ps*.

    Args:
        ps: Parsed events page exposing ``.xpath()``.

    The listing carries no year, so it is inferred: a month earlier than the
    current month is treated as next year (the original always used the
    current year, misdating January shows scraped in December). A
    "<promoter> presents" prefix is removed from the title. Entries that fail
    to parse or save are printed and skipped.
    """
    today = datetime.now()
    for c in ps.xpath('.//*/ul[@class="events-list"]/li'):
        try:
            event = {}
            day = c.xpath('.//*/div[@class="date-day"]/text()')[0]
            month = c.xpath('.//*/div[@class="date-month"]/text()')[0]
            year = today.year
            if datetime.strptime(month.strip(), '%B').month < today.month:
                year += 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            title = c.xpath('.//div/h4/a/text()')[0]
            # Only the first space-separated token of the info line is the time.
            start = c.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]
            event['date'] = " ".join([month, day, str(year), start])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
            if " presents" in title:
                # Keep what follows "presents", minus the one separator character.
                title = title.split("presents")[1][1:].strip()
            if title.startswith('.'):
                title = title[1:].strip()
            event['title'] = title
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print(e)
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    ps = digitools.getSource(br, venue.website)
    get_events(ps)
    sleep(3)
finally:
    # Release the browser even if loading or parsing fails.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Year appended to each listing's date text before parsing (see get_events).
current_year = str(datetime.now().year)
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Eastside Freedom Library",
city="Minneapolis",
website="https://eastsidefreedomlibrary.org/events/",
is_venue=True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layout for "<start text> <year>", e.g. "October 11 @ 7:00 pm 2025".
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
def get_events(ps):
    """Create an "Ed" event for each ``<article>`` found in *ps*.

    Args:
        ps: Parsed events page exposing ``.xpath()``.

    An article whose fields cannot be extracted or parsed is reported with the
    exception text only; a failure while saving also dumps the event dict.
    """
    title_anchor = ('.//*/h3[@class="tribe-events-calendar-list__event-title '
                    'tribe-common-h6 tribe-common-h4--min-medium"]/a')
    for article in ps.xpath('.//*/article'):
        try:
            listing = {'scraper': scraper, 'calendar': scraper.calendar}
            listing['title'] = article.xpath(title_anchor + '/text()')[0].strip()
            listing['link'] = article.xpath(title_anchor + '/@href')[0]
            start_text = article.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
            # The page omits the year, so the current one is appended.
            listing['date'] = start_text.strip() + " " + current_year
            listing['dateStamp'] = datetime.strptime(listing['date'], DATETIME_FORMAT)
        except Exception as err:
            print(err)
            continue
        try:
            digitools.createBasicEvent(listing, "Ed", venue)
            scraper.items += 1
        except Exception as err:
            print(err)
            ppr(listing)
            print("\n+++\n")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
calendar_url = 'https://eastsidefreedomlibrary.org/events/'
try:
    ps = digitools.getSource(br, calendar_url)
    get_events(ps)
finally:
    # Release the browser even if loading or parsing fails.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,178 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
from lxml import html
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
# Fetch the Organization row for First Avenue, creating it on the first run
# (get_or_create returns an (object, created) pair). The main loop below
# rebinds `venue` to each show's own venue.
venue, created = Organization.objects.get_or_create(
name="First Avenue",
city="Minneapolis",
website="https://first-avenue.com",
is_venue = True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layouts tried in order by get_info:
# date plus whole-hour doors time, e.g. "Oct 11 2025 7PM"
DATETIME_FORMAT = '%b %d %Y %I%p'
# date plus doors time with minutes, e.g. "Oct 11 2025 7:30PM"
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
# date only, e.g. "Oct 11 2025"
DATETIME_FORMAT_3 = '%b %d %Y'
# Listing URLs: one starting today, one starting on the first of next month.
def build_calendar_urls(today):
    """Return ``(url_from_today, url_from_first_of_next_month)``.

    Args:
        today: A ``datetime`` (or ``date``) for the current day.

    ``start_date`` is always rendered as zero-padded ``YYYYMMDD``. The original
    left the day unpadded (``2025105`` for 5 October) and, in December,
    compared the string ``"01"`` with an int, raising TypeError.
    """
    base = 'https://first-avenue.com/shows/?start_date='
    if today.month == 12:
        next_year, next_month = today.year + 1, 1
    else:
        next_year, next_month = today.year, today.month + 1
    return (
        base + today.strftime('%Y%m%d'),
        base + '%04d%02d01' % (next_year, next_month),
    )


calendar_url, calendar_url_2 = build_calendar_urls(datetime.now())
print("\n\n", calendar_url, calendar_url_2, "\n\n")
# The run environment handed to digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)

# How many show links to keep from this month's page and from next month's
# page depends on the day of the month (None keeps all, 0 skips the page).
show_link_query = './/*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href'
day_of_month = datetime.now().day
if day_of_month < 8:
    this_month_cap, next_month_cap = 63, 0
elif day_of_month < 15:
    this_month_cap, next_month_cap = None, 0
elif day_of_month < 21:
    this_month_cap, next_month_cap = 95, 31
else:
    this_month_cap, next_month_cap = None, 63

ps = digitools.getSource(br, calendar_url)
shows = ps.xpath(show_link_query)[:this_month_cap]
if next_month_cap:
    ps = digitools.getSource(br, calendar_url_2)
    shows = shows + ps.xpath(show_link_query)[:next_month_cap]
events = []
def _strip_layout(text):
    """Remove the tab and newline padding the site wraps around text nodes."""
    return text.replace('\t', '').replace('\n', '')


def _parse_show_time(event):
    """Return the show's datetime, trying doors-time layouts before date-only.

    Raises KeyError when ``event`` has no "date" and ValueError when even the
    date-only layout does not fit; callers treat either as a failed show.
    """
    try:
        return datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
    except (KeyError, TypeError, ValueError) as e:
        print("Using alt date format 2: ", e)
    try:
        return datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
    except (KeyError, TypeError, ValueError) as e:
        print("Using alt date format 3: ", e)
    return datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)


def get_info(pse):
    """Collect venue, title, guests, flyer, date and details from a show page.

    Args:
        pse: Parsed show page exposing ``.xpath()``.

    Returns:
        dict with "venue", "show_title", "guests", "flyer", "date_time" and,
        when found, "date" and "details".

    Raises:
        IndexError, KeyError or ValueError when the venue, title or date
        cannot be read.
    """
    event = {}
    event['scraper'] = scraper
    event['calendar'] = scraper.calendar
    event["venue"] = _strip_layout(pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0]).strip()
    titles = pse.xpath('.//*/span[@class="show_title"]/text()')
    event["show_title"] = _strip_layout(titles[0])
    if event["show_title"] == "":
        # Fall back to the third text node when the first is empty.
        event["show_title"] = _strip_layout(titles[2])
    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')
    # Both helpers fill `event` in place. Their return value is ignored so a
    # failing helper can never replace the dict built so far (the original
    # get_details returned None on its error path, wiping every field).
    try:
        get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)
    try:
        get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)
    event["date_time"] = _parse_show_time(event)
    return event


def get_date(pse, event):
    """Store the page's ``[month, day, year]`` strings in ``event["date"]`` and return *event*."""
    container = './/*/div[@class="date_container"]/div/div[@class="%s"]/text()'
    event["date"] = [_strip_layout(pse.xpath(container % part)[0]) for part in ("month", "day", "year")]
    return event


def get_details(pse, event):
    """Store the page's label -> value detail pairs in ``event["details"]`` and return *event*.

    Labels come from the ``h6`` nodes and values from the ``h2`` nodes of the
    ``show_details`` block, paired by position.
    """
    labels = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
    values = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
    event["details"] = dict(zip(labels, values))
    return event
# Visit every collected show page, resolve its venue and store the event.
ST_PAUL_VENUES = ["Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"]
try:
    for show in shows:
        br.get(show)
        sleep(2)
        # Each stage skips the show on failure. The original carried on with
        # the previous iteration's pse/event/venue, storing wrong data.
        try:
            pse = html.fromstring(br.page_source)
        except Exception as e:
            print(show)
            continue
        try:
            event = get_info(pse)
        except Exception as e:
            print("get_info error: ", e)
            continue
        try:
            event["link"] = show
            city = "St. Paul" if event["venue"] in ST_PAUL_VENUES else "Minneapolis"
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city=city)
        except Exception as e:
            print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
            continue
        try:
            event['dateStamp'] = event['date_time']
            event['scraper'] = scraper
            new_event, created = digitools.createDetailedEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            print("event creation error: ", e, "\n\n", event, "\n\n", created)
            # NOTE(review): the run still aborts on the first failed save, as
            # before; confirm whether skipping the show would be preferable.
            sys.exit()
    ppr(events)
finally:
    # Release the browser on every exit path, including the abort above.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,68 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
# NOTE(review): city is spelled "Saint Paul" here but "St. Paul" in the other
# scrapers of this commit — confirm which spelling the data should use.
venue, created = Organization.objects.get_or_create(
name="Ginkgo Coffee",
city="Saint Paul",
website="https://ginkgocoffee.com/events/",
is_venue = True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): this module-level value is not read by the code visible here;
# get_events is called with the literal "Mu".
event_type = ""
# strptime layouts
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# NOTE(review): not referenced by the code visible in this script.
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
# Layout for "<start text> <year>", e.g. "October 11 @ 7:00 pm 2025".
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
def get_events(ps, event_type):
    """Create a basic event for every ``<article>`` in *ps*.

    Args:
        ps: Parsed events page exposing ``.xpath()``.
        event_type: Type code passed through to ``digitools.createBasicEvent``.

    The start text has no year, so it is inferred: a month earlier than the
    current month is treated as next year. The original bumped every
    "January" listing, even when run in January. Successful saves are counted
    on ``scraper.items`` as in the other scrapers; failures are printed and
    skipped.
    """
    today = datetime.now()
    for c in ps.xpath('.//*/article'):
        event = {}
        try:
            dateTime = c.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
            month = dateTime.strip().split(' ')[0]
            year = today.year
            if datetime.strptime(month, '%B').month < today.month:
                year += 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/a/text()')[0].replace("\n", "").replace("\t", "")
            event['date'] = " ".join([dateTime, str(year)])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/h3/a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    ps = digitools.getSource(br, venue.website)
    get_events(ps, "Mu")
    sleep(3)
finally:
    # Release the browser even if loading or parsing fails.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Green Room",
city="Minneapolis",
website="https://www.greenroommn.com/events",
is_venue = True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# Type code passed to get_events at the bottom of the script.
event_type = "Mu"
# strptime layouts
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# Layout for "<date> <year> <time>", e.g. "Sat Oct 11 2025 7:30 PM".
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
# NOTE(review): not referenced by the code visible in this script.
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type):
    """Create a basic event for every Green Room event card in *ps*.

    Args:
        ps: Parsed events page exposing ``.xpath()``.
        event_type: Type code passed through to ``digitools.createBasicEvent``.

    The card date has no year, so it is inferred: a month earlier than the
    current month is treated as next year. The original had its rollover
    commented out and always used the current year. Cards that fail to parse
    or save are printed and skipped.
    """
    today = datetime.now()
    for c in ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]'):
        event = {}
        try:
            time_text = c.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
            date_text = c.xpath('.//*/span[@class="vp-date"]/text()')[0].strip()
            # date_text is "<weekday> <month> <day>"; the month is its second token.
            month = date_text.split(" ")[1]
            year = today.year
            if datetime.strptime(month, '%b').month < today.month:
                year += 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
            event['datetime'] = date_text + " " + str(year) + " " + time_text
            event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
            event['link'] = venue.website + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n+++\n")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    ps = digitools.getSource(br, venue.website)
    get_events(ps, event_type)
    sleep(3)
finally:
    # Release the browser even if loading or parsing fails.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,96 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
from lxml import html
# NOTE(review): count is not referenced by the code visible in this script.
count = 0
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Hook & Ladder",
city="Minneapolis",
website="https://thehookmpls.com",
is_venue=True,
)
# The two results are used below as the scraper record (its .calendar) and the
# value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# Debug dump of the scraper record at start-up.
ppr(scraper)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layout for the listing start text, e.g. "Sat, Oct 11, 2025 @ 8:00 pm".
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'
# Listing pages to scrape; only the first three pages are visited.
calendar_url = [
"https://thehookmpls.com/events/list/page/1",
"https://thehookmpls.com/events/list/page/2",
"https://thehookmpls.com/events/list/page/3"
]
# The run environment handed to digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
def get_listings(pse, events):
    """Append one dict per usable ``<article>`` in *pse* to *events*.

    Args:
        pse: Parsed listing page exposing ``.xpath()``.
        events: List extended in place; nothing is returned.

    Each dict holds "datetime", "show_title", "link", "subtitle", "price",
    "image", "date_time", "scraper" and "calendar". A listing with a missing
    required node or an unparsable date is reported and skipped; previously
    it raised and discarded the rest of the page.
    """
    for article in pse.xpath('.//*/article'):
        try:
            listing = {
                'datetime': article.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0],
                'show_title': article.xpath('.//*/header/h2/a/@title')[0],
                'link': article.xpath('.//*/header/h2/a/@href')[0],
            }
            subtitles = article.xpath('.//*/header/div[@class="eventSubHead"]/text()')
            if not subtitles:
                # Unchanged from the original: listings without a subhead are skipped.
                # NOTE(review): confirm this is intended rather than a default being missed.
                continue
            listing['subtitle'] = subtitles[0]
            prices = article.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')
            listing['price'] = prices[0].replace("Tickets ", "") if prices else "See Link"
            images = article.xpath('.//*/img/@data-src')
            listing['image'] = images[0] if images else None
            listing["date_time"] = datetime.strptime(listing['datetime'], DATETIME_FORMAT)
        except (IndexError, ValueError) as err:
            print("skipping listing: ", err)
            continue
        listing['scraper'] = scraper
        listing['calendar'] = scraper.calendar
        events.append(listing)
# Scrape every listing page, then upsert the collected events.
events = []
try:
    for cal in calendar_url:
        br.get(cal)
        sleep(3)
        pse = html.fromstring(br.page_source)
        get_listings(pse, events)
finally:
    # The browser is only needed for fetching; release it even on failure.
    br.close()
for event in events:
    try:
        # Identity fields form the lookup; the rest go in `defaults` so an
        # existing row is updated instead of duplicated. The subtitle is
        # already a string: the original " ".join(...) over it put a space
        # between every character.
        new_event = Event.objects.update_or_create(
            calendar = event['calendar'],
            scraper = event['scraper'],
            event_type = 'Mu',
            show_title = event["show_title"],
            show_link = event["link"],
            show_date = event["date_time"],
            venue = venue,
            defaults = {
                'show_day': event["date_time"],
                'guests': event["subtitle"],
            },
        )
        # Count the stored event, as the sibling scrapers do.
        scraper.items += 1
    except Exception as e:
        print("oops ", e, "\n\n", "Scraper:", scraper)
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,70 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Fetch the Organization row for this bookshop, creating it on the first run
# (get_or_create returns an (object, created) pair). It is stored with
# is_venue=False.
venue, created = Organization.objects.get_or_create(
name="Magers & Quinn",
city="Minneapolis",
website="https://www.magersandquinn.com/events",
is_venue=False
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# strptime layout for the space-joined month/day/year/time text built in
# get_events; note the literal " , " between day and year.
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
# NOTE(review): not referenced by the code visible in this script.
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
def get_events(ps, event_type):
    """Create a basic event for every ``day has-event`` cell in *ps*.

    Args:
        ps: Parsed calendar page exposing ``.xpath()``.
        event_type: Type code passed through to ``digitools.createBasicEvent``.

    The event link is taken from the in-store anchor, falling back to the
    off-site anchor. The original did that fallback inside its ``except``
    handler, where a second failure escaped the loop and a half-built event
    could still be saved. Cells that fail to parse or save are printed and
    skipped.
    """
    site = "https://www.magersandquinn.com"
    for c in ps.xpath('.//*/div[@class="day has-event"]'):
        event = {}
        try:
            day = c.xpath('.//*/div[@class="dd"]/text()')[0]
            month = c.xpath('.//*/div[@class="month"]/text()')[0]
            year = c.xpath('.//*/div[@class="year"]/text()')[0]
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/text()')[0]
            start = c.xpath('.//*/p[@class="time"]/text()')[0]
            event['date'] = " ".join([month, day, year, start])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            hrefs = (c.xpath('.//a[@class="event in-store"]/@href')
                     or c.xpath('.//a[@class="event off-site"]/@href'))
            event['link'] = site + hrefs[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
# Calendar page URLs produced by digitools.createBasicURL from the events URL.
links = digitools.createBasicURL("https://www.magersandquinn.com/events/")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    for link in links:
        ps = digitools.getSource(br, link)
        get_events(ps, "Ed")
        sleep(3)
finally:
    # Release the browser even if a page fails to load or parse.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,80 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Event
import events.digitools as digitools
from selenium.webdriver.common.by import By
from lxml import html
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Uptown VFW",
city="Minneapolis",
website="https://noboolpresents.com/venues/uptown-vfw/",
is_venue = True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layout for "<start text> <year>", e.g. "Sat October 11 @ 8:00 pm 2025".
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y'
# NOTE(review): the next two layouts are not referenced by the code visible here.
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Page opened by the browser, and the year appended to each listing's date text.
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year)
# The run environment handed to digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)

# Open the venue page and wait 30 seconds before it is parsed.
br.get(calendar_url)
sleep(30)
def getEvents(br):
    """Upsert a music event for every ``<article>`` on the browser's current page.

    Args:
        br: Browser whose ``page_source`` is parsed with ``lxml.html``.

    Each article is handled on its own. One with a missing node (for example
    no ticket price) or an unparsable date is reported and skipped; previously
    the resulting IndexError ended the whole run.
    """
    ps = html.fromstring(br.page_source)
    for article in ps.xpath('.//*/article'):
        try:
            # NOTE(review): the original also ran .replace("", "") on this
            # text, a no-op; a character may have been lost from that literal.
            start_text = article.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].strip() + " " + current_year
            starts_at = datetime.strptime(start_text, DATETIME_FORMAT)
            title = article.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
            link = article.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
            tickets = article.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
            Event.objects.update_or_create(
                calendar = scraper.calendar,
                scraper = scraper,
                event_type = 'Mu',
                show_title = title,
                show_link = link,
                show_date = starts_at,
                show_day = starts_at.date(),
                more_details = tickets,
                venue = venue
            )
            scraper.items += 1
        except Exception as e:
            print("oops", e)
# Scrape the page already loaded, then the one behind the "next" link.
try:
    getEvents(br)
    try:
        br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
    except Exception as e:
        # No clickable "next" link: keep the first page's results instead of
        # crashing before the scraper record is updated.
        print("oops", e)
    else:
        sleep(5)
        getEvents(br)
finally:
    # Release the browser on every exit path.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,106 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper, Event as DSEvent
import events.digitools as digitools
# Fields identifying the Parkway Theater Organization row.
venue_fields = {
    "name": "Parkway Theater",
    "city": "Minneapolis",
    "website": "https://theparkwaytheater.com",
    "is_venue": True,
}
try:
    venue, created = Organization.objects.get_or_create(**venue_fields)
except Exception as e:
    # Creation or lookup with the full field set failed; fall back to the row
    # that already carries this name.
    venue = Organization.objects.get(name=venue_fields["name"])
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper, item_count_start = digitools.getScraper(venue)
tz = pytz.timezone("US/Central")
# strptime layout for "<date> <time>", e.g. "Oct 11, 2025 7:00 PM".
DATETIME_FORMAT = '%b %d, %Y %I:%M %p'
def _before_dash(text):
    """Return *text* up to its first hyphen, en dash or em dash, stripped."""
    for dash in ('\u2013', '\u2014', '-'):
        text = text.split(dash)[0]
    return text.strip()


def get_events(ps, event_type):
    """Upsert one event per summary block in *ps*.

    Args:
        ps: Parsed listing page exposing ``.xpath()``.
        event_type: Type code stored on each event.

    Summary blocks are paired with thumbnail URLs by position. The start date
    and time are cut at the first dash so a "start – end" range parses as its
    start. Blocks that cannot be parsed or saved are printed and skipped.
    """
    contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
    img_etc = ps.xpath('.//*/div[@class="summary-thumbnail-outer-container"]/a/div/img/@src')
    for c, i in zip(contents, img_etc):
        try:
            title = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
            link = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
            # NOTE(review): the original split these texts on an empty string,
            # which always raises ValueError, so every listing was silently
            # skipped. The lost separator is assumed to be a dash between
            # start and end — confirm against the live markup.
            day_text = _before_dash(c.xpath('.//div/div/time/text()')[0])
            time_text = _before_dash(c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0])
            starts_at = datetime.strptime(day_text + " " + time_text, DATETIME_FORMAT)
            details = {
                'description': c.xpath('.//*/p/text()')[0],
                'img_link': i,
            }
        except (IndexError, ValueError) as e:
            print(e)
            continue
        try:
            DSEvent.objects.update_or_create(
                calendar = scraper.calendar,
                scraper = scraper,
                event_type = event_type,
                show_title = title,
                show_link = link,
                show_date = starts_at,
                show_day = starts_at,
                more_details = details,
                venue = venue
            )
            scraper.items += 1
        except Exception as e:
            print(e)
            print("\n\n+++\n\n")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    # Live events are stored as "Mu", the movie listings as "Th".
    calendar_url = 'https://theparkwaytheater.com/live-events'
    ps = digitools.getSource(br, calendar_url)
    get_events(ps, "Mu")
    calendar_url = "https://theparkwaytheater.com/movies"
    ps = digitools.getSource(br, calendar_url)
    get_events(ps, "Th")
finally:
    # Release the browser even if a page fails to load or parse.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,98 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Fetch the Organization row for the orchestra, creating it on the first run
# (get_or_create returns an (object, created) pair). It is stored with
# is_venue=False; get_events looks up each performance's own venue.
venue, created = Organization.objects.get_or_create(
name="St Paul Chamber Orchestra",
city="St Paul",
website="https://thespco.org",
is_venue = False
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# strptime layouts
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# Layout for a performance heading, e.g. "Saturday, October 11, 2025 7:30 PM".
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
def _compose_title(plain_parts, emphasized):
    """Rebuild the heading from its plain text nodes and the optional ``<em>`` node.

    With one emphasized node and two plain nodes the emphasized text sits
    between them. With one plain node it goes before it when that node starts
    with a space, otherwise after it.
    """
    if len(emphasized) != 1:
        return plain_parts[0]
    if len(plain_parts) == 2:
        return plain_parts[0] + emphasized[0] + plain_parts[1]
    if plain_parts[0].startswith(" "):
        return emphasized[0] + plain_parts[0]
    return plain_parts[0] + emphasized[0]


def get_events(ps, event_type):
    """Create one event per performance of every concert linked from *ps*.

    Args:
        ps: Parsed calendar page exposing ``.xpath()``.
        event_type: Type code passed through to ``digitools.createBasicEvent``
            (previously overwritten with "Mu", which is what the caller in
            this script passes).

    Each concert page is fetched through the module-level browser ``br``.
    Every performance gets its own dict and its own error handling, so a
    malformed performance no longer discards the rest of that concert.
    """
    for href in set(ps.xpath('.//*/div[@class="event-title"]/a/@href')):
        try:
            link = 'https://content.thespco.org' + href
            page = digitools.getSource(br, link)
            title = _compose_title(
                page.xpath('.//*/article/h1/text()'),
                page.xpath('.//*/article/h1/em/text()'),
            )
            performances = page.xpath('.//*/div[@class="day"]')
        except Exception as err:
            print("ERROR: ", err)
            print("\n\n+++\n\n")
            continue
        for perf in performances:
            try:
                hall_name = perf.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
                location = perf.xpath('.//*/span[@class="location"]/text()')[0].strip()
                # Normalize the two home cities; keep anything else as scraped.
                if 'Minneapolis' in location:
                    location = 'Minneapolis'
                elif 'St. Paul' in location:
                    location = 'St. Paul'
                # Local name, so the module-level `venue` is not shadowed.
                hall, _created = Organization.objects.get_or_create(
                    name=hall_name,
                    city=location,
                    is_venue = True
                )
                when = perf.xpath('.//*/h3[@class="date"]/text()')[0].replace("\n", "").replace("\t", "").strip()
                event = {
                    'scraper': scraper,
                    'calendar': scraper.calendar,
                    'dateStamp': datetime.strptime(when, DATETIME_FORMAT),
                    'venue': hall,
                    'location': location,
                    'title': "SPCO: " + title,
                    'link': link,
                }
                digitools.createBasicEvent(event, event_type, hall)
                scraper.items += 1
            except Exception as err:
                print("ERROR: ", err)
                print("\n\n+++\n\n")
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
try:
    # Calendar page URLs produced by digitools.createURLNoZero.
    links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")
    for link in links:
        ps = digitools.getSource(br, link)
        get_events(ps, "Mu")
        sleep(3)
finally:
    # Release the browser even if a page fails to load or parse.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,71 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layout for "<ISO date> <time>", e.g. "2025-10-11 8:00 pm".
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p'
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="White Squirrel",
city="St. Paul",
website="https://whitesquirrelbar.com",
is_venue = True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# Listing pages to scrape; only the first three pages are visited.
calendar_url = [
'https://whitesquirrelbar.com/calendar/list/page/1/',
'https://whitesquirrelbar.com/calendar/list/page/2/',
'https://whitesquirrelbar.com/calendar/list/page/3/'
]
# The run environment handed to digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
def get_listings(pse, events):
    """Store every event row in *pse* and append its dict to *events*.

    Args:
        pse: Parsed listing page exposing ``.xpath()``.
        events: List extended in place with each successfully parsed event.

    A row with a missing node, no "@" in its start text, or an unparsable
    date is printed and skipped. Previously any such row raised and stopped
    the whole run. A failed save is printed and does not count toward
    ``scraper.items``.
    """
    rows = pse.xpath('.//*/div[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]')
    for row in rows:
        event = {'scraper': scraper, 'calendar': scraper.calendar}
        try:
            images = row.xpath('.//*/img/@src')
            event["image"] = images[0] if images else "none"
            event["date"] = row.xpath('.//time/@datetime')[0]
            # The start text reads "<date> @ <time>"; keep the time part only.
            start_text = row.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
            event["time"] = start_text.split("@")[1].strip()
            event["title"] = row.xpath('.//*/h3/a/text()')[0].replace("\t", "").replace("\n", "")
            event["link"] = row.xpath('.//*/h3/a/@href')[0]
            event['datetime'] = event['date'] + " " + event['time']
            event["dateStamp"] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
        except (IndexError, ValueError) as err:
            print(err)
            ppr(event)
            continue
        events.append(event)
        try:
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as err:
            print(err)
# Scrape each listing page; get_listings stores events as it goes.
events = []
try:
    for cal in calendar_url:
        ps = digitools.getSource(br, cal)
        get_listings(ps, events)
finally:
    # Release the browser even if a page fails to load or parse.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,74 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from events.models import Organization, Scraper
import events.digitools as digitools
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Cedar Cultural Center",
city="Minneapolis",
website="https://www.thecedar.org",
is_venue=True
)
# The two results are used below as the scraper record (its .items counter and
# .calendar) and the value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layout for "<date> <time>", e.g. "Saturday, October 11, 2025 7:30 PM".
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
# NOTE(review): the remaining layouts are not referenced by the code visible here.
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'
def get_events(ps):
    """Create a music event for every event page linked from *ps*.

    Args:
        ps: Parsed listing page exposing ``.xpath()``.

    Links containing "cedar-news-blog" are ignored. Each remaining page is
    fetched through the module-level browser ``br``. The start time comes
    from the ``event-time-localized-start`` node, falling back to
    ``event-time-localized``. A page that cannot be parsed is reported and
    skipped; previously the code fell through and reused the date and time of
    the page before it (or raised NameError on the first page).
    """
    site = "https://www.thecedar.org"
    for href in ps.xpath('.//*/div[@class="summary-title"]/a/@href'):
        if "cedar-news-blog" in href:
            continue
        pse = digitools.getSource(br, site + href)
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['link'] = site + href
        try:
            times = (pse.xpath('.//*/time[@class="event-time-localized-start"]/text()')
                     or pse.xpath('.//*/time[@class="event-time-localized"]/text()'))
            time_text = times[0]
            date_text = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
            event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
            event['dateStamp'] = datetime.strptime(date_text + " " + time_text, DATETIME_FORMAT)
        except (IndexError, ValueError) as e:
            print(e)
            print("failed event: ", event)
            continue
        try:
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            print("failed event: ", event)
# Entry point: the run environment for digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    # sys.exit() rather than the site-provided quit(), which is meant for the REPL.
    sys.exit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)
calendar_url = 'https://www.thecedar.org/listing'
try:
    ps = digitools.getSource(br, calendar_url)
    get_events(ps)
finally:
    # Release the browser even if a page fails to load or parse.
    br.close()
digitools.updateScraper(scraper, item_count_start)

View File

@@ -0,0 +1,105 @@
import os, sys
from datetime import datetime
from dateutil import relativedelta
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()
from time import sleep
from pprint import pprint as ppr
import pytz
from selenium.webdriver.common.by import By
from lxml import html
from events.models import Organization, Scraper, Event
import events.digitools as digitools
# Fetch the Organization row for this venue, creating it on the first run
# (get_or_create returns an (object, created) pair).
venue, created = Organization.objects.get_or_create(
name="Club 331",
city="Minneapolis",
website="https://331club.com",
is_venue=True,
)
# The two results are used below as the scraper record (its .calendar) and the
# value handed back to digitools.updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
# NOTE(review): tz is not referenced by the code visible in this script.
tz = pytz.timezone("US/Central")
# strptime layouts tried in order by the main loop:
# whole-hour time, e.g. "Oct 11 9pm 2025"
DATETIME_FORMAT = '%b %d %I%p %Y'
# time with minutes, e.g. "Oct 11 9:30pm 2025"
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
# no time at all, e.g. "Oct 11 2025"
DATETIME_FORMAT_3 = '%b %d %Y'
# Page opened by the browser, and the year appended to each listing's date.
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)
# The run environment handed to digitools.getBrowser is the first CLI argument.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = digitools.getBrowser(arg1)

# Load the calendar, click the "more_events" control, then parse the page.
br.get(calendar_url)
sleep(3)
more_events_control = br.find_element(By.CLASS_NAME, 'more_events')
more_events_control.click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)
# Day blocks with class "event" first, then those with class "event hidden".
dates = ps.xpath('.//*/div[@class="event"]') + ps.xpath('.//*/div[@class="event hidden"]')
def process_times(times):
    """Return the start times found in *times* as strptime-ready strings.

    Args:
        times: Text nodes scraped from one calendar column.

    Returns:
        A list of lower-case times such as ``"9pm"`` or ``"9:30pm"``, in input
        order. Newlines, "TBA" and surrounding whitespace are removed and
        text not ending in "am"/"pm" is dropped. For a range only the start
        is kept: ``"9-11pm"`` gives ``"9pm"`` and ``"10am-2pm"`` gives
        ``"10am"`` (the original produced ``"10ampm"`` for the latter and
        ignored padded or upper-case entries).
    """
    meridiems = ("am", "pm")
    starts = []
    for raw in times:
        text = raw.replace("\n", "").replace("TBA", "").strip().lower()
        if not text.endswith(meridiems):
            continue
        if "-" in text:
            start = text.split("-")[0].strip()
            # A range such as "9-11pm" carries its meridiem only on the end time.
            if not start.endswith(meridiems):
                start += text[-2:]
            text = start
        starts.append(text)
    return starts
def _parse_listing_time(parts):
    """Parse ``[month, day, (time), year]`` tokens; return None if no layout fits."""
    text = " ".join(parts)
    for layout in (DATETIME_FORMAT, DATETIME_FORMAT_2, DATETIME_FORMAT_3):
        try:
            return datetime.strptime(text, layout)
        except ValueError:
            continue
    return None


# Walk every day block and store one event per column that lists bands.
events = []
for d in dates:
    event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
    for c in d.xpath('.//div[@class="column"]'):
        bands = c.xpath('.//p/a/text()')
        if not bands:
            continue
        start_times = process_times(c.xpath('.//p/text()'))
        event = {}
        event["datetime"] = event_date + start_times + [current_year]
        # Only the first start time is parsed; with several times in one
        # column none of the layouts could match.
        stamp = _parse_listing_time(event_date + start_times[:1] + [current_year])
        event["date_time"] = stamp if stamp is not None else "Invalid"
        event["bands"] = (", ").join(bands)
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['title'] = event["bands"]
        event['date'] = event["date_time"]
        event['dateStamp'] = event["date_time"]
        event['link'] = "https://331club.com/#calendar"
        events.append(event)
        if stamp is None:
            # Do not send the "Invalid" placeholder to the database.
            print('oops', 'unparseable date', event["datetime"])
            continue
        try:
            digitools.createBasicEvent(event, "Mu", venue)
            # Count the stored event, as the sibling scrapers do.
            scraper.items += 1
        except Exception as e:
            print('oops', e)
br.close()
digitools.updateScraper(scraper, item_count_start)