first commit
This commit is contained in:
102
event_scrapers/Working/cals/MplStpMag.mn.py
Normal file
102
event_scrapers/Working/cals/MplStpMag.mn.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization
|
||||
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||
|
||||
# Venue record that owns this calendar's events.
venue, created = Organization.objects.get_or_create(
    name="Mpls Stp Mag",
    city="Minneapolis",
    website="https://calendar.mspmag.com/calendars/all-events/",
)

event_type = ""

# Time signatures: scrape window is a fortnight out from "now".
tz = pytz.timezone("US/Central")
td = relativedelta.relativedelta(days=1)          # one-day step ("Tomorrow")
fortnight = relativedelta.relativedelta(days=14)  # look-ahead horizon
odt = datetime.now() + fortnight                  # cut-off datetime

# Site renders headers like "Friday, January 03" (year appended by us).
DATETIME_FORMAT = '%A, %B %d %Y %I:%M%p'
DATETIME_FORMAT_ALT = '%A, %B %d %Y'
|
||||
|
||||
def get_events(ps, event_type):
    """Scrape day-cards from the mspmag calendar page and create events.

    ps: parsed lxml tree of the calendar list page.
    event_type: category code passed through to createBasicEvent.
    Stops scanning once a card's date passes the fortnight cut-off (odt),
    since cards appear in chronological order.
    """
    contents = ps.xpath('.//*/div[@class="css-card js-card day-card type-smad expandable"]')
    for c in contents:
        try:
            event = {}
            event['calendar'] = venue.calendar
            event_block = c.xpath('.//*/li[@class="card-listings-item event-element"]')
            date = c.xpath('.//div[@class="day-card__header day-card__header--daily"]/text()')[0].replace("\n", "").strip()
            # The site labels the nearest days "Today"/"Tomorrow"; convert them
            # to the same "Weekday, Month DD" shape as the other headers so the
            # strptime below succeeds.  (The original assigned a datetime object
            # here, which made the later string concatenation raise TypeError.)
            if date == "Today":
                date = datetime.today().strftime('%A, %B %d')
            elif date == "Tomorrow":
                date = (datetime.today() + td).strftime('%A, %B %d')
            year = datetime.now().year
            dateTime = datetime.strptime(date + " " + str(year), DATETIME_FORMAT_ALT)
            if dateTime > odt:
                # Past the two-week window -- nothing further is wanted.
                print("DATE TIME ", dateTime)
                break
            for ev in event_block:
                time = ev.xpath('.//*/span[@class="card-listing-item-time"]/text()')[0].replace("@", "").strip()
                title = ev.xpath('.//*/div[@class="card-listing-item-title"]/text()')[0]
                if time == "All day":
                    # No concrete time; pick noon and flag it for the reader.
                    time = "12:00pm"
                    title = title + " (Check link for times.)"
                elif "-" in time:
                    # "7:00pm-9:00pm" ranges: keep only the start time.
                    time = time.split("-")[0]
                event['title'] = title
                location = ev.xpath('.//*/span[@class="card-listing-item-location"]/text()')[0]
                # Normalize venue-name variants to their canonical spellings.
                location = {
                    '7th St. Entry': '7th St Entry',
                    '7th Street Entry': '7th St Entry',
                    'Amsterdam Bar and Hall': 'Amsterdam Bar & Hall',
                }.get(location, location)
                event['location'] = location
                new_venue, created = Organization.objects.get_or_create(name=event['location'])
                print("V: ", new_venue, created)
                event['dateTime'] = date + " " + str(year) + " " + time
                event['link'] = venue.website + c.xpath('.//@data-event')[0]
                event['dateStamp'] = datetime.strptime(event['dateTime'], DATETIME_FORMAT)
                createBasicEvent(event, event_type, new_venue)
        except Exception as e:
            # Best-effort scrape: log the failing card and keep going.
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
|
||||
|
||||
# A run_env argument is required: it selects the browser/driver config.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = getBrowser(arg1)

# Fetch the calendar page, scrape it as "Ed" events, then shut down.
ps = getSource(br, venue.website)
get_events(ps, "Ed")
sleep(3)

br.close()
|
||||
105
event_scrapers/Working/cals/minnestar.py
Normal file
105
event_scrapers/Working/cals/minnestar.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper
|
||||
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||
|
||||
count = 0

# Venue record that owns this calendar's events.
venue, created = Organization.objects.get_or_create(
    name="Minnestar",
    city="Minneapolis",
    website="https://minnestar.org/community/calendar",
)

# Scraper bookkeeping row; if creation conflicts with an existing row,
# fall back to fetching it by name.
try:
    scraper, created = Scraper.objects.get_or_create(
        name=venue.name,
        website=venue.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)

event_type = ""

# Time signatures: full format with time, and a date-only fallback.
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%B %d %Y'
|
||||
|
||||
def get_events(ps, event_type):
    """Visit each event link on the Minnestar calendar list and create events.

    ps: parsed calendar list page.
    event_type: kept for signature parity with the other scrapers; events
    are actually created with the "Ot" category below.
    Tries three parse strategies per page (full datetime, reordered lookup,
    date-only with assumed year) before giving up on the timestamp.
    """
    links = ps.xpath('.//*/div[@id="community-calendar-list-view-container"]/a/@href')
    ppr(links)
    for l in links:
        pse = getSource(br, l)
        sleep(1)
        event = {}
        event['calendar'] = venue.calendar
        event['link'] = l
        try:
            starttime = pse.xpath('.//*/time/text()')[0]
            endtime = pse.xpath('.//*/time/@datetime')[1]
            event['dateStamp'] = datetime.strptime(starttime, DATETIME_FORMAT)
            event['title'] = pse.xpath('.//*/h1[@class="heading-2"]/text()')[0]
        except Exception:  # narrowed from a bare except:
            try:
                event['title'] = pse.xpath('.//*/h1[@class="heading-2"]/text()')[0]
                starttime = pse.xpath('.//*/time/text()')[0]
                event['dateStamp'] = datetime.strptime(starttime, DATETIME_FORMAT)
            except Exception as e:
                try:
                    print(e)
                    print('failed event: ', event)
                    # Date-only pages omit the year; assume the current year
                    # (was hard-coded to 2025).
                    starttime = pse.xpath('.//*/time/text()')[0]
                    event['dateStamp'] = datetime.strptime(
                        starttime + ' ' + str(datetime.now().year), DATETIME_FORMAT_2)
                except Exception as e:
                    print(e)
                    print("failed event: ", event)
        ppr(event)
        try:
            createBasicEvent(event, "Ot", venue)
            scraper.items += 1
        except Exception as e:
            print(e)
            print('failed to create: ', event)
|
||||
|
||||
|
||||
# A run_env argument is required: it selects the browser/driver config.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = getBrowser(arg1)

# Scrape the calendar as "Ot" events and persist the run counters.
ps = getSource(br, venue.website)
get_events(ps, "Ot")
sleep(3)

scraper.save()

br.close()
|
||||
147
event_scrapers/Working/govt/MNLeg.py
Normal file
147
event_scrapers/Working/govt/MNLeg.py
Normal file
@@ -0,0 +1,147 @@
|
||||
# Install Chromedriver and Quarantine
|
||||
# xattr -d com.apple.quarantine <name-of-executable>
|
||||
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
from selenium import webdriver as wd
|
||||
|
||||
from xvfbwrapper import Xvfb
|
||||
from lxml import html
|
||||
import pytz
|
||||
|
||||
from events.models import Event, Organization, Scraper
|
||||
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||
|
||||
# Scraper bookkeeping row for the MN Legislature calendar; reuse the
# existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="MN Legislature",
        website="https://www.leg.mn.gov/cal?type=all",
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="MN Legislature")
print("Scraper: ", scraper)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'

# Combined calendar for House, Senate, and committee meetings.
calendar_url = 'https://www.leg.mn.gov/cal?type=all'

# A run_env argument is required: it selects the browser/driver config.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = getBrowser(arg1)
|
||||
|
||||
# Load the combined calendar; the page is built by JavaScript, so give it
# a generous minute before snapshotting the DOM.
br.get(calendar_url)
sleep(60)
ps = html.fromstring(br.page_source)

# Committee, Senate, and House entries each use a distinct card class.
commEvents = ps.xpath('.//*/div[@class="card border-dark comm_item cal_item ml-lg-3"]')
senateEvents = ps.xpath('.//*/div[@class="card border-dark senate_item cal_item ml-lg-3"]')
houseEvents = ps.xpath('.//*/div[@class="card border-dark house_item cal_item ml-lg-3"]')
meetings = []
|
||||
|
||||
# Create/update one Event per House card.
for hE in houseEvents:
    details = {}
    dateTime = hE.xpath('.//*/b/text()')[0]
    # Title may or may not be a link.
    try:
        title = hE.xpath('.//*/h3/a/text()')[0]
    except IndexError:  # narrowed from a bare except
        title = hE.xpath('.//*/h3/text()')[0]
    # Prefer the details button's relative link; fall back to the title link.
    try:
        link = "https://www.leg.mn.gov/" + hE.xpath('.//*/div[@class="float-right text-center mr-2 d-print-none"]/a/@href')[0]
    except IndexError:  # narrowed from a bare except
        link = hE.xpath('.//*/h3/a/@href')[0]
    details['location'] = hE.xpath('.//*/div[@class=""]/text()')[0]
    venue, created = Organization.objects.get_or_create(name="MN House", city="St. Paul")
    stamp = datetime.strptime(dateTime, DATETIME_FORMAT)  # parse once, use twice
    new_event, created = Event.objects.update_or_create(
        calendar = 'msp',  # fixed: trailing comma was missing (SyntaxError)
        event_type = 'Gv',
        show_title = title,
        show_link = link,
        show_date = stamp,
        show_day = stamp.date(),
        more_details = details['location'],
        venue = venue
    )
    scraper.items += 1
|
||||
|
||||
|
||||
# Create/update one Event per Senate card.
for sE in senateEvents:
    details = {}
    dateTime = sE.xpath('.//*/b/text()')[0]
    try:
        title = sE.xpath('.//*/h3/a/text()')[0]
    except IndexError:  # narrowed from a bare except
        title = sE.xpath('.//*/h3/text()')[0]
    try:
        link = "https://www.leg.mn.gov/" + sE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
    except IndexError:  # narrowed from a bare except
        link = sE.xpath('.//*/h3/a/@href')[0]
    # The location follows a "Location: " text node; floor sessions carry
    # their own marker instead.
    location_list = sE.xpath('.//*/text()')
    if 'Location: ' in location_list:
        iN = location_list.index("Location: ")
        details['location'] = location_list[iN + 1]
    elif 'Senate Floor Session' in location_list:
        details['location'] = 'Senate Floor Session'
    else:
        # Fixed: previously left unset, raising KeyError below when a card
        # matched neither marker.
        details['location'] = ''
    venue, created = Organization.objects.get_or_create(name="MN Senate", city="St. Paul")
    stamp = datetime.strptime(dateTime, DATETIME_FORMAT)  # parse once, use twice
    new_event = Event.objects.update_or_create(
        event_type = 'Gv',
        show_title = title,
        show_link = link,
        show_date = stamp,
        show_day = stamp.date(),
        more_details = details['location'],
        venue = venue
    )
    scraper.items += 1
|
||||
|
||||
# Create/update one Event per committee card, then finish the run.
for cE in commEvents:
    details = {}
    dateTime = cE.xpath('.//*/b/text()')[0]
    try:
        title = cE.xpath('.//*/h3/a/text()')[0]
    except IndexError:  # narrowed from a bare except
        title = cE.xpath('.//*/h3/text()')[0]
    try:
        link = "https://www.leg.mn.gov/" + cE.xpath('.//*/div[@class="float-right text-center mr-2"]/a/@href')[0]
    except IndexError:  # narrowed from a bare except
        link = cE.xpath('.//*/h3/a/@href')[0]
    # The room follows a "Room: " text node when present.
    location_list = cE.xpath('.//*/text()')
    if 'Room: ' in location_list:
        iN = location_list.index("Room: ")
        details['location'] = location_list[iN + 1]
    venue, created = Organization.objects.get_or_create(name="MN Legislature", city="St. Paul")
    stamp = datetime.strptime(dateTime, DATETIME_FORMAT)  # parse once, use twice
    new_event = Event.objects.update_or_create(
        event_type = 'Gv',
        show_title = title,
        show_link = link,
        show_date = stamp,
        show_day = stamp.date(),
        # Fixed: details['location'] raised KeyError when no "Room: " marker
        # was present; default to an empty string instead.
        more_details = details.get('location', ''),
        venue = venue
    )
    scraper.items += 1

br.close()
scraper.save()
|
||||
|
||||
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|
||||
99
event_scrapers/Working/govt/MplsCityCouncil.py
Normal file
99
event_scrapers/Working/govt/MplsCityCouncil.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import re, os, sys
|
||||
from datetime import datetime
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event, Organization, Scraper
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
from selenium import webdriver as wd
|
||||
from selenium.webdriver.support.ui import Select
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
from xvfbwrapper import Xvfb
|
||||
from lxml import html
|
||||
import pytz
|
||||
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||
|
||||
# Scraper bookkeeping row for the Minneapolis City Council calendar; reuse
# the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Mpls City Council",
        website="https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming",
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name="Mpls City Council")
print("Scraper: ", scraper)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%A, %b %d, %Y %I:%M %p'

calendar_url = 'https://lims.minneapolismn.gov/Calendar/citycouncil/upcoming'

# A run_env argument is required: it selects the browser/driver config.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = getBrowser(arg1)

# The LIMS calendar is an Angular app; give it time to render.
br.get(calendar_url)
sleep(25)
# br.find_element(By.XPATH, '//*/li[@class="tab-header-small"]/a').click()
# sleep(15)
# all_entries = Select(br.find_element(By.XPATH, '//*/select'))
# all_entries.select_by_value('50')
# sleep(15)

ps = html.fromstring(br.page_source)
|
||||
|
||||
# Each ng-scope div is one day's block of meetings.
dayBlocks = ps.xpath('.//*/div[@class="ng-scope"]')
meetings = []

for dB in dayBlocks:
    date = dB.xpath('.//div[@class="row"]/div/span[@class="ng-binding"]/text()')[0]
    events = dB.xpath('.//div[@class="upcoming ng-scope"]/div')
    for event in events:
        time = event.xpath('.//div/text()')[0]
        title = event.xpath('.//div/a/text()')[0].strip()
        if not title:
            # Some rows nest the title link one span deeper.
            title = event.xpath('.//div/span/a/text()')[0].strip()
        link = event.xpath('.//div/a/@href')[0]
        if link.startswith("/Download/"):
            # Direct file downloads aren't useful links; point at the calendar.
            link = calendar_url
        else:
            link = "https://lims.minneapolismn.gov" + link
        # Titles look like "Committee Name, Location": last segment is the
        # location, the rest is the meeting title.
        location = title.split(',')[-1].strip()
        mtg_title = title.split(',')[:-1]
        if len(mtg_title) > 1:
            mtg_title = (' -').join(mtg_title).strip()
        else:
            mtg_title = mtg_title[0].strip()
        dateTime = datetime.strptime(date + " " + time, DATETIME_FORMAT)
        if location == "City Hall":
            location = "Mpls City Hall"
        print(dateTime, location, mtg_title, link)
        print('\n\n++++\n\n')
        venue, created = Organization.objects.get_or_create(name=location, city="Minneapolis")
        new_event = Event.objects.update_or_create(
            calendar = 'msp',  # fixed: trailing comma was missing (SyntaxError)
            event_type = 'Gv',
            show_title = mtg_title,
            show_link = link,
            show_date = dateTime,
            show_day = dateTime,  # NOTE(review): likely wants dateTime.date() -- confirm against the model
            venue = venue
        )
        scraper.items += 1

br.close()
scraper.save()
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|
||||
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|
||||
73
event_scrapers/Working/govt/StPaulCityCouncil.py
Normal file
73
event_scrapers/Working/govt/StPaulCityCouncil.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import re, os, sys
|
||||
from datetime import datetime
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event, Organization, Scraper
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
from selenium import webdriver as wd
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
from xvfbwrapper import Xvfb
|
||||
from lxml import html
|
||||
import pytz
|
||||
from events.digitools import getBrowser, createURL, createBasicEvent, getSource
|
||||
|
||||
# Scraper bookkeeping row for the St Paul city calendar.
scraper, created = Scraper.objects.get_or_create(
    name="St Paul City Council",
    website="https://www.stpaul.gov/calendar",
    last_ran = datetime.now(),
)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d, %Y at %I:%M %p'

calendar_url = 'https://www.stpaul.gov/calendar'
city_site = "https://www.stpaul.gov"

# A run_env argument is required: it selects the browser/driver config.
if len(sys.argv) < 2:
    print("No run_env")
    quit()
arg1 = sys.argv[1]
br = getBrowser(arg1)

br.get(calendar_url)
sleep(3)
|
||||
|
||||
|
||||
def getEvents(br):
    """Scrape the currently-loaded stpaul.gov calendar page into Event rows.

    br: a live browser whose page_source holds one page of the calendar.
    Venue is a generic "Somewhere in St Paul" placeholder since the listing
    does not expose per-event venues.
    """
    ps = html.fromstring(br.page_source)
    eventBlocks = ps.xpath('.//*/div[@class="calendar__item views-row"]')

    for eB in eventBlocks:
        title = eB.xpath('.//div/h3[@class="field-content calendar__title"]/text()')[0]
        link = city_site + eB.xpath('.//div/span[@class="field-content calendar__link"]/a/@href')[0]
        dateTime = eB.xpath('.//div[@class="views-field views-field-field-calendar-date-value"]/span/text()')[0]
        print(dateTime, title, link)
        print('\n\n++++\n\n')
        venue, created = Organization.objects.get_or_create(name="Somewhere in St Paul", city="St. Paul")
        stamp = datetime.strptime(dateTime, DATETIME_FORMAT)  # parse once, use twice
        new_event = Event.objects.update_or_create(
            calendar = 'msp',  # fixed: trailing comma was missing (SyntaxError)
            event_type = 'Gv',
            show_title = title,
            show_link = link,
            show_date = stamp,
            show_day = stamp,
            venue = venue
        )
|
||||
|
||||
# Scrape the first two pages of the calendar, then shut down.
getEvents(br)
sleep(5)
br.get("https://www.stpaul.gov/calendar?page=1")
getEvents(br)

br.close()
|
||||
|
||||
# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|
||||
116
event_scrapers/Working/govt/mngov.py
Normal file
116
event_scrapers/Working/govt/mngov.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import requests, os, sys
|
||||
from icalendar import Calendar as iCalendar, Event
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event as DSEvent, Organization, Scraper
|
||||
|
||||
# Feed timestamps are UTC; shift by five hours toward US/Central.
td = relativedelta.relativedelta(hours=5)
odt = datetime.now() + td

# Venue record that owns this calendar's events.
venue, created = Organization.objects.get_or_create(
    name="MN Launch",
    city="Minneapolis",
    website="https://mn.gov/launchmn/calendar",
)

# Scraper bookkeeping row; reuse the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name=venue.name,
        website=venue.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)
print("Scraper: ", scraper)

event_type = "Ed"

# Active feed (timely export); the Google-calendar URL is kept for reference.
cal_url = "https://timelyapp.time.ly/api/calendars/54705514/export?format=ics&target=copy&start_date=2024-12-13"
calendar_url = 'https://calendar.google.com/calendar/ical/uvkshlggh1h4ck08emab22btkum9hl94%40import.calendar.google.com/public/basic.ics'

# Download and parse the ICS feed.
objIcalData = requests.get(cal_url)
gcal = iCalendar.from_ical(objIcalData.text)

cfpa_events = []
tz = pytz.timezone("US/Central")
|
||||
|
||||
# Walk every component of the ICS feed and create future events.
for component in gcal.walk():
    event = {}
    event['strSummary'] = f"{(component.get('SUMMARY'))}"
    event['strDesc'] = component.get('DESCRIPTION')
    event['strLocation'] = component.get('LOCATION')
    event['dateStart'] = component.get('DTSTART')
    event['dateStamp'] = component.get('DTSTAMP')
    if event['dateStamp'] is not None:
        # NOTE(review): the stamp is overwritten with DTSTART's datetime --
        # looks intentional (stamp mirrors start) but confirm.
        event['dateStamp'] = event['dateStart'].dt
    if event['dateStart'] is not None:
        # (Original wrapped this in a try whose handler repeated the same
        # statement -- the guard was a no-op and has been removed.)
        event['dateStart'] = event['dateStart'].dt
    event['dateEnd'] = (component.get('DTEND'))
    if event['dateEnd'] is not None:
        event['dateEnd'] = event['dateEnd'].dt
    else:
        event['dateEnd'] = event['dateStart']
    if event['strSummary'] != 'None':
        event['details'] = {
            "description" : event['strDesc'],
            "Location" : event['strLocation'],
        }
        cfpa_events.append(event)
        now_now = datetime.now().astimezone(tz)
        try:
            # Only keep events that haven't started yet.
            if event['dateStart'] > now_now:
                print(event['strSummary'])
                new_event, created = DSEvent.objects.update_or_create(
                    calendar = 'msp',  # fixed: trailing comma was missing (SyntaxError)
                    event_type = event_type,
                    show_title = event['strSummary'],
                    show_link = venue.website,
                    show_date = event['dateStart'] - td,
                    show_day = event['dateStart'] - td,
                    more_details = event["details"],
                    venue = venue
                )
                scraper.items += 1
                # Feed locations look like "Venue Name @ Street, City, ST";
                # split out a real venue when one is present.
                if event['strLocation'] is not None and event['strLocation'] != 'MN' and event['strLocation'] != 'Online':
                    loc = event['strLocation'].split('@')
                    new_venue_name = loc[0]
                    if len(loc) > 1:
                        address = loc[1].split(",")
                        city = address[1].strip()
                        new_venue, created = Organization.objects.get_or_create(
                            name=new_venue_name,
                            city=city,
                            website="https://mn.gov/launchmn/calendar",
                        )
                        new_event.venue = new_venue
                        new_event.save()
                    else:
                        new_event.venue = venue
                        new_event.save()
        except Exception as e:
            # Typically naive-vs-aware datetime comparisons; log and move on.
            print(e)
            print("Event: ", event['dateStart'], event['strSummary'])
            print("Clock: ", now_now)
    else:
        print("Failed: ", component.get('DESCRIPTION'))

scraper.save()
|
||||
|
||||
|
||||
50
event_scrapers/Working/iCal/ical.CAFAC.mpls.py
Normal file
50
event_scrapers/Working/iCal/ical.CAFAC.mpls.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import requests, os, sys
|
||||
from icalendar import Calendar as iCalendar, Event
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event as DSEvent, Organization, Scraper, Calendar
|
||||
import events.digitools as digitools
|
||||
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
# Venue record for the Chicago Ave Fire Arts Center.
venue, created = Organization.objects.get_or_create(
    name="Chicago Ave Fire Arts Center",
    city="Minneapolis",
    website="https://www.cafac.org/classes",
)
event_type = "Ed"

# Scraper row pointing at the venue's public Google-calendar ICS feed;
# reuse the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Chicago Ave Fire Arts Center",
        website="https://calendar.google.com/calendar/ical/9qj2426rukra3jv933nslsf3r8%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items  # snapshot so updateScraper can report new items

# Download and parse the ICS feed, then hand off to the shared helpers.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
44
event_scrapers/Working/iCal/ical_run.KJHideaway.StPaul.py
Normal file
44
event_scrapers/Working/iCal/ical_run.KJHideaway.StPaul.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import requests, os, sys
|
||||
from icalendar import Calendar as iCalendar, Event
|
||||
from datetime import datetime
|
||||
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event as DSEvent, Organization, Scraper, Calendar
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
# Fixed: digitools is called below but this script never imported it
# (the sibling iCal scrapers all carry this import).
import events.digitools as digitools

# Venue record for KJ's Hideaway.
venue, created = Organization.objects.get_or_create(
    name="KJ's Hideaway",
    city="Minneapolis",
    website="",
)

# Scraper row pointing at the venue's public Google-calendar ICS feed;
# reuse the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="KJ's Hideaway",
        website="https://calendar.google.com/calendar/ical/sgmok5t13vspeoruhruh33dhj0hgc50q%40import.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items  # snapshot so updateScraper can report new items

event_type = "Mu"

# Download and parse the ICS feed, then hand off to the shared helpers.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
48
event_scrapers/Working/iCal/ical_run.SocialableCider.mpls.py
Normal file
48
event_scrapers/Working/iCal/ical_run.SocialableCider.mpls.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import requests, os, sys
|
||||
from icalendar import Calendar as iCalendar, Event
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event as DSEvent, Organization, Scraper, Calendar
|
||||
import events.digitools as digitools
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
|
||||
# Venue record for Sociable Ciderwerks.
venue, created = Organization.objects.get_or_create(
    name="Sociable Ciderwerks",
    city="Minneapolis",
    website="https://sociablecider.com/events",
)
event_type = "Mu"

# Scraper row pointing at the venue's public Google-calendar ICS feed;
# reuse the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Sociable Ciderwerks",
        website="https://calendar.google.com/calendar/ical/c_oa7uitvkn871o1ojl5e1os4ve8%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items  # snapshot so updateScraper can report new items

# Download and parse the ICS feed, then hand off to the shared helpers.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
48
event_scrapers/Working/iCal/ical_run.bunkers.py
Normal file
48
event_scrapers/Working/iCal/ical_run.bunkers.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import requests, os, sys
|
||||
from icalendar import Calendar as iCalendar, Event
|
||||
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event as DSEvent, Organization, Scraper, Calendar
|
||||
|
||||
import events.digitools as digitools
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
# Venue record for Bunkers (flagged as a physical venue).
venue, created = Organization.objects.get_or_create(
    name="Bunkers",
    city="Minneapolis",
    website="https://bunkersmusic.com/calendar/",
    is_venue = True
)

# Scraper row pointing at the venue's public Google-calendar ICS feed;
# reuse the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Bunkers",
        website="https://calendar.google.com/calendar/ical/js94epu90r2et31aopons1ifm8%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items  # snapshot so updateScraper can report new items

event_type = "Mu"

# Download and parse the ICS feed, then hand off to the shared helpers.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
48
event_scrapers/Working/iCal/ical_run.cfpa.py
Normal file
48
event_scrapers/Working/iCal/ical_run.cfpa.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import requests, os, sys
|
||||
from icalendar import Calendar as iCalendar, Event
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from events.models import Event as DSEvent, Organization, Scraper, Calendar
|
||||
import events.digitools as digitools
|
||||
|
||||
|
||||
td = relativedelta.relativedelta(hours=5)
|
||||
|
||||
# Venue record for the Center for Performing Arts.
venue, created = Organization.objects.get_or_create(
    name="Center for Performing Arts",
    city="Minneapolis",
    website="https://www.cfpampls.com/events",
)

# Scraper row pointing at the venue's public Google-calendar ICS feed;
# reuse the existing row if creation conflicts.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Center for Performing Arts",
        website="https://calendar.google.com/calendar/ical/6rpooudjg01vc8bjek1snu2ro0%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items  # snapshot so updateScraper can report new items

event_type = "Ed"

# Download and parse the ICS feed, then hand off to the shared helpers.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
46
event_scrapers/Working/iCal/ical_run.eagles.py
Normal file
46
event_scrapers/Working/iCal/ical_run.eagles.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Load events for Eagles #34 from its public Google Calendar iCal feed
into the ds_events database."""
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools

from dateutil import relativedelta
# Offset used by the shared iCal tooling.
td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="Eagles #34",
    city="Minneapolis",
    website="https://www.minneapoliseagles34.org/events-entertainment.html",
)

# NOTE(review): last_ran in the lookup means the create raises on re-runs;
# the except clause falls back to fetching the existing row.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Eagles #34",
        website="https://calendar.google.com/calendar/ical/teflgutelllvla7r6vfcmjdjjo%40group.calendar.google.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

event_type = "Mu"

# Fetch, parse, and import the feed via the shared helpers.
feed_response = requests.get(scraper.website)
feed = iCalendar.from_ical(feed_response.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(feed, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
50
event_scrapers/Working/iCal/ical_run.terminalbar-mpls.py
Normal file
50
event_scrapers/Working/iCal/ical_run.terminalbar-mpls.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Load events for Terminal Bar from its public Google Calendar iCal feed
into the ds_events database.

Cleanup: the original defined ``td`` twice, assigned ``event_type = "Mu"``
twice, and computed an unused ``odt``; the duplicates and dead code are
removed (no behavior change).
"""
import requests, os, sys
from icalendar import Calendar as iCalendar, Event
from datetime import datetime
from dateutil import relativedelta

from pprint import pprint as ppr
import pytz

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from events.models import Event as DSEvent, Organization, Scraper, Calendar
import events.digitools as digitools

# Offset used by the shared iCal tooling.
td = relativedelta.relativedelta(hours=5)

venue, created = Organization.objects.get_or_create(
    name="Terminal Bar",
    city="Minneapolis",
    website="https://terminalbarmn.com",
)
event_type = "Mu"

# NOTE(review): last_ran in the lookup means the create raises on re-runs;
# the except clause falls back to fetching the existing row.
try:
    scraper, created = Scraper.objects.get_or_create(
        name="Terminal Bar",
        website="https://calendar.google.com/calendar/ical/terminalbar32%40gmail.com/public/basic.ics",
        calendar = Calendar.objects.get(id=1),
        items = 0,
        new_items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=venue.name)

item_count_start = scraper.items

# Fetch, parse, and import the feed via the shared helpers.
objIcalData = requests.get(scraper.website)
gcal = iCalendar.from_ical(objIcalData.text)
tz = pytz.timezone("US/Central")
digitools.getiCalEvents(gcal, scraper)
digitools.updateScraper(scraper, item_count_start)
|
||||
78
event_scrapers/Working/news/minnpost.mn.py
Normal file
78
event_scrapers/Working/news/minnpost.mn.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Scrape recent MinnPost article headlines and links into the database."""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

org, created = Organization.objects.get_or_create(
    name="MinnPost",
    city="Minneapolis",
    website="https://www.minnpost.com/",
    is_venue=False,
)

# NOTE(review): last_ran in the lookup makes this raise on re-runs; the
# except clause falls back to the existing row.
try:
    scraper, created = Scraper.objects.get_or_create(
        name=org.name,
        website=org.website,
        items = 0,
        last_ran = datetime.now(),
    )
except Exception as e:
    print(e)
    scraper = Scraper.objects.get(name=org.name)
print("Scraper: ", scraper)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    """Walk the page's <article> nodes and store each headline + link.

    Exits (closing the module-level ``br``) once more than ten articles
    have been written.
    """
    contents = ps.xpath('.//*/article')
    saved = 0
    ppr(contents)
    for node in contents:
        try:
            if saved > 10:
                br.close()
                quit()
            article = {}
            article['title'] = node.xpath('.//*/h2[@class="entry-title"]/a/text()')[0]
            article['link'] = node.xpath('.//*/h2[@class="entry-title"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
            ppr(article)
            print("Success")
            saved += 1
        except Exception as e:
            print(e)
            ppr(article)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
|
||||
68
event_scrapers/Working/news/racket.mn.py
Normal file
68
event_scrapers/Working/news/racket.mn.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""Scrape recent Racket MN story cards (title + link) into the database."""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

scraper, created = Scraper.objects.get_or_create(
    name="Racket MN",
    website="https://racketmn.com",
    last_ran = datetime.now(),
)

org, created = Organization.objects.get_or_create(
    name="Racket MN",
    city="Minneapolis",
    website="https://racketmn.com",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    """Store title/link for each post card; stop after more than ten saves.

    Relies on the module-level ``br`` (closed on exit) and ``org``.
    """
    saved = 0
    cards = ps.xpath('.//*/div[@class="PostCard_stackedWrapper__S21Fy"]') + ps.xpath('.//*/div[@class="PostCard_wrapper__uteO3"]')
    for card in cards:
        if saved > 10:
            br.close()
            quit()
        try:
            article = {}
            article['title'] = card.xpath('.//div/a/h3/text()')[0]
            # Card hrefs are site-relative; prefix the origin.
            article['link'] = org.website + card.xpath('.//div/a/@href')[1]
            createBasicArticle(article, event_type, org)
            saved += 1
        except Exception as e:
            print(e)
            ppr(article)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
|
||||
68
event_scrapers/Working/news/sahan.mn.py
Normal file
68
event_scrapers/Working/news/sahan.mn.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""Scrape recent Sahan Journal articles (title + link) into the database."""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

scraper, created = Scraper.objects.get_or_create(
    name="Sahan Journal",
    website="https://sahanjournal.com/",
    last_ran = datetime.now(),
)

org, created = Organization.objects.get_or_create(
    name="Sahan Journal",
    city="Minneapolis",
    website="https://sahanjournal.com/",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    """Store title/link per <article>; stop after more than ten saves.

    Relies on the module-level ``br`` (closed on exit) and ``org``.
    """
    nodes = ps.xpath('.//*/article')
    saved = 0
    for node in nodes:
        try:
            if saved > 10:
                br.close()
                quit()
            article = {}
            article['title'] = node.xpath('.//*/h2[@class="entry-title"]/a/text()')[0]
            article['link'] = node.xpath('.//*/h2[@class="entry-title"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
            saved += 1
        except Exception as e:
            print(e)
            ppr(article)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
|
||||
63
event_scrapers/Working/news/unicornriot.py
Normal file
63
event_scrapers/Working/news/unicornriot.py
Normal file
@@ -0,0 +1,63 @@
|
||||
"""Scrape the latest Unicorn Riot articles (title + link) into the database."""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
from events.digitools import getBrowser, createURL, createBasicArticle, getSource

# NOTE(review): "Uniocorn" is a typo, but it is the name these DB rows were
# created under -- renaming requires a data migration, so it is kept as-is.
scraper, created = Scraper.objects.get_or_create(
    name="Uniocorn Riot",
    website="https://unicornriot.ninja/",
    last_ran = datetime.now(),
)

org, created = Organization.objects.get_or_create(
    name="Uniocorn Riot",
    city="Minneapolis",
    website="https://unicornriot.ninja/",
    is_venue=False,
)

event_type = "Ja"

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    """Store title/link for (at most) the first ten <article> nodes."""
    nodes = ps.xpath('.//*/article')
    for node in nodes[:10]:
        try:
            article = {}
            article['title'] = node.xpath('.//*/h3[@class="title entry-title is-3"]/a/text()')[0]
            article['link'] = node.xpath('.//*/h3[@class="title entry-title is-3"]/a/@href')[0]
            createBasicArticle(article, event_type, org)
        except Exception as e:
            print(e)
            ppr(article)
            print("\n+++\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = getSource(br, org.website)
get_events(ps, "Ed")
sleep(3)

br.close()
scraper.save()
|
||||
132
event_scrapers/Working/smedia/bluesky.py
Normal file
132
event_scrapers/Working/smedia/bluesky.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""Pull the dreamfreely.org Bluesky author feed and store link-style posts.

Fixes: credentials are read from the environment (falling back to the
previous hard-coded values so existing deployments keep working -- those
secrets should be rotated and removed from source control); bare ``except:``
clauses are narrowed to ``except Exception:``; the three copies of the
common post-field extraction are collapsed into one helper; the external-link
branch now defaults ``link`` to "blank" so a post whose embed lacks a URI no
longer KeyErrors inside ``createSocialLink``.
"""
import os, sys
from datetime import datetime
from dateutil import relativedelta

from atproto import Client

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from socials.models import SocialLink, SocialPost
# from digitools import getBrowser, createURL, createBasicEvent, getSource

tz = pytz.timezone("US/Central")

# SECURITY: prefer env vars; fallbacks preserve old behavior only until the
# leaked credentials are rotated.
USERNAME = os.environ.get("BLUESKY_USERNAME", "dreamfreely.org")
PASSWORD = os.environ.get("BLUESKY_PASSWORD", "Futbol21!@")

client = Client()
client.login(USERNAME, PASSWORD)
feed = client.get_author_feed(USERNAME, limit = 100)

def createSocialLink(post):
    """Upsert one post dict into SocialLink (platform fixed to 'bluesky')."""
    new_post, created = SocialLink.objects.update_or_create(
        uri = post['uri'],
        text = post['text'],
        link = post['link'],
        handle = post['handle'],
        likes = post['likes'],
        reposts = post['reposts'],
        quotes = post['quotes'],
        replies = post['replies'],
        created_at = post['created_at'],
        platform = 'bluesky',
        rt_uri = post['rt_uri'],
        rt_text = post['rt_text'],
        rt_link = post['rt_link'],
        rt_handle = post['rt_handle'],
    )
    print("completed write")

def base_fields(post):
    """Fields common to every post shape (first two text lines only)."""
    return {
        'text': " ".join(post.record.text.split("\n")[:2]),
        'handle': post.author.handle,
        'uri': post.uri.split("feed.post/")[1],
        'likes': post.like_count,
        'quotes': post.quote_count,
        'replies': post.reply_count,
        'reposts': post.repost_count,
        'created_at': post.record.created_at,
    }

print(len(feed.feed))

for post in feed.feed:
    post = post.post
    print("\n\nNEW POST\n\n")

    if hasattr(post.record.embed, 'external'):
        # Post embedding an external link.
        p = base_fields(post)
        p['link'] = "blank"  # fixed: default so a missing uri can't KeyError later
        try:
            p['link'] = post.record.embed.external.uri.split("?")[0]
        except Exception:
            pass
        p['rt_handle'] = "blank"
        p['rt_text'] = "blank"
        p['rt_uri'] = "blank"
        p['rt_link'] = "blank"

    elif hasattr(post.embed, 'record'):
        # Quote/repost of another record; pull the quoted post's details.
        p = base_fields(post)
        p['link'] = "blank"
        try:
            p['rt_handle'] = post.embed.record.record.author.handle
            p['rt_text'] = " ".join(post.embed.record.record.value.text.split("\n")[:2])
            p['rt_uri'] = post.embed.record.record.uri.split("feed.post/")[1]
            p['rt_link'] = post.embed.record.record.value.embed.external.uri.split("?")[0]
        except Exception:
            p['rt_handle'] = "blank"
            p['rt_text'] = "blank"
            p['rt_uri'] = "blank"
            p['rt_link'] = "blank"

    else:
        # Plain text post.
        p = base_fields(post)
        p['rt_handle'] = "blank"
        p['rt_text'] = "blank"
        p['rt_uri'] = "blank"
        p['rt_link'] = "blank"
        p['link'] = "blank"

    try:
        print('writing file')
        createSocialLink(p)
    except Exception as e:
        ppr(post.record.embed)
        print(e, "\nthis\n\n")
|
||||
72
event_scrapers/Working/smedia/bluesky_media.py
Normal file
72
event_scrapers/Working/smedia/bluesky_media.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Pull the dreamfreely.org Bluesky author feed and store image posts.

Fix: credentials are read from the environment, falling back to the old
hard-coded values so existing deployments keep working; those secrets
should be rotated and removed from source control.
"""
import os, sys
from datetime import datetime
from dateutil import relativedelta

from atproto import Client

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from socials.models import SocialImg
# from digitools import getBrowser, createURL, createBasicEvent, getSource

tz = pytz.timezone("US/Central")

# SECURITY: prefer env vars; fallbacks preserve old behavior only until the
# leaked credentials are rotated.
USERNAME = os.environ.get("BLUESKY_USERNAME", "dreamfreely.org")
PASSWORD = os.environ.get("BLUESKY_PASSWORD", "Futbol21!@")

client = Client()
client.login(USERNAME, PASSWORD)
feed = client.get_author_feed(USERNAME, limit = 100)

def createSocialImg(post):
    """Upsert one image-post dict into SocialImg (platform 'bluesky')."""
    new_post, created = SocialImg.objects.update_or_create(
        uri = post['uri'],
        text = post['text'],
        img_link = post['img_link'],
        handle = post['handle'],
        created_at = post['created_at'],
        platform = 'bluesky',
    )
    print(created, new_post)

tweets = []

print(len(feed.feed))

for post in feed.feed:
    post = post.post

    # Only posts that embed at least one image are stored; the first
    # image's full-size URL is kept.
    if hasattr(post.embed, 'images'):
        p = {}
        p['img_link'] = post.embed.images[0].fullsize
        p['text'] = " ".join(post.record.text.split("\n")[:2])
        p['handle'] = post.author.handle
        p['uri'] = post.uri.split("feed.post/")[1]
        p['created_at'] = post.record.created_at

        tweets.append(p)

        try:
            print('writing file')
            createSocialImg(p)
        except Exception as e:
            ppr(post.embed)
            print(e, "\nthis\n\n")
|
||||
72
event_scrapers/Working/smedia/redsky.py
Normal file
72
event_scrapers/Working/smedia/redsky.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Mirror the cultureclap Reddit account's upvoted posts into SocialLink.

Fix: API credentials are read from the environment, falling back to the old
hard-coded values so existing deployments keep working; those secrets should
be rotated and removed from source control.
"""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

import praw

from socials.models import SocialLink, SocialPost
# from digitools import getBrowser, createURL, createBasicEvent, getSource

tz = pytz.timezone("US/Central")

# SECURITY: prefer env vars; fallbacks preserve old behavior only until the
# leaked credentials are rotated.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", "rxW3Ywqke6FZDP7pIhYYuw"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", "cg1VNl0I-RTuYUwgz16ryKh2wWKEcA"),
    password=os.environ.get("REDDIT_PASSWORD", "7CTu4sGFi9E0"),
    user_agent="CultureClap",
    username="cultureclap",
)

def createSocialLink(post):
    """Upsert one upvoted submission into SocialLink (rt_* fields unused)."""
    new_post, created = SocialLink.objects.update_or_create(
        text = post['text'],
        link = post['link'],
        handle = post['handle'],
        likes = post['likes'],
        replies = post['replies'],
        platform = post['platform'],
        created_at = post['created_at'],
        rt_uri = 'blank',
        rt_text = 'blank',
        rt_link = 'blank',
        rt_handle = 'blank',
    )
    print(created, new_post)

count = 0

# Stop after 50 successful writes (quit() raises SystemExit, which the
# except Exception below does not swallow).
for item in reddit.user.me().upvoted():
    rdt = {}
    rdt['text'] = item.title + " | " + item.selftext
    rdt['handle'] = item.author.name
    rdt['link'] = item.url
    rdt['likes'] = item.ups
    rdt['replies'] = len(item.comments.list())
    # NOTE(review): fromtimestamp() yields local time from a UTC epoch --
    # confirm whether an explicit tz conversion is intended.
    rdt['created_at'] = datetime.fromtimestamp(item.created_utc)
    rdt['platform'] = 'reddit'

    try:
        print('writing file')
        createSocialLink(rdt)
        count +=1
        if count > 50:
            quit()
    except Exception as e:
        ppr(item)
        print(e, "\nthis\n\n")
|
||||
71
event_scrapers/Working/venues/AcmeComedy.Mpls.py
Normal file
71
event_scrapers/Working/venues/AcmeComedy.Mpls.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Scrape the Acme Comedy Club calendar and store comedy events.

Fix: the "No run_env" branch previously called ``br.close()`` before ``br``
was ever assigned, raising NameError instead of exiting cleanly; the call is
removed (matching the sibling scrapers, which just print and quit).
"""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

import events.digitools as digitools

from events.models import Organization, Scraper, Calendar, Event

venue, created = Organization.objects.get_or_create(
    name="Acme Comedy Club",
    city="Minneapolis",
    website="https://acmecomedycompany.com/the-club/calendar/",
    is_venue = True
)

scraper,item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps, event_type):
    """Store each calendar listing as a basic comedy ("Co") event.

    Jan listings are assumed to belong to next year (the calendar only
    looks forward).
    """
    contents = ps.xpath('.//*/li[@class="event"]')
    for c in contents:
        try:
            event = {}
            day = c.xpath('.//*/span[@class="day"]/text()')[0]
            month = c.xpath('.//*/span[@class="mth"]/text()')[0]
            year = datetime.now().year
            if month == "Jan":
                year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/span[@class="event_title"]/a/text()')[0]
            event['date'] = [month, day, str(year), c.xpath('.//*/span[@class="event_time"]/text()')[0].strip()]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = c.xpath('.//*/span[@class="event_title"]/a/@href')[0]
            digitools.createBasicEvent(event, "Co", venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()  # fixed: no br.close() here -- br does not exist in this branch

links = digitools.createURL("https://acmecomedycompany.com/the-club/calendar/")

for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Co")

digitools.updateScraper(scraper, item_count_start)
br.close()
|
||||
67
event_scrapers/Working/venues/Amsterdam.StPaul.py
Normal file
67
event_scrapers/Working/venues/Amsterdam.StPaul.py
Normal file
@@ -0,0 +1,67 @@
|
||||
"""Scrape the Amsterdam Bar & Hall events page and store music events."""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Calendar
import events.digitools as digitools

venue, created = Organization.objects.get_or_create(
    name="Amsterdam Bar & Hall",
    city="St. Paul",
    website="https://www.amsterdambarandhall.com/events-new/",
    is_venue=True
)

scraper,item_count_start = digitools.getScraper(venue)

DATETIME_FORMAT = '%B %d %Y %I:%M%p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'

def get_events(ps):
    """Parse the events list and store each show as a basic "Mu" event."""
    rows = ps.xpath('.//*/ul[@class="events-list"]/li')
    for row in rows:
        try:
            event = {}
            day = row.xpath('.//*/div[@class="date-day"]/text()')[0]
            month = row.xpath('.//*/div[@class="date-month"]/text()')[0]
            year = datetime.now().year
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = row.xpath('.//div/h4/a/text()')[0]
            # "Month Day Year H:MMam/pm" assembled from the card pieces.
            start_time = row.xpath('.//div[@class="event-info"]/p/text()')[0].split(" ")[0]
            event['date'] = " ".join([month, day, str(year), start_time])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = row.xpath('.//div[@class="event-info"]/h4/a/@href')[0]
            # Strip a "<promoter> presents" prefix, then any stray leading dot.
            if " presents" in event['title']:
                event['title'] = event['title'].split("presents")[1][1:].strip()
            if event['title'].startswith('.'):
                print("BLAHH\n")
                event['title'] = event['title'][1:].strip()
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items += 1
        except Exception as e:
            print(e)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

ps = digitools.getSource(br, venue.website)
get_events(ps)
sleep(3)
br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
70
event_scrapers/Working/venues/EastsideLibrary.py
Normal file
70
event_scrapers/Working/venues/EastsideLibrary.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Scrape Eastside Freedom Library's events calendar (The Events Calendar
plugin markup) and store education events."""
import os, sys
from datetime import datetime
from dateutil import relativedelta

import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

current_year = str(datetime.now().year)

venue, created = Organization.objects.get_or_create(
    name="Eastside Freedom Library",
    city="Minneapolis",
    website="https://eastsidefreedomlibrary.org/events/",
    is_venue=True
)

scraper,item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'

def get_events(ps):
    """Store each <article> listing as a basic education ("Ed") event."""
    listings = ps.xpath('.//*/article')
    for listing in listings:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            title_anchor = './/*/h3[@class="tribe-events-calendar-list__event-title tribe-common-h6 tribe-common-h4--min-medium"]/a'
            event['title'] = listing.xpath(title_anchor + '/text()')[0].strip()
            event['link'] = listing.xpath(title_anchor + '/@href')[0]
            # Source markup omits the year, so append the current one.
            event['date'] = listing.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].strip() + " " + current_year
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            try:
                new_event = digitools.createBasicEvent(event, "Ed", venue)
                scraper.items += 1
            except Exception as e:
                print(e)
                ppr(event)
                print("\n+++\n")
        except Exception as e:
            print(e)

if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

calendar_url = 'https://eastsidefreedomlibrary.org/events/'

ps = digitools.getSource(br, calendar_url)

get_events(ps)

br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
178
event_scrapers/Working/venues/FirstAveScrape.py
Normal file
178
event_scrapers/Working/venues/FirstAveScrape.py
Normal file
@@ -0,0 +1,178 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
|
||||
from lxml import html
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper, Event
|
||||
import events.digitools as digitools
|
||||
|
||||
venue, created = Organization.objects.get_or_create(
|
||||
name="First Avenue",
|
||||
city="Minneapolis",
|
||||
website="https://first-avenue.com",
|
||||
is_venue = True
|
||||
)
|
||||
|
||||
scraper,item_count_start = digitools.getScraper(venue)
|
||||
|
||||
tz = pytz.timezone("US/Central")
|
||||
|
||||
DATETIME_FORMAT = '%b %d %Y %I%p'
|
||||
DATETIME_FORMAT_2 = '%b %d %Y %I:%M%p'
|
||||
DATETIME_FORMAT_3 = '%b %d %Y'
|
||||
|
||||
# Set initial variables for City, etc
|
||||
month = int(datetime.now().month)
|
||||
day = int(datetime.now().day)
|
||||
|
||||
if month == 12:
|
||||
next_month = "01"
|
||||
else:
|
||||
next_month = month + 1
|
||||
if next_month < 10:
|
||||
next_month = "0" + str(next_month)
|
||||
|
||||
if month < 10:
|
||||
month = "0" + str(month)
|
||||
|
||||
year = int(datetime.now().year)
|
||||
|
||||
calendar_url = 'https://first-avenue.com/shows/?start_date=' + str(year) + str(month) + str(day)
|
||||
|
||||
next_month_string = str(next_month) + "01"
|
||||
|
||||
if next_month == 1:
|
||||
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
|
||||
else:
|
||||
if int(next_month) == 1:
|
||||
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year + 1) + next_month_string
|
||||
else:
|
||||
calendar_url_2 = 'https://first-avenue.com/shows/?start_date=' + str(year) + next_month_string
|
||||
|
||||
|
||||
print("\n\n", calendar_url, calendar_url_2, "\n\n")
|
||||
|
||||
if len(sys.argv) >= 2:
|
||||
arg1 = sys.argv[1]
|
||||
br = digitools.getBrowser(arg1)
|
||||
else:
|
||||
print("No run_env")
|
||||
quit()
|
||||
|
||||
|
||||
if datetime.now().day < 8:
|
||||
ps = digitools.getSource(br, calendar_url)
|
||||
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
|
||||
elif 7 < datetime.now().day < 15:
|
||||
ps = digitools.getSource(br, calendar_url)
|
||||
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
|
||||
elif 14 < datetime.now().day < 21:
|
||||
ps = digitools.getSource(br, calendar_url)
|
||||
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:95]
|
||||
ps = digitools.getSource(br, calendar_url_2)
|
||||
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:31]
|
||||
else:
|
||||
ps = digitools.getSource(br, calendar_url)
|
||||
shows = ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')
|
||||
|
||||
ps = digitools.getSource(br, calendar_url_2)
|
||||
shows = shows + ps.xpath('.//*/div[@class="show_name content flex-fill"]/div/div/h4/a/@href')[:63]
|
||||
|
||||
events = []
|
||||
|
||||
def get_info(pse):
    """Build an event dict from one First Avenue show detail page.

    Extracts venue, title, guests, flyer, date parts and detail key/values,
    then tries three successively looser datetime formats to produce
    event["date_time"]. DATETIME_FORMAT_2/_3 and `scraper` are module-level
    names defined outside this view.
    """
    event = {}
    event['scraper'] = scraper
    event['calendar'] = scraper.calendar
    event["venue"] = pse.xpath('.//*/div[@class="content"]/div/div[@class="venue_name"]/text()')[0].replace('\t', '').replace('\n', '').strip()
    event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[0].replace('\t', '').replace('\n', '')
    # Some pages render the real title in the third show_title text node.
    if event["show_title"] == "":
        event["show_title"] = pse.xpath('.//*/span[@class="show_title"]/text()')[2].replace('\t', '').replace('\n', '')
    event["guests"] = pse.xpath('.//*/div[@class="feature_details_main d-flex align-items-center"]/div/h4/text()')
    event["flyer"] = pse.xpath('.//*/img[@class="gig_poster lazy loaded"]/@src')
    # Best-effort enrichment: failures are logged and parsing continues.
    try:
        event = get_date(pse, event)
    except Exception as e:
        print("date issue: ", e)
    try:
        event = get_details(pse, event)
    except Exception as e:
        print("details issue: ", e)
    # Primary attempt: date parts plus the "Doors Open" time.
    try:
        event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT)
    except Exception as e:
        print("Using alt date format 2: ", e)
        try:
            event["date_time"] = datetime.strptime(" ".join(event["date"]) + " " + event["details"]["Doors Open"], DATETIME_FORMAT_2)
            ppr(event)
        except Exception as e:
            # Last resort: date only, no doors time.
            print("Using alt date format 3: ", e)
            print(event['date'])
            event["date_time"] = datetime.strptime(" ".join(event["date"]), DATETIME_FORMAT_3)
    return event
|
||||
|
||||
def get_date(pse, event):
    """Attach the show's calendar date to *event*.

    Reads the month/day/year cells of the page's date_container widget,
    strips layout tabs/newlines, and stores event["date"] = [month, day, year].
    Raises IndexError when a cell is missing (callers catch it).
    """
    def _cell(query):
        # First matching text node, with layout whitespace removed.
        return pse.xpath(query)[0].replace('\t', '').replace('\n', '')

    event["date"] = [
        _cell('.//*/div[@class="date_container"]/div/div[@class="month"]/text()'),
        _cell('.//*/div[@class="date_container"]/div/div[@class="day"]/text()'),
        _cell('.//*/div[@class="date_container"]/div/div[@class="year"]/text()'),
    ]
    return event
|
||||
|
||||
def get_details(pse, event):
    """Attach the show-details key/value strip to *event* and return it.

    The page renders detail labels in <h6> nodes (e.g. "Doors Open") with
    matching values in <h2> nodes (e.g. "7:00PM"); they are zipped
    pairwise into event["details"]. Extra labels or values are dropped,
    matching the original pairwise zip.

    Fix: the original swallowed any error and fell off the end, implicitly
    returning None — the caller does `event = get_details(pse, event)`, so
    a single bad page wiped out the whole event dict. Failures are still
    printed (best-effort), but the event is now returned unchanged.
    """
    try:
        labels = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h6/text()')
        values = pse.xpath('.//*/div[@class="show_details text-center"]/div/div/h2/text()')
        event["details"] = dict(zip(labels, values))
    except Exception as e:
        print("details issue: ", e)
    return event
|
||||
|
||||
# Visit each show detail page, parse it, and persist the event.
for show in shows:
    br.get(show)
    sleep(2)
    try:
        pse = html.fromstring(br.page_source)
    except Exception as e:
        # NOTE(review): on failure `pse` (and below, `event`) keeps its value
        # from the previous iteration, so a broken page may be re-persisted
        # under a new link — consider `continue` here instead.
        print(show)
        pass
    try:
        event = get_info(pse)
    except Exception as e:
        print("get_info error: ", e)
    try:
        event["link"] = show
        # First Avenue lists shows for several venues; a few are in St. Paul.
        if event["venue"] in ["Palace Theater", "Turf Club", "The Fitzgerald Theater", "Amsterdam Bar & Hall"]:
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="St. Paul")
        else:
            venue, created = Organization.objects.get_or_create(name=event["venue"], is_venue=True, city="Minneapolis")
    except Exception as e:
        print("Venue creation error: ", e, "\n", event, "\n", event["venue"])
    try:
        event['dateStamp'] = event['date_time']
        event['scraper'] = scraper
        new_event, created = digitools.createDetailedEvent(event, "Mu", venue)
        scraper.items+=1
    except Exception as e:
        # NOTE(review): `created` may be unbound if the very first insert
        # fails, which would raise inside this print. Run is aborted on any
        # persistence error.
        print("event creation error: ", e, "\n\n", event, "\n\n", created)
        quit()

ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)

# br.find_element_by_class_name('fc-btn_allCalendars-button').click()
|
||||
68
event_scrapers/Working/venues/GinkgoCoffee.stp.py
Normal file
68
event_scrapers/Working/venues/GinkgoCoffee.stp.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="Ginkgo Coffee",
    city="Saint Paul",
    website="https://ginkgocoffee.com/events/",
    is_venue = True
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
DATETIME_FORMAT_2 = '%b %d %Y %I:%M %p'
# Site renders e.g. "September 12 @ 7:00 PM"; the year is appended by us.
DATETIME_FORMAT = '%B %d @ %I:%M %p %Y'
|
||||
|
||||
def get_events(ps, event_type):
    """Scrape every <article> event card on the page and persist each one.

    The site omits the year from its dates, so the current year is
    appended — except that January listings are assumed to belong to the
    coming year. Cards that fail to parse are printed and skipped.
    """
    for card in ps.xpath('.//*/article'):
        event = {}
        try:
            start_text = card.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
            year = datetime.now().year
            if start_text.split(' ')[0] == "January":
                # Calendar rollover: January cards belong to next year.
                year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            raw_title = card.xpath('.//*/h3/a/text()')[0]
            event['title'] = raw_title.replace("\n", "").replace("\t", "")
            event['date'] = " ".join([start_text, str(year)])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = card.xpath('.//*/h3/a/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
|
||||
|
||||
# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Single-page scrape of the venue's event listing, then record run stats.
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
71
event_scrapers/Working/venues/GreenRoom.Mpls.py
Normal file
71
event_scrapers/Working/venues/GreenRoom.Mpls.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools


# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="Green Room",
    city="Minneapolis",
    website="https://www.greenroommn.com/events",
    is_venue = True
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

event_type = "Mu"

# Time Signatures
tz = pytz.timezone("US/Central")
# Site renders e.g. "Fri Sep 12" + "7:00 PM"; the year is inserted by us.
DATETIME_FORMAT = '%a %b %d %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
|
||||
|
||||
def get_events(ps, event_type):
    """Parse every Green Room event card on *ps* and persist it.

    Each card yields a title, a start datetime (the site omits the year,
    so the current year is inserted) and a link; cards that fail to parse
    are printed and skipped.
    """
    cards = ps.xpath('.//*/div[@class="vp-event-card vp-venue-greenroom vp-col"]')
    for card in cards:
        event = {}
        try:
            start_time = card.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
            start_date = card.xpath('.//*/span[@class="vp-date"]/text()')[0].strip()
            # Also validates the "Day Mon DD" shape of the date string.
            month = start_date.split(" ")[1]
            year = datetime.now().year
            # if month == "Jan":
            #     year = int(year) + 1
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = card.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
            event['datetime'] = start_date + " " + str(year) + " " + start_time
            event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
            event['link'] = venue.website + card.xpath('.//a[@class="vp-event-link"]/@href')[0]
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n+++\n")
|
||||
|
||||
# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Single-page scrape of the venue's event listing, then record run stats.
ps = digitools.getSource(br, venue.website)
get_events(ps, event_type)
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
96
event_scrapers/Working/venues/HookLadderScrape.py
Normal file
96
event_scrapers/Working/venues/HookLadderScrape.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


from lxml import html

count = 0

# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="Hook & Ladder",
    city="Minneapolis",
    website="https://thehookmpls.com",
    is_venue=True,
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)
ppr(scraper)

tz = pytz.timezone("US/Central")
# Matches the site's "Fri, Sep 12, 2025 @ 7:00 PM" start strings.
DATETIME_FORMAT = '%a, %b %d, %Y @ %I:%M %p'

# Set initial variables for City, etc
# The list view is paginated; three pages cover the posted horizon.
calendar_url = [
    "https://thehookmpls.com/events/list/page/1",
    "https://thehookmpls.com/events/list/page/2",
    "https://thehookmpls.com/events/list/page/3"
]

# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()
|
||||
|
||||
|
||||
def get_listings(pse, events):
    """Parse every <article> listing on one Hook & Ladder list page.

    Each parsed event dict is appended to *events* (persisted later by the
    caller). A card with no eventSubHead subtitle is skipped entirely
    (original behavior, kept); a missing price falls back to "See Link".

    Fix: the two fallbacks used bare `except:`, which also swallows
    KeyboardInterrupt/SystemExit; both guard a `[0]` on a possibly-empty
    xpath result, so they now catch exactly IndexError.
    """
    nevents = pse.xpath('.//*/article')
    for event in nevents:
        e = {}
        e['datetime'] = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0]
        e['show_title'] = event.xpath('.//*/header/h2/a/@title')[0]
        e['link'] = event.xpath('.//*/header/h2/a/@href')[0]
        try:
            e['subtitle'] = event.xpath('.//*/header/div[@class="eventSubHead"]/text()')[0]
        except IndexError:
            # No subtitle means no supporting-act info: drop the card.
            continue
        try:
            e['price'] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0].replace("Tickets ", "")
        except IndexError:
            e['price'] = "See Link"
        e['image'] = event.xpath('.//*/img/@data-src')[0]
        e["date_time"] = datetime.strptime(e['datetime'], DATETIME_FORMAT)
        e['scraper'] = scraper
        e['calendar'] = scraper.calendar
        events.append(e)
|
||||
|
||||
# Collect listings across all paginated list views, then persist them.
events = []

for cal in calendar_url:
    br.get(cal)
    sleep(3)
    pse = html.fromstring(br.page_source)
    get_listings(pse, events)

for event in events:
    try:
        new_event = Event.objects.update_or_create(
            calendar = event['calendar'],
            scraper = event['scraper'],
            event_type = 'Mu',
            show_title = event["show_title"],
            show_link = event["link"],
            show_date = event["date_time"],
            show_day = event["date_time"],
            # NOTE(review): subtitle is a single string, so " ".join() here
            # inserts a space between every CHARACTER — likely meant to be
            # just event["subtitle"]. Confirm before changing stored data.
            guests = " ".join(event["subtitle"]),
            venue = venue
        )
    except Exception as e:
        print("oops ", e, "\n\n", "Scraper:", scraper)

br.close()
digitools.updateScraper(scraper, item_count_start)
|
||||
70
event_scrapers/Working/venues/MagersQuinn.py
Normal file
70
event_scrapers/Working/venues/MagersQuinn.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

# Organization row for this scraper (a bookstore, not a venue per se).
venue, created = Organization.objects.get_or_create(
    name="Magers & Quinn",
    city="Minneapolis",
    website="https://www.magersandquinn.com/events",
    is_venue=False
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

# Matches "Monday, September 12 , 2025 7:00 PM" assembled from calendar cells.
DATETIME_FORMAT = '%A, %B %d , %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
|
||||
|
||||
def get_events(ps, event_type):
    """Scrape each calendar day-cell that has an event and persist it.

    NOTE: the stored event type is hard-coded to "Ed"; *event_type* is
    accepted for parity with the other scrapers but unused (callers pass
    "Ed" anyway).

    A cell is first treated as an in-store event; if anything fails, the
    off-site link variant is tried instead. Fix: the off-site fallback ran
    unguarded inside the `except` block, so when the real failure was e.g.
    a date-parse error, the fallback's own IndexError escaped get_events
    and aborted every remaining cell on the page. The fallback is now
    guarded, and the item counter is bumped when it succeeds.
    """
    contents = ps.xpath('.//*/div[@class="day has-event"]')
    for c in contents:
        event = {}
        try:
            day = c.xpath('.//*/div[@class="dd"]/text()')[0]
            month = c.xpath('.//*/div[@class="month"]/text()')[0]
            year = c.xpath('.//*/div[@class="year"]/text()')[0]
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3/text()')[0]
            event['date'] = [month, day, year, c.xpath('.//*/p[@class="time"]/text()')[0]]
            event['date'] = " ".join(event['date'])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event in-store"]/@href')[0]
            digitools.createBasicEvent(event, "Ed", venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            try:
                event['link'] = "https://www.magersandquinn.com" + c.xpath('.//a[@class="event off-site"]/@href')[0]
                ppr(event)
                digitools.createBasicEvent(event, "Ed", venue)
                scraper.items+=1
            except Exception as e2:
                print("off-site fallback failed: ", e2)
            print("\n\n+++\n\n")
|
||||
|
||||
|
||||
# Build the paginated calendar URLs to walk.
links = digitools.createBasicURL("https://www.magersandquinn.com/events/")

# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Scrape each calendar page with a polite pause, then record run stats.
for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Ed")
    sleep(3)
# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
80
event_scrapers/Working/venues/MplsVFW.py
Normal file
80
event_scrapers/Working/venues/MplsVFW.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper, Event
import events.digitools as digitools


from selenium.webdriver.common.by import By
from lxml import html

# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="Uptown VFW",
    city="Minneapolis",
    website="https://noboolpresents.com/venues/uptown-vfw/",
    is_venue = True
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

# Primary format for "Fri September 12 @ 7:00 pm" + appended year;
# the _2/_3 variants are fallbacks used by sibling scrapers.
DATETIME_FORMAT = '%a %B %d @ %I:%M %p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://noboolpresents.com/venues/uptown-vfw/'
current_year = str(datetime.now().year)

# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# The listing is JS-rendered and slow; generous settle time before parsing.
br.get(calendar_url)
sleep(30)
|
||||
|
||||
def getEvents(br):
    """Parse the currently-loaded listing page and upsert each show.

    Reads the live page source from the Selenium driver, extracts one
    <article> per show, and writes it straight to the Event table.
    The site omits the year, so module-level current_year is appended.
    """
    ps = html.fromstring(br.page_source)
    events = ps.xpath('.//*/article')
    for event in events:
        deets = {}
        # Strip the decorative bullet from the start string before parsing.
        dateTime = event.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].replace("•", "").strip() + " " + current_year
        title = event.xpath('.//*/h2[@class="alt-font"]/a/text()')[0].replace("\n", "").replace("\t", "")
        link = event.xpath('.//*/h2[@class="alt-font"]/a/@href')[0]
        deets["tickets"] = event.xpath('.//*/span[@class="tribe-events-c-small-cta__price"]/strong/text()')[0]
        try:
            new_event = Event.objects.update_or_create(
                calendar = scraper.calendar,
                scraper = scraper,
                event_type = 'Mu',
                show_title = title,
                show_link = link,
                show_date = datetime.strptime(dateTime, DATETIME_FORMAT),
                show_day = datetime.strptime(dateTime, DATETIME_FORMAT).date(),
                more_details = deets["tickets"],
                venue = venue
            )
            scraper.items+=1
        except Exception as e:
            # Upsert/parse failures are logged and the show is skipped.
            print("oops", e)
|
||||
|
||||
# Scrape page one, click through to the next listing page, scrape again.
getEvents(br)
br.find_element(By.XPATH, './/*/li[@class="tribe-events-c-nav__list-item tribe-events-c-nav__list-item--next"]/a').click()
sleep(5)
getEvents(br)
br.close()

# Record this run's item delta on the scraper row.
digitools.updateScraper(scraper, item_count_start)
|
||||
106
event_scrapers/Working/venues/ParkwayTheater.py
Normal file
106
event_scrapers/Working/venues/ParkwayTheater.py
Normal file
@@ -0,0 +1,106 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

# Event is aliased to avoid clashing with local event dicts.
from events.models import Organization, Scraper, Event as DSEvent
import events.digitools as digitools


# NOTE(review): get_or_create shouldn't normally raise here; this fallback
# likely papers over duplicate rows with differing fields — confirm.
try:
    venue, created = Organization.objects.get_or_create(
        name="Parkway Theater",
        city="Minneapolis",
        website="https://theparkwaytheater.com",
        is_venue = True
    )
except Exception as e:
    venue = Organization.objects.get(name="Parkway Theater")

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

# Matches "Sep 12, 2025 7:00 PM" assembled from the summary card fields.
DATETIME_FORMAT = '%b %d, %Y %I:%M %p'
|
||||
|
||||
def get_events(ps, event_type):
    """Scrape the Parkway summary-card grid on *ps* and upsert each event.

    Cards and their thumbnail URLs come from two parallel xpath lists and
    are zipped positionally — assumes one thumbnail per card in DOM order
    (TODO confirm). Times like "7:00 PM – 9:00 PM" are split on the en
    dash and only the start is kept.
    """
    contents = ps.xpath('.//*/div[@class="summary-content sqs-gallery-meta-container"]')
    img_etc = ps.xpath('.//*/div[@class="summary-thumbnail-outer-container"]/a/div/img/@src')
    # NOTE(review): result unused — dead statement left from debugging?
    ps.xpath('.//*/span[@class="event-time-12hr"]/text()')
    for c,i in zip(contents,img_etc):
        try:
            event = {}
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/a[@class="summary-title-link"]/text()')[0]
            event['link'] = "https://theparkwaytheater.com" + c.xpath('.//*/a[@class="summary-title-link"]/@href')[0]
            event['date'] = c.xpath('.//div/div/time/text()')[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['desc'] = c.xpath('.//*/p/text()')[0]
            event['img_link'] = i
            event['details'] = {
                'description': event['desc'],
                'img_link': event['img_link'],
            }

            # First upsert attempt with the single-date form.
            try:
                new_event = DSEvent.objects.update_or_create(
                    calendar = scraper.calendar,
                    scraper = scraper,
                    event_type = event_type,
                    show_title = event['title'],
                    show_link = event['link'],
                    show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
                    show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
                    more_details = event["details"],
                    venue = venue
                )
                scraper.items+=1
            except Exception as e:
                # Retry for date *ranges*: keep only the start date before
                # re-parsing, then attempt the same upsert once more.
                try:
                    event['date'] = c.xpath('.//div/div/time/text()')[0].split("–")[0] + " " + c.xpath('.//*/span[@class="event-time-12hr"]/text()')[0].split("–")[0].strip()
                    event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
                    new_event = DSEvent.objects.update_or_create(
                        calendar = scraper.calendar,
                        scraper = scraper,
                        event_type = event_type,
                        show_title = event['title'],
                        show_link = event['link'],
                        show_date = datetime.strptime(event['date'], DATETIME_FORMAT),
                        show_day = datetime.strptime(event['date'], DATETIME_FORMAT),
                        more_details = event["details"],
                        venue = venue
                    )
                    scraper.items+=1
                except Exception as e:
                    print(e)
                    print("\n\n+++\n\n")
        except Exception as e:
            # Card-level extraction failure: skip silently.
            continue
|
||||
|
||||
# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Two listing pages share one parser: live shows as music, films as theater.
calendar_url = 'https://theparkwaytheater.com/live-events'
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Mu")

calendar_url = "https://theparkwaytheater.com/movies"
ps = digitools.getSource(br, calendar_url)
get_events(ps, "Th")

# ppr(events)
br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
98
event_scrapers/Working/venues/SPCO.stp.py
Normal file
98
event_scrapers/Working/venues/SPCO.stp.py
Normal file
@@ -0,0 +1,98 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper

import events.digitools as digitools

# Presenting organization (SPCO performs at multiple venues, created below).
venue, created = Organization.objects.get_or_create(
    name="St Paul Chamber Orchestra",
    city="St Paul",
    website="https://thespco.org",
    is_venue = False
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

# Time Signatures
tz = pytz.timezone("US/Central")
# Matches "Friday, September 12, 2025 – 7:00 PM" (en dash) on detail pages.
DATETIME_FORMAT = '%A, %B %d, %Y – %I:%M %p'
|
||||
|
||||
def get_events(ps, event_type):
    """Follow each concert link on the SPCO calendar page and persist every
    performance date found on the detail page.

    Titles may be split around an <em> subtitle; the three branches stitch
    the fragments back together in page order. One concert can have
    several performances ("day" blocks), each at its own venue, which is
    get_or_create'd on the fly.

    NOTE(review): this function rebinds the module-level names `ps` and
    `venue`, and the loop variable `e` is shadowed by the exception name —
    intentional-looking but fragile; confirm before refactoring.
    """
    contents = ps.xpath('.//*/div[@class="event-title"]/a/@href')
    # set() de-duplicates repeated links to the same concert.
    for c in set(contents):
        try:
            link = 'https://content.thespco.org' + c
            ps = digitools.getSource(br, link)
            ntitle = ps.xpath('.//*/article/h1/text()')
            subtitle = ps.xpath('.//*/article/h1/em/text()')
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            # Reassemble the title around an optional italic subtitle.
            if len(subtitle) == 1:
                if len(ntitle) == 2:
                    title = ntitle[0] + subtitle[0] + ntitle[1]
                elif ntitle[0].startswith(" "):
                    title = subtitle[0] + ntitle[0]
                else:
                    title = ntitle[0] + subtitle[0]
            else:
                title = ntitle[0]

            # One block per performance date.
            events = ps.xpath('.//*/div[@class="day"]')
            for e in events:
                new_venue = e.xpath('.//*/strong[@class="venue"]/text()')[0].strip()
                location = e.xpath('.//*/span[@class="location"]/text()')[0].strip()
                # Normalize the free-text location to a known city name.
                if 'Minneapolis' in location:
                    location = 'Minneapolis'
                elif 'St. Paul' in location:
                    location = 'St. Paul'
                else:
                    location = location

                venue, created = Organization.objects.get_or_create(
                    name=new_venue,
                    city=location,
                    is_venue = True
                )

                dateTime = e.xpath('.//*/h3[@class="date"]/text()')[0].replace("\n", "").replace("\t", "").strip()
                event['dateStamp'] = datetime.strptime(dateTime, DATETIME_FORMAT)
                event['venue'] = venue
                event['location'] = location
                event['title'] = "SPCO: " + title
                event['link'] = link
                event_type = "Mu"
                digitools.createBasicEvent(event, event_type, venue)
                scraper.items+=1
        except Exception as e:
            print("ERROR: ", e)
            print("\n\n+++\n\n")
|
||||
|
||||
# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Get Event Page Link(s)
links = digitools.createURLNoZero("https://content.thespco.org/events/calendar/")

# Walk each calendar page with a polite pause, then record run stats.
for link in links:
    ps = digitools.getSource(br, link)
    get_events(ps, "Mu")
    sleep(3)

br.close()
digitools.updateScraper(scraper, item_count_start)
|
||||
71
event_scrapers/Working/venues/WhiteSquirrelScrape.py
Normal file
71
event_scrapers/Working/venues/WhiteSquirrelScrape.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

tz = pytz.timezone("US/Central")
# ISO date from the <time> attribute plus " H:MM pm" from the start span.
DATETIME_FORMAT = '%Y-%m-%d %I:%M %p'

# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="White Squirrel",
    city="St. Paul",
    website="https://whitesquirrelbar.com",
    is_venue = True
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)


# Set initial variables for City, etc
# The list view is paginated; three pages cover the posted horizon.
calendar_url = [
    'https://whitesquirrelbar.com/calendar/list/page/1/',
    'https://whitesquirrelbar.com/calendar/list/page/2/',
    'https://whitesquirrelbar.com/calendar/list/page/3/'
]

# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()
|
||||
|
||||
def get_listings(pse, events):
    """Collect every event row from one White Squirrel list page.

    Each parsed event is appended to *events* and immediately persisted
    via digitools.createBasicEvent. A missing flyer image falls back to
    the literal string "none".
    """
    row_query = './/*/div[@class="tribe-common-g-row tribe-events-calendar-list__event-row"]'
    for row in pse.xpath(row_query):
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        try:
            event["image"] = row.xpath('.//*/img/@src')[0]
        except:
            event["image"] = "none"
        event["date"] = row.xpath('.//time/@datetime')[0]
        # Start time is the part after "@" in "Mon DD @ H:MM pm".
        event["time"] = row.xpath('.//*/span[@class="tribe-event-date-start"]/text()')[0].split("@")[1]
        raw_title = row.xpath('.//*/h3/a/text()')[0]
        event["title"] = raw_title.replace("\t", "").replace("\n", "")
        event["link"] = row.xpath('.//*/h3/a/@href')[0]
        event['datetime'] = event['date'] + " " + event['time']
        event["dateStamp"] = datetime.strptime(event['datetime'] , DATETIME_FORMAT)
        events.append(event)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items+=1
|
||||
|
||||
# Scrape each paginated list view in turn, then record run stats.
events = []

for cal in calendar_url:
    ps = digitools.getSource(br, cal)
    get_listings(ps, events)

br.close()
digitools.updateScraper(scraper, item_count_start)
|
||||
74
event_scrapers/Working/venues/cedar.mpls.py
Normal file
74
event_scrapers/Working/venues/cedar.mpls.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from events.models import Organization, Scraper
import events.digitools as digitools

# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="Cedar Cultural Center",
    city="Minneapolis",
    website="https://www.thecedar.org",
    is_venue=True
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

# Primary format used by get_events; the _2.._5 variants mirror the date
# shapes seen on other Squarespace sites.
# NOTE(review): only DATETIME_FORMAT is referenced in this file — the
# fallbacks appear unimplemented here.
DATETIME_FORMAT = '%A, %B %d, %Y %I:%M %p'
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
DATETIME_FORMAT_3 = '%A, %B %d at %I:%M%p %Y'
DATETIME_FORMAT_4 = '%A, %B %d at %I%p %Y'
DATETIME_FORMAT_5 = '%A, %B %d @%I%p %Y'
|
||||
|
||||
def get_events(ps):
    """Follow each listing link on the Cedar calendar and persist the show.

    Blog links are skipped. Each detail page is fetched and its start
    time/date/title extracted; Squarespace renders the time under one of
    two class names, so a second extraction attempt is made before giving
    up on a page.

    Fix: when BOTH extraction attempts failed, the original only printed
    the error and then fell through to build the timestamp anyway — with
    `date`/`time` either unbound (NameError kills the whole run) or,
    worse, left over from the previous page (event silently stored with
    the wrong date). Such pages are now skipped with `continue`.
    """
    links = ps.xpath('.//*/div[@class="summary-title"]/a/@href')
    for l in links:
        # News/blog posts live in the same listing; not events.
        if "cedar-news-blog" in l:
            continue
        pse = digitools.getSource(br, "https://www.thecedar.org" + l)
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['link'] = "https://www.thecedar.org" + l
        try:
            time = pse.xpath('.//*/time[@class="event-time-localized-start"]/text()')[0]
            date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
            event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
        except Exception:
            try:
                # Older template variant uses a different time class.
                time = pse.xpath('.//*/time[@class="event-time-localized"]/text()')[0]
                date = pse.xpath('.//*/time[@class="event-date"]/text()')[0]
                event['title'] = pse.xpath('.//*/h1[@class="eventitem-title"]/text()')[0]
            except Exception as e:
                print(e)
                print("failed event: ", event)
                continue
        dateStamp = date + " " + time
        event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
        digitools.createBasicEvent(event, "Mu", venue)
        scraper.items+=1
|
||||
|
||||
# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


# Single-page scrape of the listing index, then record run stats.
calendar_url = 'https://www.thecedar.org/listing'
ps = digitools.getSource(br, calendar_url)
get_events(ps)
br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
105
event_scrapers/Working/venues/club331Scrape.py
Normal file
105
event_scrapers/Working/venues/club331Scrape.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import os, sys
from datetime import datetime
from dateutil import relativedelta

# Bootstrap Django so the ORM models are importable from a standalone script.
import django
sys.path.append('../../../')
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
django.setup()

from time import sleep
from pprint import pprint as ppr
import pytz

from selenium.webdriver.common.by import By
from lxml import html

from events.models import Organization, Scraper, Event
import events.digitools as digitools

# Venue row for this scraper; created on first run, fetched thereafter.
venue, created = Organization.objects.get_or_create(
    name="Club 331",
    city="Minneapolis",
    website="https://331club.com",
    is_venue=True,
)

# Scraper bookkeeping row plus its item count at the start of this run.
scraper,item_count_start = digitools.getScraper(venue)

tz = pytz.timezone("US/Central")

# Three fallback shapes for "Mon DD [time] YYYY" assembled below.
DATETIME_FORMAT = '%b %d %I%p %Y'
DATETIME_FORMAT_2 = '%b %d %I:%M%p %Y'
DATETIME_FORMAT_3 = '%b %d %Y'
# Set initial variables for City, etc
calendar_url = 'https://331club.com/#calendar'
current_year = str(datetime.now().year)

# First CLI argument selects the browser/run environment; abort without it.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

br.get(calendar_url)
sleep(3)

# Expand the "more events" section before reading the DOM; later-dated
# entries are rendered with class "event hidden".
br.find_element(By.CLASS_NAME, 'more_events').click()
sleep(2)
ps = html.fromstring(br.page_source)
sleep(3)

dates = ps.xpath('.//*/div[@class="event"]')
dates = dates + ps.xpath('.//*/div[@class="event hidden"]')
|
||||
|
||||
def process_times(times):
    """Clean raw time strings scraped from an event column.

    Strips newlines, surrounding whitespace and "TBA" markers, keeps only
    entries ending in an am/pm suffix, and collapses ranges like "7-10pm"
    to their start time ("7pm").  Returns surviving times, order preserved.
    """
    cleaned = []
    for raw in times:
        t = raw.replace("\n", "").replace("TBA", "").strip()
        if not t:
            continue
        # Accept both suffixes: the original matched only "pm", which left
        # morning shows without a parseable time downstream.
        if t.endswith("pm"):
            suffix = "pm"
        elif t.endswith("am"):
            suffix = "am"
        else:
            continue
        if "-" in t:
            # "7-10pm" -> "7pm": keep the start of the range.
            t = t.split("-")[0].strip() + suffix
        cleaned.append(t)
    return cleaned
|
||||
|
||||
events = []

# Walk every event card; each card holds one date and one or more columns
# of acts, each column becoming one Event.
for d in dates:
    # First two spans of the date cell, e.g. ["Oct", "03"].
    event_date = d.xpath('.//div[@class="event-date"]/span/text()')[:2]
    cols = d.xpath('.//div[@class="column"]')
    for c in cols:
        bands = c.xpath('.//p/a/text()')
        links = c.xpath('.//p/a/@href')
        time = process_times(c.xpath('.//p/text()'))
        event = {}
        event["datetime"] = event_date + time + [current_year]
        # Try each known listing format in turn.  The original nested three
        # bare try/excepts that duplicated this call and also swallowed
        # non-parse errors; only ValueError is a failed parse.
        stamp = " ".join(event["datetime"])
        event["date_time"] = "Invalid"
        for fmt in (DATETIME_FORMAT, DATETIME_FORMAT_2, DATETIME_FORMAT_3):
            try:
                event["date_time"] = datetime.strptime(stamp, fmt)
                break
            except ValueError:
                continue
        event["bands"] = (", ").join(bands)
        if len(bands) > 0:
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = event["bands"]
            event['date'] = event["date_time"]
            event['dateStamp'] = event["date_time"]
            event['link'] = "https://331club.com/#calendar"

            # Best effort: report and keep going if a single event fails.
            try:
                digitools.createBasicEvent(event, "Mu", venue)
            except Exception as e:
                print('oops', e)
            events.append(event)

br.close()

# Persist how many items this run added.
digitools.updateScraper(scraper, item_count_start)
|
||||
64
event_scrapers/Working/workshop/ComedyUnderground.py
Normal file
64
event_scrapers/Working/workshop/ComedyUnderground.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
|
||||
from events.models import Event as DSEvent, Organization
|
||||
from digitools import getBrowser, createDashURL, createBasicEvent, getSource
|
||||
|
||||
# Organization row for the Comedy Corner Underground venue (created on
# first run, fetched thereafter).
venue, created = Organization.objects.get_or_create(
    name="Comedy Corner",
    city="Minneapolis",
    website="https://comedycornerunderground.com/calendar",
)

calendar_url = "https://comedycornerunderground.com/calendar"

# Assembled as "<month> <day> <time>m <year>", e.g. "10 25 7pm 2024".
DATETIME_FORMAT = '%m %d %I%p %Y'
|
||||
|
||||
def get_events(ps, link):
    """Scrape one month's calendar page and create a comedy event per listing.

    ps:   parsed lxml tree of the calendar page.
    link: the page URL; its "month=<month>-<year>" query part supplies the
          month/year paired with each day number in the grid.
    Grid cells that fail to parse are reported and skipped (best effort).
    """
    print(link)
    # month/year depend only on the URL, so compute them once instead of
    # once per listing as the original did.
    try:
        month, year = link.split("month=")[1].split("-")[:2]
    except (IndexError, ValueError):
        print("get_events: no month=<month>-<year> in link:", link)
        return
    contents = ps.xpath('.//*/td')
    for c in contents:
        try:
            day_num = c.xpath('.//*/div[@class="marker-daynum"]/text()')[0]
            listings = c.xpath('.//*/li')
            for item in listings:
                event = {}
                event['title'] = item.xpath('.//*/span[@class="item-title"]/text()')[0]
                event['time'] = item.xpath('.//*/span[@class="item-time item-time--12hr"]/text()')[0].replace("\xa0", "")
                event['link'] = "https://comedycornerunderground.com" + item.xpath('.//a/@href')[0]
                # Site shows "7p"-style times; append the missing "m".
                event['date'] = str(day_num) + ' ' + event['time'] + 'm'
                dateStamp = month + ' ' + event['date'] + ' ' + year
                event['dateStamp'] = datetime.strptime(dateStamp, DATETIME_FORMAT)
                createBasicEvent(event, 'Co')
                print('\n\n++\n\n')
        except Exception as exc:
            # Keep going on malformed cells, but say why -- the original
            # swallowed every error silently (and its `except ... as e`
            # shadowed the loop variable `e`).
            print('skipping cell:', exc)
            continue
|
||||
|
||||
# Entry point: a run-environment argument is required to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = getBrowser(arg1)
else:
    print("No run_env")
    quit()

# One URL per month; each is expected to end in "month=<month>-<year>"
# (get_events parses that query part) -- TODO confirm createDashURL's output.
links = createDashURL("https://comedycornerunderground.com/calendar?view=calendar&month=")

for link in links:
    ps = getSource(br, link)
    get_events(ps, link)
    sleep(5)

# ppr(events)
br.close()
|
||||
74
event_scrapers/Working/workshop/cabooze.mn.py
Normal file
74
event_scrapers/Working/workshop/cabooze.mn.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper
|
||||
import events.digitools as digitools
|
||||
|
||||
# Organization row for the Cabooze venue (created on first run).
venue, created = Organization.objects.get_or_create(
    name="Cabooze",
    city="Minneapolis",
    website="https://www.cabooze.com/#/events",
    is_venue=True
)

# Scraper bookkeeping row plus starting item count for updateScraper.
scraper,item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
# e.g. "Oct 25 8:00 PM 2024"
DATETIME_FORMAT = '%b %d %I:%M %p %Y'
# NOTE(review): DATETIME_FORMAT_2 is never referenced in this file.
DATETIME_FORMAT_2 = '%A, %B %d @ %I%p %Y'
|
||||
|
||||
def get_events(ps, event_type):
    """Create an Event for each row on the Cabooze listings page.

    ps:         parsed lxml tree of the listings page.
    event_type: short event-type code passed through to createBasicEvent.
    Rows that fail to parse or save are reported and skipped.
    """
    print("Getting events ...")
    contents = ps.xpath('.//*/div[@class="vp-event-row vp-widget-reset vp-venue-thecabooze"]')
    ppr(contents)
    for c in contents:
        try:
            event = {}
            date = c.xpath('.//*/span[@class="vp-month-n-day"]/text()')[0]
            print(date)
            month = date.split(" ")[0]
            time = c.xpath('.//*/span[@class="vp-time"]/text()')[0]
            # Listings omit the year: assume the current one, treating a
            # January date as next year -- but only when we are not already
            # in January.  The original bumped the year unconditionally,
            # which mis-dated January shows scraped during January.
            now = datetime.now()
            year = now.year
            if month == "Jan" and now.month != 1:
                year = int(year) + 1
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/div[@class="vp-event-name"]/text()')[0]
            event['date'] = " ".join([date, time, str(year)])
            event['dateStamp'] = datetime.strptime(event['date'], DATETIME_FORMAT)
            event['link'] = "https://www.cabooze.com/" + c.xpath('.//a[@class="vp-event-link"]/@href')[0]
            print("Event Dict Created")
            ppr(event)
            digitools.createBasicEvent(event, event_type, venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            ppr(event)
            print("\n\n+++\n\n")
|
||||
|
||||
# Entry point: a run-environment argument is required to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Fetch the listings page, scrape it as music events, then record the run.
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")
sleep(3)

digitools.updateScraper(scraper, item_count_start)
br.close()
||||
82
event_scrapers/Working/workshop/dakota.mpls.py
Normal file
82
event_scrapers/Working/workshop/dakota.mpls.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper
|
||||
import events.digitools as digitools
|
||||
|
||||
# NOTE(review): `count` is never used below.
count = 0

# Organization row for the Dakota venue (created on first run).
venue, created = Organization.objects.get_or_create(
    name="Dakota",
    city="Minneapolis",
    website="https://www.dakotacooks.com/events/",
    is_venue=True
)

# Scraper bookkeeping row plus starting item count for updateScraper.
scraper,item_count_start = digitools.getScraper(venue)

event_type = ""

# Time Signatures
tz = pytz.timezone("US/Central")
# e.g. "Fri Oct 25, 2024 • 7:00PM", with a minutes-free variant below.
DATETIME_FORMAT = '%a %b %d, %Y • %I:%M%p'
DATETIME_FORMAT_2 = '%a %b %d, %Y • %I%p'
|
||||
|
||||
def get_events(ps, event_type):
    """Follow each event-detail link on the Dakota month view and create events.

    ps:         parsed lxml tree of the month-view page.
    event_type: unused here; events are always created with type "Mu".
    Failed pages are reported and a create is still attempted best-effort.
    """
    links = ps.xpath('.//*/div[@class="wicked-event-title tooltipstered"]/a/@href')
    links = set(links)  # the grid repeats links; visit each detail page once
    for l in links:
        # BUG FIX: the original called bare getSource(), which is not in
        # scope (only the digitools module is imported) and raised NameError.
        pse = digitools.getSource(br, l)
        event = {}
        event['scraper'] = scraper
        event['calendar'] = scraper.calendar
        event['link'] = l
        try:
            event['time'] = pse.xpath('.//*/span[@class="text-uppercase"]/text()')[0].strip()
            event['title'] = pse.xpath('.//*/div[@class="sidebar-group"]/h1/text()')[0]
            # The site shows either "7:00PM" or "7PM"; try both formats.
            # (The original's fallback read event['time'] before it was
            # guaranteed to exist, raising KeyError when the xpath failed.)
            try:
                event['dateStamp'] = datetime.strptime(event['time'], DATETIME_FORMAT)
            except ValueError:
                event['dateStamp'] = datetime.strptime(event['time'], DATETIME_FORMAT_2)
        except Exception as e:
            print(e)
            print("failed event: ", event)
        ppr(event)
        try:
            digitools.createBasicEvent(event, "Mu", venue)
            scraper.items+=1
        except Exception as e:
            print(e)
            print('failed to create: ', event)
|
||||
|
||||
# Entry point: a run-environment argument is required to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()


# NOTE(review): the month/year are hard-coded in the query string, so this
# run only scrapes April 2025 -- parameterize before relying on it.
ps = digitools.getSource(br, venue.website + '?wicked_month=04&wicked_year=2025&wicked_view=month')
get_events(ps, "Mu")
sleep(1)


digitools.updateScraper(scraper, item_count_start)

br.close()
|
||||
78
event_scrapers/Working/workshop/icehouse.mpls.py
Normal file
78
event_scrapers/Working/workshop/icehouse.mpls.py
Normal file
@@ -0,0 +1,78 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper
|
||||
import events.digitools as digitools
|
||||
|
||||
# NOTE(review): `count` is never used below.
count = 0
tz = pytz.timezone("US/Central")
# e.g. "Fri, Oct 25 2024 7:00 PM SHOW"
# NOTE(review): DATETIME_FORMAT_2 duplicates DATETIME_FORMAT exactly and is
# never referenced -- likely a leftover.
DATETIME_FORMAT = '%a, %b %d %Y %I:%M %p SHOW'
DATETIME_FORMAT_2 = '%a, %b %d %Y %I:%M %p SHOW'

# Organization row for the Icehouse venue (created on first run).
venue, created = Organization.objects.get_or_create(
    name="Icehouse",
    city="Minneapolis",
    website = "https://icehouse.turntabletickets.com",
    is_venue = True
)

# Scraper bookkeeping row plus starting item count for updateScraper.
scraper,item_count_start = digitools.getScraper(venue)
|
||||
|
||||
def get_events(ps, event_type):
    """Create an event for each performance card on the Icehouse listings page.

    ps:         parsed lxml tree of the listings page.
    event_type: short event-type code passed to createBasicEvent.
    A card that cannot be parsed or saved is reported and skipped; the
    original called quit() here, killing the whole run on one bad card.
    """
    contents = ps.xpath('.//*/div[@class="performances whitespace-pre-line w-full md:w-3/4"]')
    for c in contents:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['title'] = c.xpath('.//*/h3[@class="text-3xl font-semibold font-heading mr-auto"]/text()')[0]
            event['link'] = venue.website + c.xpath('.//*/a[@class="show-link"]/@href')[0]
            event['date'] = c.xpath('.//*/h4[@class="day-of-week"]/text()')[0]
            year = datetime.now().year
            # Brunch cards carry no button time; they start at 11 AM.
            if "Brunch" in event['title']:
                event['time'] = "11:00 AM SHOW"
            else:
                event['time'] = c.xpath('.//*/div[@class="performance-btn"]/button/text()')[0]

            event['datetime'] = event['date'] + " " + str(year) + " " + event['time']
            try:
                event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
            except ValueError:
                # No parseable time on the card: assume 7 PM and flag it.
                event['datetime'] = event['date'] + " " + str(year) + " " + "07:00 PM SHOW"
                event['dateStamp'] = datetime.strptime(event['datetime'], DATETIME_FORMAT)
                event['title'] = event['title'] + " (Time Estimated)"
            try:
                digitools.createBasicEvent(event, event_type, venue)
                scraper.items+=1
            except Exception as e:
                # Report the failed save and move on to the next card.
                print(e)

        except Exception as e:
            # Report the card that failed to parse and keep scraping.
            ppr(event)
            print(e)
|
||||
|
||||
# Entry point: a run-environment argument is required to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Fetch the listings page, scrape it as music events, then record the run.
ps = digitools.getSource(br, venue.website)
get_events(ps, "Mu")

# ppr(events)
br.close()
digitools.updateScraper(scraper, item_count_start)
|
||||
86
event_scrapers/Working/workshop/pillarforum.mpls.py
Normal file
86
event_scrapers/Working/workshop/pillarforum.mpls.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import os, sys
|
||||
from datetime import datetime
|
||||
from dateutil import relativedelta
|
||||
|
||||
import django
|
||||
sys.path.append('../../../')
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'ds_events.settings'
|
||||
django.setup()
|
||||
|
||||
from time import sleep
|
||||
from pprint import pprint as ppr
|
||||
import pytz
|
||||
|
||||
from events.models import Organization, Scraper
|
||||
import events.digitools as digitools
|
||||
|
||||
|
||||
# NOTE(review): `current_year` is never used below (get_events recomputes it).
current_year = str(datetime.now().year)

# Organization row for the Pillar Forum venue (created on first run).
# NOTE(review): "Piller" in the name looks like a typo for "Pilllar"/"Pillar";
# changing it would create a new Organization row, so confirm before fixing.
venue, created = Organization.objects.get_or_create(
    name="Piller Forum",
    city="Minneapolis",
    website="https://www.pilllar.com/pages/events",
    is_venue = True
)

# Scraper bookkeeping row plus starting item count for updateScraper.
scraper,item_count_start = digitools.getScraper(venue)

event_type = "Mu"

# Time Signatures
tz = pytz.timezone("US/Central")
# e.g. "Oct. 25 2024 7:30 PM"
# NOTE(review): DATETIME_FORMAT and DATETIME_FORMAT_night are identical, and
# DATETIME_FORMAT itself is never used below.
DATETIME_FORMAT = '%b. %d %Y %I:%M %p'
DATETIME_FORMAT_night = '%b. %d %Y %I:%M %p'
# Morning variant with a bare "am" suffix, e.g. "Oct. 25 2024 9:00am"
DATETIME_FORMAT_2 = '%b. %d %Y %I:%Mam'
|
||||
|
||||
def get_events(ps, event_type):
    """Create an event per populated column block on the Pilllar events page.

    ps:         parsed lxml tree of the events page.
    event_type: default event-type code; overwritten per-listing below.
    Blocks that fail to parse are reported and skipped.
    """
    contents = ps.xpath('.//*/div[@class="sse-column sse-half sse-center"]')
    for c in contents:
        try:
            event = {}
            event['scraper'] = scraper
            event['calendar'] = scraper.calendar
            event['link'] = venue.website
            # time = c.xpath('.//*/span[@class="vp-time"]/text()')[0].strip()
            date = c.xpath('.//h1[@class="sse-size-64"]/text()')[0]
            # Skip decorative/empty header blocks.
            if len(date) > 1:
                print(date)
                # Listings omit the year; assume the current one.
                year = datetime.now().year
                event_date = date + " " + str(year)
                event['title'] = c.xpath('.//p/span/b/text()')[0]
                details = c.xpath('.//p/text()')
                if 'Music' in details[-1]:
                    # "… Music 7:30" style: take the text after "Music" as a
                    # PM start time.
                    event_time = c.xpath('.//p/text()')[-1].split("Music")[1].strip()
                    event_type = "Mu"
                    event_dt = event_date + " " + event_time + " PM"
                    event['dateStamp'] = datetime.strptime(event_dt, DATETIME_FORMAT_night)
                elif len(details) == 1:
                    # Single detail line like "7:30 - 10": start of the range,
                    # first assumed PM, then retried as an "…am" string.
                    try:
                        event_time = details[0].split("-")[0].strip()
                        event_dt = event_date + " " + event_time + ' PM'
                        event['dateStamp'] = datetime.strptime(event_dt, DATETIME_FORMAT_night)
                        event_type = "Ot"
                    except Exception as e:
                        event_time = details[0].split("-")[0].strip()
                        event_dt = event_date + " " + event_time
                        event['dateStamp'] = datetime.strptime(event_dt, DATETIME_FORMAT_2)
                        event_type = "Ot"
                # NOTE(review): if neither branch above matched, the event has
                # no dateStamp when created here -- confirm createBasicEvent
                # tolerates that (the outer except would otherwise swallow it).
                digitools.createBasicEvent(event, event_type, venue)
        except Exception as e:
            print(e)
|
||||
|
||||
# Entry point: a run-environment argument is required to pick the browser.
if len(sys.argv) >= 2:
    arg1 = sys.argv[1]
    br = digitools.getBrowser(arg1)
else:
    print("No run_env")
    quit()

# Fetch the events page, scrape it, then record the run.
ps = digitools.getSource(br, venue.website)
get_events(ps, event_type)
sleep(3)

br.close()

digitools.updateScraper(scraper, item_count_start)
|
||||
Reference in New Issue
Block a user